author     Dan Williams <dan.j.williams@intel.com>   2009-09-08 20:55:54 -0400
committer  Dan Williams <dan.j.williams@intel.com>   2009-09-08 20:55:54 -0400
commit     9134d02bc0af4a8747d448d1f811ec5f8eb96df6
tree       704c3e5dcc10f360815c4868a74711f82fb62e27 /drivers/md
parent     bbb20089a3275a19e475dbc21320c3742e3ca423
parent     80ffb3cceaefa405f2ecd46d66500ed8d53efe74
Merge commit 'md/for-linus' into async-tx-next

Conflicts:
        drivers/md/raid5.c
Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-crypt.c              4
-rw-r--r--  drivers/md/dm-delay.c              4
-rw-r--r--  drivers/md/dm-exception-store.c    9
-rw-r--r--  drivers/md/dm-linear.c             2
-rw-r--r--  drivers/md/dm-mpath.c              2
-rw-r--r--  drivers/md/dm-raid1.c              3
-rw-r--r--  drivers/md/dm-stripe.c             7
-rw-r--r--  drivers/md/dm-table.c             17
-rw-r--r--  drivers/md/dm.c                   14
-rw-r--r--  drivers/md/dm.h                    1
-rw-r--r--  drivers/md/linear.c                6
-rw-r--r--  drivers/md/md.c                  251
-rw-r--r--  drivers/md/md.h                   12
-rw-r--r--  drivers/md/multipath.c            12
-rw-r--r--  drivers/md/raid0.c                10
-rw-r--r--  drivers/md/raid1.c                16
-rw-r--r--  drivers/md/raid10.c               23
-rw-r--r--  drivers/md/raid5.c                87
18 files changed, 297 insertions, 183 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9933eb861c71..ed1038164019 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
          * But don't wait if split was due to the io size restriction
          */
         if (unlikely(out_of_pages))
-                congestion_wait(WRITE, HZ/100);
+                congestion_wait(BLK_RW_ASYNC, HZ/100);
 
         /*
          * With async crypto it is unsafe to share the crypto context
@@ -1318,7 +1318,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 {
         struct crypt_config *cc = ti->private;
 
-        return fn(ti, cc->dev, cc->start, data);
+        return fn(ti, cc->dev, cc->start, ti->len, data);
 }
 
 static struct target_type crypt_target = {
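The extra ti->len argument added to fn() above recurs in every device-mapper target in this merge: the iterate_devices callout now receives the device area length explicitly instead of reading ti->len itself, so dm-table can validate each area against the underlying device. A minimal sketch of the resulting callback shape, assuming the iterate_devices_callout_fn typedef as it appears in this kernel's include/linux/device-mapper.h; the example target and its context struct are hypothetical:

typedef int (*iterate_devices_callout_fn) (struct dm_target *ti,
                                           struct dm_dev *dev,
                                           sector_t start, sector_t len,
                                           void *data);

struct example_ctx {                    /* hypothetical per-target state */
        struct dm_dev *dev;
        sector_t start;
};

static int example_iterate_devices(struct dm_target *ti,
                                   iterate_devices_callout_fn fn,
                                   void *data)
{
        struct example_ctx *ec = ti->private;

        /* Forward the target's own length; the callout no longer
         * dereferences ti->len on its own. */
        return fn(ti, ec->dev, ec->start, ti->len, data);
}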
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 4e5b843cd4d7..ebe7381f47c8 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -324,12 +324,12 @@ static int delay_iterate_devices(struct dm_target *ti,
         struct delay_c *dc = ti->private;
         int ret = 0;
 
-        ret = fn(ti, dc->dev_read, dc->start_read, data);
+        ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data);
         if (ret)
                 goto out;
 
         if (dc->dev_write)
-                ret = fn(ti, dc->dev_write, dc->start_write, data);
+                ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data);
 
 out:
         return ret;
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index c3ae51584b12..3710ff88fc10 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -195,7 +195,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
                               struct dm_exception_store **store)
 {
         int r = 0;
-        struct dm_exception_store_type *type;
+        struct dm_exception_store_type *type = NULL;
         struct dm_exception_store *tmp_store;
         char persistent;
 
@@ -211,12 +211,15 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
         }
 
         persistent = toupper(*argv[1]);
-        if (persistent != 'P' && persistent != 'N') {
+        if (persistent == 'P')
+                type = get_type("P");
+        else if (persistent == 'N')
+                type = get_type("N");
+        else {
                 ti->error = "Persistent flag is not P or N";
                 return -EINVAL;
         }
 
-        type = get_type(&persistent);
         if (!type) {
                 ti->error = "Exception store type not recognised";
                 r = -EINVAL;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 9184b6deb868..82f7d6e6b1ea 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -139,7 +139,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 {
         struct linear_c *lc = ti->private;
 
-        return fn(ti, lc->dev, lc->start, data);
+        return fn(ti, lc->dev, lc->start, ti->len, data);
 }
 
 static struct target_type linear_target = {
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c70604a20897..6f0d90d4a541 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1453,7 +1453,7 @@ static int multipath_iterate_devices(struct dm_target *ti,
 
         list_for_each_entry(pg, &m->priority_groups, list) {
                 list_for_each_entry(p, &pg->pgpaths, list) {
-                        ret = fn(ti, p->path.dev, ti->begin, data);
+                        ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
                         if (ret)
                                 goto out;
                 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ce8868c768cc..9726577cde49 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -638,6 +638,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
                 spin_lock_irq(&ms->lock);
                 bio_list_merge(&ms->writes, &requeue);
                 spin_unlock_irq(&ms->lock);
+                delayed_wake(ms);
         }
 
         /*
@@ -1292,7 +1293,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
 
         for (i = 0; !ret && i < ms->nr_mirrors; i++)
                 ret = fn(ti, ms->mirror[i].dev,
-                         ms->mirror[i].offset, data);
+                         ms->mirror[i].offset, ti->len, data);
 
         return ret;
 }
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index b240e85ae39a..4e0e5937e42a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -320,10 +320,11 @@ static int stripe_iterate_devices(struct dm_target *ti,
         int ret = 0;
         unsigned i = 0;
 
-        do
+        do {
                 ret = fn(ti, sc->stripe[i].dev,
-                         sc->stripe[i].physical_start, data);
-        while (!ret && ++i < sc->stripes);
+                         sc->stripe[i].physical_start,
+                         sc->stripe_width, data);
+        } while (!ret && ++i < sc->stripes);
 
         return ret;
 }
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4899ebe767c8..d952b3441913 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -346,7 +346,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
  * If possible, this checks an area of a destination device is valid.
  */
 static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
-                                sector_t start, void *data)
+                                sector_t start, sector_t len, void *data)
 {
         struct queue_limits *limits = data;
         struct block_device *bdev = dev->bdev;
@@ -359,7 +359,7 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
         if (!dev_size)
                 return 1;
 
-        if ((start >= dev_size) || (start + ti->len > dev_size)) {
+        if ((start >= dev_size) || (start + len > dev_size)) {
                 DMWARN("%s: %s too small for target",
                        dm_device_name(ti->table->md), bdevname(bdev, b));
                 return 0;
@@ -377,11 +377,11 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
                 return 0;
         }
 
-        if (ti->len & (logical_block_size_sectors - 1)) {
+        if (len & (logical_block_size_sectors - 1)) {
                 DMWARN("%s: len=%llu not aligned to h/w "
                        "logical block size %hu of %s",
                        dm_device_name(ti->table->md),
-                       (unsigned long long)ti->len,
+                       (unsigned long long)len,
                        limits->logical_block_size, bdevname(bdev, b));
                 return 0;
         }
@@ -482,7 +482,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
 
 int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
-                         sector_t start, void *data)
+                         sector_t start, sector_t len, void *data)
 {
         struct queue_limits *limits = data;
         struct block_device *bdev = dev->bdev;
@@ -495,7 +495,7 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
                 return 0;
         }
 
-        if (blk_stack_limits(limits, &q->limits, start) < 0)
+        if (blk_stack_limits(limits, &q->limits, start << 9) < 0)
                 DMWARN("%s: target device %s is misaligned",
                        dm_device_name(ti->table->md), bdevname(bdev, b));
 
@@ -830,11 +830,6 @@ unsigned dm_table_get_type(struct dm_table *t)
         return t->type;
 }
 
-bool dm_table_bio_based(struct dm_table *t)
-{
-        return dm_table_get_type(t) == DM_TYPE_BIO_BASED;
-}
-
 bool dm_table_request_based(struct dm_table *t)
 {
         return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
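The start << 9 fix in dm_set_device_limits above is a unit conversion: dm tracks target offsets in 512-byte sectors, while the offset argument blk_stack_limits() uses for its alignment checks is, in this kernel, interpreted in bytes. A one-line sketch of the conversion, values illustrative only:

/* Illustrative only: scale a 512-byte sector count to the byte
 * offset blk_stack_limits() expects; 2048 sectors -> 1048576 bytes. */
static inline unsigned long long sectors_to_bytes(sector_t start)
{
        return (unsigned long long)start << 9;
}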
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3c6d4ee8921d..8a311ea0d441 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1017,7 +1017,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
         clone->bi_flags |= 1 << BIO_CLONED;
 
         if (bio_integrity(bio)) {
-                bio_integrity_clone(clone, bio, GFP_NOIO);
+                bio_integrity_clone(clone, bio, GFP_NOIO, bs);
                 bio_integrity_trim(clone,
                                    bio_sector_offset(bio, idx, offset), len);
         }
@@ -1045,7 +1045,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
         clone->bi_flags &= ~(1 << BIO_SEG_VALID);
 
         if (bio_integrity(bio)) {
-                bio_integrity_clone(clone, bio, GFP_NOIO);
+                bio_integrity_clone(clone, bio, GFP_NOIO, bs);
 
                 if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
                         bio_integrity_trim(clone,
@@ -2203,16 +2203,6 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
                 goto out;
         }
 
-        /*
-         * It is enought that blk_queue_ordered() is called only once when
-         * the first bio-based table is bound.
-         *
-         * This setting should be moved to alloc_dev() when request-based dm
-         * supports barrier.
-         */
-        if (!md->map && dm_table_bio_based(table))
-                blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
-
         __unbind(md);
         r = __bind(md, table, &limits);
 
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 23278ae80f08..a7663eba17e2 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -61,7 +61,6 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 int dm_table_any_busy_target(struct dm_table *t);
 int dm_table_set_type(struct dm_table *t);
 unsigned dm_table_get_type(struct dm_table *t);
-bool dm_table_bio_based(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
 int dm_table_alloc_md_mempools(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 15c8b7b25a9b..5fe39c2a3d2b 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
                 rdev->sectors = sectors * mddev->chunk_sectors;
         }
 
-        blk_queue_stack_limits(mddev->queue,
-                               rdev->bdev->bd_disk->queue);
+        disk_stack_limits(mddev->gendisk, rdev->bdev,
+                          rdev->data_offset << 9);
         /* as we don't honour merge_bvec_fn, we must never risk
          * violating it, so limit ->max_sector to one PAGE, as
          * a one page request is never in violation.
@@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev)
         mddev->queue->unplug_fn = linear_unplug;
         mddev->queue->backing_dev_info.congested_fn = linear_congested;
         mddev->queue->backing_dev_info.congested_data = mddev;
+        md_integrity_register(mddev);
         return 0;
 }
 
@@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
         rcu_assign_pointer(mddev->private, newconf);
         md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
         set_capacity(mddev->gendisk, mddev->array_sectors);
+        revalidate_disk(mddev->gendisk);
         call_rcu(&oldconf->rcu, free_conf);
         return 0;
 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 09be637d52cb..9dd872000cec 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
         else
                 new->md_minor = MINOR(unit) >> MdpMinorShift;
 
+        mutex_init(&new->open_mutex);
         mutex_init(&new->reconfig_mutex);
         INIT_LIST_HEAD(&new->disks);
         INIT_LIST_HEAD(&new->all_mddevs);
@@ -1308,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
         }
         if (mddev->level != LEVEL_MULTIPATH) {
                 int role;
-                role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
+                if (rdev->desc_nr < 0 ||
+                    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
+                        role = 0xffff;
+                        rdev->desc_nr = -1;
+                } else
+                        role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
                 switch(role) {
                 case 0xffff: /* spare */
                         break;
@@ -1394,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                 if (rdev2->desc_nr+1 > max_dev)
                         max_dev = rdev2->desc_nr+1;
 
-        if (max_dev > le32_to_cpu(sb->max_dev))
+        if (max_dev > le32_to_cpu(sb->max_dev)) {
+                int bmask;
                 sb->max_dev = cpu_to_le32(max_dev);
+                rdev->sb_size = max_dev * 2 + 256;
+                bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
+                if (rdev->sb_size & bmask)
+                        rdev->sb_size = (rdev->sb_size | bmask) + 1;
+        }
         for (i=0; i<max_dev;i++)
                 sb->dev_roles[i] = cpu_to_le16(0xfffe);
 
@@ -1487,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
 
 static LIST_HEAD(pending_raid_disks);
 
-static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
+/*
+ * Try to register data integrity profile for an mddev
+ *
+ * This is called when an array is started and after a disk has been kicked
+ * from the array. It only succeeds if all working and active component devices
+ * are integrity capable with matching profiles.
+ */
+int md_integrity_register(mddev_t *mddev)
+{
+        mdk_rdev_t *rdev, *reference = NULL;
+
+        if (list_empty(&mddev->disks))
+                return 0; /* nothing to do */
+        if (blk_get_integrity(mddev->gendisk))
+                return 0; /* already registered */
+        list_for_each_entry(rdev, &mddev->disks, same_set) {
+                /* skip spares and non-functional disks */
+                if (test_bit(Faulty, &rdev->flags))
+                        continue;
+                if (rdev->raid_disk < 0)
+                        continue;
+                /*
+                 * If at least one rdev is not integrity capable, we can not
+                 * enable data integrity for the md device.
+                 */
+                if (!bdev_get_integrity(rdev->bdev))
+                        return -EINVAL;
+                if (!reference) {
+                        /* Use the first rdev as the reference */
+                        reference = rdev;
+                        continue;
+                }
+                /* does this rdev's profile match the reference profile? */
+                if (blk_integrity_compare(reference->bdev->bd_disk,
+                                          rdev->bdev->bd_disk) < 0)
+                        return -EINVAL;
+        }
+        /*
+         * All component devices are integrity capable and have matching
+         * profiles, register the common profile for the md device.
+         */
+        if (blk_integrity_register(mddev->gendisk,
+                        bdev_get_integrity(reference->bdev)) != 0) {
+                printk(KERN_ERR "md: failed to register integrity for %s\n",
+                       mdname(mddev));
+                return -EINVAL;
+        }
+        printk(KERN_NOTICE "md: data integrity on %s enabled\n",
+               mdname(mddev));
+        return 0;
+}
+EXPORT_SYMBOL(md_integrity_register);
+
+/* Disable data integrity if non-capable/non-matching disk is being added */
+void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
 {
-        struct mdk_personality *pers = mddev->pers;
-        struct gendisk *disk = mddev->gendisk;
         struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
-        struct blk_integrity *bi_mddev = blk_get_integrity(disk);
+        struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
 
-        /* Data integrity passthrough not supported on RAID 4, 5 and 6 */
-        if (pers && pers->level >= 4 && pers->level <= 6)
+        if (!bi_mddev) /* nothing to do */
                 return;
-
-        /* If rdev is integrity capable, register profile for mddev */
-        if (!bi_mddev && bi_rdev) {
-                if (blk_integrity_register(disk, bi_rdev))
-                        printk(KERN_ERR "%s: %s Could not register integrity!\n",
-                               __func__, disk->disk_name);
-                else
-                        printk(KERN_NOTICE "Enabling data integrity on %s\n",
-                               disk->disk_name);
+        if (rdev->raid_disk < 0) /* skip spares */
                 return;
-        }
-
-        /* Check that mddev and rdev have matching profiles */
-        if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
-                printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
-                       disk->disk_name, rdev->bdev->bd_disk->disk_name);
-                printk(KERN_NOTICE "Disabling data integrity on %s\n",
-                       disk->disk_name);
-                blk_integrity_unregister(disk);
-        }
+        if (bi_rdev && blk_integrity_compare(mddev->gendisk,
+                                             rdev->bdev->bd_disk) >= 0)
+                return;
+        printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
+        blk_integrity_unregister(mddev->gendisk);
 }
+EXPORT_SYMBOL(md_integrity_add_rdev);
 
 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 {
@@ -1591,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
         /* May as well allow recovery to be retried once */
         mddev->recovery_disabled = 0;
 
-        md_integrity_check(rdev, mddev);
         return 0;
 
  fail:
@@ -1756,9 +1806,10 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
         __u8 *uuid;
 
         uuid = sb->set_uuid;
-        printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
-               ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
-               KERN_INFO "md: Name: \"%s\" CT:%llu\n",
+        printk(KERN_INFO
+               "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
+               ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
+               "md: Name: \"%s\" CT:%llu\n",
                le32_to_cpu(sb->major_version),
                le32_to_cpu(sb->feature_map),
                uuid[0], uuid[1], uuid[2], uuid[3],
@@ -1770,12 +1821,13 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
                 & MD_SUPERBLOCK_1_TIME_SEC_MASK);
 
         uuid = sb->device_uuid;
-        printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
+        printk(KERN_INFO
+               "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
                " RO:%llu\n"
-               KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+               "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
                ":%02x%02x%02x%02x%02x%02x\n"
-               KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
-               KERN_INFO "md: (MaxDev:%u) \n",
+               "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
+               "md: (MaxDev:%u) \n",
                le32_to_cpu(sb->level),
                (unsigned long long)le64_to_cpu(sb->size),
                le32_to_cpu(sb->raid_disks),
@@ -1923,17 +1975,14 @@ repeat:
                 /* otherwise we have to go forward and ... */
                 mddev->events ++;
                 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
-                        /* .. if the array isn't clean, insist on an odd 'events' */
-                        if ((mddev->events&1)==0) {
-                                mddev->events++;
+                        /* .. if the array isn't clean, an 'even' event must also go
+                         * to spares. */
+                        if ((mddev->events&1)==0)
                                 nospares = 0;
-                        }
                 } else {
-                        /* otherwise insist on an even 'events' (for clean states) */
-                        if ((mddev->events&1)) {
-                                mddev->events++;
+                        /* otherwise an 'odd' event must go to spares */
+                        if ((mddev->events&1))
                                 nospares = 0;
-                        }
                 }
         }
 
@@ -2655,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
         ssize_t rv = len;
         struct mdk_personality *pers;
         void *priv;
+        mdk_rdev_t *rdev;
 
         if (mddev->pers == NULL) {
                 if (len == 0)
@@ -2734,6 +2784,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
         mddev_suspend(mddev);
         mddev->pers->stop(mddev);
         module_put(mddev->pers->owner);
+        /* Invalidate devices that are now superfluous */
+        list_for_each_entry(rdev, &mddev->disks, same_set)
+                if (rdev->raid_disk >= mddev->raid_disks) {
+                        rdev->raid_disk = -1;
+                        clear_bit(In_sync, &rdev->flags);
+                }
         mddev->pers = pers;
         mddev->private = priv;
         strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
@@ -3543,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
         if (max < mddev->resync_min)
                 return -EINVAL;
         if (max < mddev->resync_max &&
+            mddev->ro == 0 &&
             test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                 return -EBUSY;
 
@@ -3573,7 +3630,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
         char *e;
         unsigned long long new = simple_strtoull(buf, &e, 10);
 
-        if (mddev->pers->quiesce == NULL)
+        if (mddev->pers == NULL ||
+            mddev->pers->quiesce == NULL)
                 return -EINVAL;
         if (buf == e || (*e && *e != '\n'))
                 return -EINVAL;
@@ -3601,7 +3659,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
         char *e;
         unsigned long long new = simple_strtoull(buf, &e, 10);
 
-        if (mddev->pers->quiesce == NULL)
+        if (mddev->pers == NULL ||
+            mddev->pers->quiesce == NULL)
                 return -EINVAL;
         if (buf == e || (*e && *e != '\n'))
                 return -EINVAL;
@@ -3681,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
 
         mddev->array_sectors = sectors;
         set_capacity(mddev->gendisk, mddev->array_sectors);
-        if (mddev->pers) {
-                struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
-
-                if (bdev) {
-                        mutex_lock(&bdev->bd_inode->i_mutex);
-                        i_size_write(bdev->bd_inode,
-                                     (loff_t)mddev->array_sectors << 9);
-                        mutex_unlock(&bdev->bd_inode->i_mutex);
-                        bdput(bdev);
-                }
-        }
+        if (mddev->pers)
+                revalidate_disk(mddev->gendisk);
 
         return len;
 }
@@ -3844,11 +3894,9 @@ static int md_alloc(dev_t dev, char *name)
         flush_scheduled_work();
 
         mutex_lock(&disks_mutex);
-        if (mddev->gendisk) {
-                mutex_unlock(&disks_mutex);
-                mddev_put(mddev);
-                return -EEXIST;
-        }
+        error = -EEXIST;
+        if (mddev->gendisk)
+                goto abort;
 
         if (name) {
                 /* Need to ensure that 'name' is not a duplicate.
@@ -3860,17 +3908,15 @@ static int md_alloc(dev_t dev, char *name)
                 if (mddev2->gendisk &&
                     strcmp(mddev2->gendisk->disk_name, name) == 0) {
                         spin_unlock(&all_mddevs_lock);
-                        return -EEXIST;
+                        goto abort;
                 }
                 spin_unlock(&all_mddevs_lock);
         }
 
+        error = -ENOMEM;
         mddev->queue = blk_alloc_queue(GFP_KERNEL);
-        if (!mddev->queue) {
-                mutex_unlock(&disks_mutex);
-                mddev_put(mddev);
-                return -ENOMEM;
-        }
+        if (!mddev->queue)
+                goto abort;
         mddev->queue->queuedata = mddev;
 
         /* Can be unlocked because the queue is new: no concurrency */
@@ -3880,11 +3926,9 @@ static int md_alloc(dev_t dev, char *name)
 
         disk = alloc_disk(1 << shift);
         if (!disk) {
-                mutex_unlock(&disks_mutex);
                 blk_cleanup_queue(mddev->queue);
                 mddev->queue = NULL;
-                mddev_put(mddev);
-                return -ENOMEM;
+                goto abort;
         }
         disk->major = MAJOR(mddev->unit);
         disk->first_minor = unit << shift;
@@ -3906,16 +3950,22 @@ static int md_alloc(dev_t dev, char *name)
         mddev->gendisk = disk;
         error = kobject_init_and_add(&mddev->kobj, &md_ktype,
                                      &disk_to_dev(disk)->kobj, "%s", "md");
-        mutex_unlock(&disks_mutex);
-        if (error)
+        if (error) {
+                /* This isn't possible, but as kobject_init_and_add is marked
+                 * __must_check, we must do something with the result
+                 */
                 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
                        disk->disk_name);
-        else {
+                error = 0;
+        }
+ abort:
+        mutex_unlock(&disks_mutex);
+        if (!error) {
                 kobject_uevent(&mddev->kobj, KOBJ_ADD);
                 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
         }
         mddev_put(mddev);
-        return 0;
+        return error;
 }
 
 static struct kobject *md_probe(dev_t dev, int *part, void *data)
@@ -4044,10 +4094,6 @@ static int do_md_run(mddev_t * mddev)
         }
         strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
-        if (pers->level >= 4 && pers->level <= 6)
-                /* Cannot support integrity (yet) */
-                blk_integrity_unregister(mddev->gendisk);
-
         if (mddev->reshape_position != MaxSector &&
             pers->start_reshape == NULL) {
                 /* This personality cannot handle reshaping... */
@@ -4185,6 +4231,7 @@ static int do_md_run(mddev_t * mddev)
         md_wakeup_thread(mddev->thread);
         md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
 
+        revalidate_disk(mddev->gendisk);
         mddev->changed = 1;
         md_new_event(mddev);
         sysfs_notify_dirent(mddev->sysfs_state);
@@ -4256,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
         struct gendisk *disk = mddev->gendisk;
         mdk_rdev_t *rdev;
 
+        mutex_lock(&mddev->open_mutex);
         if (atomic_read(&mddev->openers) > is_open) {
                 printk("md: %s still in use.\n",mdname(mddev));
-                return -EBUSY;
-        }
-
-        if (mddev->pers) {
+                err = -EBUSY;
+        } else if (mddev->pers) {
 
                 if (mddev->sync_thread) {
                         set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4318,8 +4364,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                 if (mode == 1)
                         set_disk_ro(disk, 1);
                 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                err = 0;
         }
-
+out:
+        mutex_unlock(&mddev->open_mutex);
+        if (err)
+                return err;
         /*
          * Free resources if final stop
          */
@@ -4385,7 +4435,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
         blk_integrity_unregister(disk);
         md_new_event(mddev);
         sysfs_notify_dirent(mddev->sysfs_state);
-out:
         return err;
 }
 
@@ -5083,18 +5132,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
                 return -ENOSPC;
         }
         rv = mddev->pers->resize(mddev, num_sectors);
-        if (!rv) {
-                struct block_device *bdev;
-
-                bdev = bdget_disk(mddev->gendisk, 0);
-                if (bdev) {
-                        mutex_lock(&bdev->bd_inode->i_mutex);
-                        i_size_write(bdev->bd_inode,
-                                     (loff_t)mddev->array_sectors << 9);
-                        mutex_unlock(&bdev->bd_inode->i_mutex);
-                        bdput(bdev);
-                }
-        }
+        if (!rv)
+                revalidate_disk(mddev->gendisk);
         return rv;
 }
 
@@ -5480,12 +5519,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
         }
         BUG_ON(mddev != bdev->bd_disk->private_data);
 
-        if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
+        if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
                 goto out;
 
         err = 0;
         atomic_inc(&mddev->openers);
-        mddev_unlock(mddev);
+        mutex_unlock(&mddev->open_mutex);
 
         check_disk_change(bdev);
  out:
@@ -6334,10 +6373,16 @@ void md_do_sync(mddev_t *mddev)
                         sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                 }
 
-                if (j >= mddev->resync_max)
-                        wait_event(mddev->recovery_wait,
-                                   mddev->resync_max > j
-                                   || kthread_should_stop());
+                while (j >= mddev->resync_max && !kthread_should_stop()) {
+                        /* As this condition is controlled by user-space,
+                         * we can block indefinitely, so use '_interruptible'
+                         * to avoid triggering warnings.
+                         */
+                        flush_signals(current); /* just in case */
+                        wait_event_interruptible(mddev->recovery_wait,
+                                                 mddev->resync_max > j
+                                                 || kthread_should_stop());
+                }
 
                 if (kthread_should_stop())
                         goto interrupted;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 9430a110db93..f8fc188bc762 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
                                          * so we don't loop trying */
 
         int                             in_sync;        /* know to not need resync */
+        /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
+         * that we are never stopping an array while it is open.
+         * 'reconfig_mutex' protects all other reconfiguration.
+         * These locks are separate due to conflicting interactions
+         * with bdev->bd_mutex.
+         * Lock ordering is:
+         *  reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
+         *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
+         */
+        struct mutex                    open_mutex;
         struct mutex                    reconfig_mutex;
         atomic_t                        active;         /* general refcount */
         atomic_t                        openers;        /* number of active opens */
@@ -431,5 +441,7 @@ extern int md_allow_write(mddev_t *mddev);
 extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
 extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
 extern int md_check_no_bitmap(mddev_t *mddev);
+extern int md_integrity_register(mddev_t *mddev);
+void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
 
 #endif /* _MD_MD_H */
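The two integrity declarations added here are consumed in a uniform pattern by the personalities patched below (linear, multipath, raid0, raid1, raid10): register a common profile once the member set is known, and re-evaluate when a disk is hot-added. A condensed, non-authoritative sketch of that pattern; example_run and example_add_disk are hypothetical stand-ins for a personality's methods, error handling elided:

/* Condensed from the personality hunks in this merge. */
static int example_run(mddev_t *mddev)
{
        /* ... stack per-rdev queue limits, set up the md queue ... */
        md_integrity_register(mddev);   /* succeeds only if every working
                                         * member has a matching profile */
        return 0;
}

static int example_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
{
        /* ... bind rdev into a free slot ... */
        md_integrity_add_rdev(rdev, mddev);     /* drops the array-wide
                                                 * profile on a mismatch */
        return 0;
}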
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index cbe368fa6598..7140909f6662 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
         for (path = first; path <= last; path++)
                 if ((p=conf->multipaths+path)->rdev == NULL) {
                         q = rdev->bdev->bd_disk->queue;
-                        blk_queue_stack_limits(mddev->queue, q);
+                        disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                          rdev->data_offset << 9);
 
                         /* as we don't honour merge_bvec_fn, we must never risk
                          * violating it, so limit ->max_sector to one PAGE, as
@@ -312,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                         set_bit(In_sync, &rdev->flags);
                         rcu_assign_pointer(p->rdev, rdev);
                         err = 0;
+                        md_integrity_add_rdev(rdev, mddev);
                         break;
                 }
 
@@ -344,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
                         /* lost the race, try later */
                         err = -EBUSY;
                         p->rdev = rdev;
+                        goto abort;
                 }
+                md_integrity_register(mddev);
         }
 abort:
 
@@ -463,9 +467,9 @@ static int multipath_run (mddev_t *mddev)
 
                 disk = conf->multipaths + disk_idx;
                 disk->rdev = rdev;
+                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                  rdev->data_offset << 9);
 
-                blk_queue_stack_limits(mddev->queue,
-                                       rdev->bdev->bd_disk->queue);
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, not that we ever expect a device with
                  * a merge_bvec_fn to be involved in multipath */
@@ -518,7 +522,7 @@ static int multipath_run (mddev_t *mddev)
         mddev->queue->unplug_fn = multipath_unplug;
         mddev->queue->backing_dev_info.congested_fn = multipath_congested;
         mddev->queue->backing_dev_info.congested_data = mddev;
-
+        md_integrity_register(mddev);
         return 0;
 
 out_free_conf:
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ab4a489d8695..898e2bdfee47 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev)
                 }
                 dev[j] = rdev1;
 
-                blk_queue_stack_limits(mddev->queue,
-                                       rdev1->bdev->bd_disk->queue);
+                disk_stack_limits(mddev->gendisk, rdev1->bdev,
+                                  rdev1->data_offset << 9);
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, so limit ->max_sector to one PAGE, as
                  * a one page request is never in violation.
@@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev)
                        mddev->chunk_sectors << 9);
                 goto abort;
         }
+
+        blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
+        blk_queue_io_opt(mddev->queue,
+                         (mddev->chunk_sectors << 9) * mddev->raid_disks);
+
         printk(KERN_INFO "raid0: done.\n");
         mddev->private = conf;
         return 0;
@@ -346,6 +351,7 @@ static int raid0_run(mddev_t *mddev)
 
         blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
         dump_zones(mddev);
+        md_integrity_register(mddev);
         return 0;
 }
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 89939a7aef57..8726fd7ebce5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
         for (mirror = first; mirror <= last; mirror++)
                 if ( !(p=conf->mirrors+mirror)->rdev) {
 
-                        blk_queue_stack_limits(mddev->queue,
-                                               rdev->bdev->bd_disk->queue);
+                        disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                          rdev->data_offset << 9);
                         /* as we don't honour merge_bvec_fn, we must never risk
                          * violating it, so limit ->max_sector to one PAGE, as
                          * a one page request is never in violation.
@@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                         rcu_assign_pointer(p->rdev, rdev);
                         break;
                 }
-
+        md_integrity_add_rdev(rdev, mddev);
         print_conf(conf);
         return err;
 }
@@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
                         /* lost the race, try later */
                         err = -EBUSY;
                         p->rdev = rdev;
+                        goto abort;
                 }
+                md_integrity_register(mddev);
         }
 abort:
 
@@ -1988,9 +1990,8 @@ static int run(mddev_t *mddev)
                 disk = conf->mirrors + disk_idx;
 
                 disk->rdev = rdev;
-
-                blk_queue_stack_limits(mddev->queue,
-                                       rdev->bdev->bd_disk->queue);
+                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                  rdev->data_offset << 9);
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, so limit ->max_sector to one PAGE, as
                  * a one page request is never in violation.
@@ -2068,7 +2069,7 @@ static int run(mddev_t *mddev)
         mddev->queue->unplug_fn = raid1_unplug;
         mddev->queue->backing_dev_info.congested_fn = raid1_congested;
         mddev->queue->backing_dev_info.congested_data = mddev;
-
+        md_integrity_register(mddev);
         return 0;
 
 out_no_mem:
@@ -2133,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
                 return -EINVAL;
         set_capacity(mddev->gendisk, mddev->array_sectors);
         mddev->changed = 1;
+        revalidate_disk(mddev->gendisk);
         if (sectors > mddev->dev_sectors &&
             mddev->recovery_cp == MaxSector) {
                 mddev->recovery_cp = mddev->dev_sectors;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ae12ceafe10c..3d9020cf6f6e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
         for ( ; mirror <= last ; mirror++)
                 if ( !(p=conf->mirrors+mirror)->rdev) {
 
-                        blk_queue_stack_limits(mddev->queue,
-                                               rdev->bdev->bd_disk->queue);
+                        disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                          rdev->data_offset << 9);
                         /* as we don't honour merge_bvec_fn, we must never risk
                          * violating it, so limit ->max_sector to one PAGE, as
                          * a one page request is never in violation.
@@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                         break;
                 }
 
+        md_integrity_add_rdev(rdev, mddev);
         print_conf(conf);
         return err;
 }
@@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
                         /* lost the race, try later */
                         err = -EBUSY;
                         p->rdev = rdev;
+                        goto abort;
                 }
+                md_integrity_register(mddev);
         }
 abort:
 
@@ -2044,7 +2047,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 static int run(mddev_t *mddev)
 {
         conf_t *conf;
-        int i, disk_idx;
+        int i, disk_idx, chunk_size;
         mirror_info_t *disk;
         mdk_rdev_t *rdev;
         int nc, fc, fo;
@@ -2130,6 +2133,14 @@ static int run(mddev_t *mddev)
         spin_lock_init(&conf->device_lock);
         mddev->queue->queue_lock = &conf->device_lock;
 
+        chunk_size = mddev->chunk_sectors << 9;
+        blk_queue_io_min(mddev->queue, chunk_size);
+        if (conf->raid_disks % conf->near_copies)
+                blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks);
+        else
+                blk_queue_io_opt(mddev->queue, chunk_size *
+                                 (conf->raid_disks / conf->near_copies));
+
         list_for_each_entry(rdev, &mddev->disks, same_set) {
                 disk_idx = rdev->raid_disk;
                 if (disk_idx >= mddev->raid_disks
@@ -2138,9 +2149,8 @@ static int run(mddev_t *mddev)
                 disk = conf->mirrors + disk_idx;
 
                 disk->rdev = rdev;
-
-                blk_queue_stack_limits(mddev->queue,
-                                       rdev->bdev->bd_disk->queue);
+                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                  rdev->data_offset << 9);
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, so limit ->max_sector to one PAGE, as
                  * a one page request is never in violation.
@@ -2218,6 +2228,7 @@ static int run(mddev_t *mddev)
 
         if (conf->near_copies < mddev->raid_disks)
                 blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
+        md_integrity_register(mddev);
         return 0;
 
 out_free_conf:
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index cac6f4d3a143..9b00a229015a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3911,13 +3911,21 @@ static int make_request(struct request_queue *q, struct bio * bi)
                                 goto retry;
                         }
                 }
-                /* FIXME what if we get a false positive because these
-                 * are being updated.
-                 */
-                if (logical_sector >= mddev->suspend_lo &&
+
+                if (bio_data_dir(bi) == WRITE &&
+                    logical_sector >= mddev->suspend_lo &&
                     logical_sector < mddev->suspend_hi) {
                         release_stripe(sh);
-                        schedule();
+                        /* As the suspend_* range is controlled by
+                         * userspace, we want an interruptible
+                         * wait.
+                         */
+                        flush_signals(current);
+                        prepare_to_wait(&conf->wait_for_overlap,
+                                        &w, TASK_INTERRUPTIBLE);
+                        if (logical_sector >= mddev->suspend_lo &&
+                            logical_sector < mddev->suspend_hi)
+                                schedule();
                         goto retry;
                 }
 
@@ -3989,7 +3997,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
             conf->reshape_progress < raid5_size(mddev, 0, 0)) {
                 sector_nr = raid5_size(mddev, 0, 0)
                         - conf->reshape_progress;
-        } else if (mddev->delta_disks > 0 &&
+        } else if (mddev->delta_disks >= 0 &&
                    conf->reshape_progress > 0)
                 sector_nr = conf->reshape_progress;
         sector_div(sector_nr, new_data_disks);
@@ -4203,6 +4211,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
                 return 0;
         }
 
+        /* Allow raid5_quiesce to complete */
+        wait_event(conf->wait_for_overlap, conf->quiesce != 2);
+
         if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
                 return reshape_request(mddev, sector_nr, skipped);
 
@@ -4803,7 +4814,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev)
 static int run(mddev_t *mddev)
 {
         raid5_conf_t *conf;
-        int working_disks = 0;
+        int working_disks = 0, chunk_size;
         mdk_rdev_t *rdev;
 
         if (mddev->recovery_cp != MaxSector)
@@ -4844,7 +4855,26 @@ static int run(mddev_t *mddev)
                            (old_disks-max_degraded));
                 /* here_old is the first stripe that we might need to read
                  * from */
-                if (here_new >= here_old) {
+                if (mddev->delta_disks == 0) {
+                        /* We cannot be sure it is safe to start an in-place
+                         * reshape. It is only safe if user-space if monitoring
+                         * and taking constant backups.
+                         * mdadm always starts a situation like this in
+                         * readonly mode so it can take control before
+                         * allowing any writes. So just check for that.
+                         */
+                        if ((here_new * mddev->new_chunk_sectors !=
+                             here_old * mddev->chunk_sectors) ||
+                            mddev->ro == 0) {
+                                printk(KERN_ERR "raid5: in-place reshape must be started"
+                                       " in read-only mode - aborting\n");
+                                return -EINVAL;
+                        }
+                } else if (mddev->delta_disks < 0
+                           ? (here_new * mddev->new_chunk_sectors <=
+                              here_old * mddev->chunk_sectors)
+                           : (here_new * mddev->new_chunk_sectors >=
+                              here_old * mddev->chunk_sectors)) {
                         /* Reading from the same stripe as writing to - bad */
                         printk(KERN_ERR "raid5: reshape_position too early for "
                                "auto-recovery - aborting.\n");
@@ -4958,6 +4988,14 @@ static int run(mddev_t *mddev)
         md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
 
         blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec);
+        chunk_size = mddev->chunk_sectors << 9;
+        blk_queue_io_min(mddev->queue, chunk_size);
+        blk_queue_io_opt(mddev->queue, chunk_size *
+                         (conf->raid_disks - conf->max_degraded));
+
+        list_for_each_entry(rdev, &mddev->disks, same_set)
+                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                  rdev->data_offset << 9);
 
         return 0;
 abort:
@@ -5185,6 +5223,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
                 return -EINVAL;
         set_capacity(mddev->gendisk, mddev->array_sectors);
         mddev->changed = 1;
+        revalidate_disk(mddev->gendisk);
         if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) {
                 mddev->recovery_cp = mddev->dev_sectors;
                 set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
@@ -5330,7 +5369,7 @@ static int raid5_start_reshape(mddev_t *mddev)
                 spin_unlock_irqrestore(&conf->device_lock, flags);
         }
         mddev->raid_disks = conf->raid_disks;
-        mddev->reshape_position = 0;
+        mddev->reshape_position = conf->reshape_progress;
         set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
         clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -5385,7 +5424,6 @@ static void end_reshape(raid5_conf_t *conf)
  */
 static void raid5_finish_reshape(mddev_t *mddev)
 {
-        struct block_device *bdev;
         raid5_conf_t *conf = mddev->private;
 
         if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5394,15 +5432,7 @@ static void raid5_finish_reshape(mddev_t *mddev)
                 md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
                 set_capacity(mddev->gendisk, mddev->array_sectors);
                 mddev->changed = 1;
-
-                bdev = bdget_disk(mddev->gendisk, 0);
-                if (bdev) {
-                        mutex_lock(&bdev->bd_inode->i_mutex);
-                        i_size_write(bdev->bd_inode,
-                                     (loff_t)mddev->array_sectors << 9);
-                        mutex_unlock(&bdev->bd_inode->i_mutex);
-                        bdput(bdev);
-                }
+                revalidate_disk(mddev->gendisk);
         } else {
                 int d;
                 mddev->degraded = conf->raid_disks;
@@ -5413,8 +5443,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
                         mddev->degraded--;
                 for (d = conf->raid_disks ;
                      d < conf->raid_disks - mddev->delta_disks;
-                     d++)
-                        raid5_remove_disk(mddev, d);
+                     d++) {
+                        mdk_rdev_t *rdev = conf->disks[d].rdev;
+                        if (rdev && raid5_remove_disk(mddev, d) == 0) {
+                                char nm[20];
+                                sprintf(nm, "rd%d", rdev->raid_disk);
+                                sysfs_remove_link(&mddev->kobj, nm);
+                                rdev->raid_disk = -1;
+                        }
+                }
         }
         mddev->layout = conf->algorithm;
         mddev->chunk_sectors = conf->chunk_sectors;
@@ -5434,12 +5471,18 @@ static void raid5_quiesce(mddev_t *mddev, int state)
 
         case 1: /* stop all writes */
                 spin_lock_irq(&conf->device_lock);
-                conf->quiesce = 1;
+                /* '2' tells resync/reshape to pause so that all
+                 * active stripes can drain
+                 */
+                conf->quiesce = 2;
                 wait_event_lock_irq(conf->wait_for_stripe,
                                     atomic_read(&conf->active_stripes) == 0 &&
                                     atomic_read(&conf->active_aligned_reads) == 0,
                                     conf->device_lock, /* nothing */);
+                conf->quiesce = 1;
                 spin_unlock_irq(&conf->device_lock);
+                /* allow reshape to continue */
+                wake_up(&conf->wait_for_overlap);
                 break;
 
         case 0: /* re-enable writes */
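The raid5_quiesce rework above pairs with the sync_request hunk at @@ -4203: quiesce first sets conf->quiesce to the intermediate value 2 so that resync/reshape pauses and active stripes can drain, then drops it to 1 once the stripe counts reach zero. A compressed view of that handshake; a sketch only, since the real code holds conf->device_lock and uses wait_event_lock_irq():

/* Compressed view of the new quiesce/resync handshake; locking and
 * the wait_event_lock_irq() plumbing of the real code are elided. */
static void quiesce_writes_sketch(raid5_conf_t *conf)
{
        conf->quiesce = 2;              /* ask resync/reshape to pause */
        /* ... wait until active_stripes == 0 &&
         *     active_aligned_reads == 0 ... */
        conf->quiesce = 1;              /* fully quiesced */
        wake_up(&conf->wait_for_overlap);       /* let resync continue */
}

static void sync_request_sketch(raid5_conf_t *conf)
{
        /* resync now yields while stripes drain instead of pinning them */
        wait_event(conf->wait_for_overlap, conf->quiesce != 2);
}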