author     Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:55:54 -0400
committer  Dan Williams <dan.j.williams@intel.com>    2009-09-08 20:55:54 -0400
commit     9134d02bc0af4a8747d448d1f811ec5f8eb96df6
tree       704c3e5dcc10f360815c4868a74711f82fb62e27 /drivers/md
parent     bbb20089a3275a19e475dbc21320c3742e3ca423
parent     80ffb3cceaefa405f2ecd46d66500ed8d53efe74
Merge commit 'md/for-linus' into async-tx-next

Conflicts:
        drivers/md/raid5.c
Diffstat (limited to 'drivers/md')

 drivers/md/dm-crypt.c           |   4
 drivers/md/dm-delay.c           |   4
 drivers/md/dm-exception-store.c |   9
 drivers/md/dm-linear.c          |   2
 drivers/md/dm-mpath.c           |   2
 drivers/md/dm-raid1.c           |   3
 drivers/md/dm-stripe.c          |   7
 drivers/md/dm-table.c           |  17
 drivers/md/dm.c                 |  14
 drivers/md/dm.h                 |   1
 drivers/md/linear.c             |   6
 drivers/md/md.c                 | 251
 drivers/md/md.h                 |  12
 drivers/md/multipath.c          |  12
 drivers/md/raid0.c              |  10
 drivers/md/raid1.c              |  16
 drivers/md/raid10.c             |  23
 drivers/md/raid5.c              |  87
 18 files changed, 297 insertions(+), 183 deletions(-)
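
Note on the device-mapper hunks that follow: most of them thread an explicit sector_t len argument through the iterate_devices callbacks, so that dm-table.c can validate and stack queue limits against the length each target actually maps onto a device, instead of always assuming ti->len (dm-stripe, for example, now reports sc->stripe_width per stripe member). As a minimal sketch of the callback shape after this series, reconstructed from the call sites below (the authoritative typedef lives in include/linux/device-mapper.h):

    typedef int (*iterate_devices_callout_fn)(struct dm_target *ti,
                                              struct dm_dev *dev,
                                              sector_t start, sector_t len,
                                              void *data);

Each target's iterate_devices method invokes fn once per underlying device with that device's (start, len) area; dm-table.c supplies device_area_is_valid() or dm_set_device_limits() as fn.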
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9933eb861c71..ed1038164019 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
          * But don't wait if split was due to the io size restriction
          */
         if (unlikely(out_of_pages))
-                congestion_wait(WRITE, HZ/100);
+                congestion_wait(BLK_RW_ASYNC, HZ/100);
 
         /*
          * With async crypto it is unsafe to share the crypto context
@@ -1318,7 +1318,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 {
         struct crypt_config *cc = ti->private;
 
-        return fn(ti, cc->dev, cc->start, data);
+        return fn(ti, cc->dev, cc->start, ti->len, data);
 }
 
 static struct target_type crypt_target = {
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 4e5b843cd4d7..ebe7381f47c8 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -324,12 +324,12 @@ static int delay_iterate_devices(struct dm_target *ti,
         struct delay_c *dc = ti->private;
         int ret = 0;
 
-        ret = fn(ti, dc->dev_read, dc->start_read, data);
+        ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data);
         if (ret)
                 goto out;
 
         if (dc->dev_write)
-                ret = fn(ti, dc->dev_write, dc->start_write, data);
+                ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data);
 
 out:
         return ret;
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index c3ae51584b12..3710ff88fc10 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -195,7 +195,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
                               struct dm_exception_store **store)
 {
         int r = 0;
-        struct dm_exception_store_type *type;
+        struct dm_exception_store_type *type = NULL;
         struct dm_exception_store *tmp_store;
         char persistent;
 
@@ -211,12 +211,15 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
         }
 
         persistent = toupper(*argv[1]);
-        if (persistent != 'P' && persistent != 'N') {
+        if (persistent == 'P')
+                type = get_type("P");
+        else if (persistent == 'N')
+                type = get_type("N");
+        else {
                 ti->error = "Persistent flag is not P or N";
                 return -EINVAL;
         }
 
-        type = get_type(&persistent);
         if (!type) {
                 ti->error = "Exception store type not recognised";
                 r = -EINVAL;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 9184b6deb868..82f7d6e6b1ea 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -139,7 +139,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 {
         struct linear_c *lc = ti->private;
 
-        return fn(ti, lc->dev, lc->start, data);
+        return fn(ti, lc->dev, lc->start, ti->len, data);
 }
 
 static struct target_type linear_target = {
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c70604a20897..6f0d90d4a541 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1453,7 +1453,7 @@ static int multipath_iterate_devices(struct dm_target *ti,
 
         list_for_each_entry(pg, &m->priority_groups, list) {
                 list_for_each_entry(p, &pg->pgpaths, list) {
-                        ret = fn(ti, p->path.dev, ti->begin, data);
+                        ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
                         if (ret)
                                 goto out;
                 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ce8868c768cc..9726577cde49 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -638,6 +638,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
                 spin_lock_irq(&ms->lock);
                 bio_list_merge(&ms->writes, &requeue);
                 spin_unlock_irq(&ms->lock);
+                delayed_wake(ms);
         }
 
         /*
@@ -1292,7 +1293,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
 
         for (i = 0; !ret && i < ms->nr_mirrors; i++)
                 ret = fn(ti, ms->mirror[i].dev,
-                         ms->mirror[i].offset, data);
+                         ms->mirror[i].offset, ti->len, data);
 
         return ret;
 }
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index b240e85ae39a..4e0e5937e42a 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -320,10 +320,11 @@ static int stripe_iterate_devices(struct dm_target *ti,
         int ret = 0;
         unsigned i = 0;
 
-        do
+        do {
                 ret = fn(ti, sc->stripe[i].dev,
-                         sc->stripe[i].physical_start, data);
-        while (!ret && ++i < sc->stripes);
+                         sc->stripe[i].physical_start,
+                         sc->stripe_width, data);
+        } while (!ret && ++i < sc->stripes);
 
         return ret;
 }
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4899ebe767c8..d952b3441913 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -346,7 +346,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
  * If possible, this checks an area of a destination device is valid.
  */
 static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
-                                sector_t start, void *data)
+                                sector_t start, sector_t len, void *data)
 {
         struct queue_limits *limits = data;
         struct block_device *bdev = dev->bdev;
@@ -359,7 +359,7 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
         if (!dev_size)
                 return 1;
 
-        if ((start >= dev_size) || (start + ti->len > dev_size)) {
+        if ((start >= dev_size) || (start + len > dev_size)) {
                 DMWARN("%s: %s too small for target",
                        dm_device_name(ti->table->md), bdevname(bdev, b));
                 return 0;
@@ -377,11 +377,11 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
                 return 0;
         }
 
-        if (ti->len & (logical_block_size_sectors - 1)) {
+        if (len & (logical_block_size_sectors - 1)) {
                 DMWARN("%s: len=%llu not aligned to h/w "
                        "logical block size %hu of %s",
                        dm_device_name(ti->table->md),
-                       (unsigned long long)ti->len,
+                       (unsigned long long)len,
                        limits->logical_block_size, bdevname(bdev, b));
                 return 0;
         }
@@ -482,7 +482,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
 
 int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
-                         sector_t start, void *data)
+                         sector_t start, sector_t len, void *data)
 {
         struct queue_limits *limits = data;
         struct block_device *bdev = dev->bdev;
@@ -495,7 +495,7 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
                 return 0;
         }
 
-        if (blk_stack_limits(limits, &q->limits, start) < 0)
+        if (blk_stack_limits(limits, &q->limits, start << 9) < 0)
                 DMWARN("%s: target device %s is misaligned",
                        dm_device_name(ti->table->md), bdevname(bdev, b));
 
@@ -830,11 +830,6 @@ unsigned dm_table_get_type(struct dm_table *t)
         return t->type;
 }
 
-bool dm_table_bio_based(struct dm_table *t)
-{
-        return dm_table_get_type(t) == DM_TYPE_BIO_BASED;
-}
-
 bool dm_table_request_based(struct dm_table *t)
 {
         return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3c6d4ee8921d..8a311ea0d441 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1017,7 +1017,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
         clone->bi_flags |= 1 << BIO_CLONED;
 
         if (bio_integrity(bio)) {
-                bio_integrity_clone(clone, bio, GFP_NOIO);
+                bio_integrity_clone(clone, bio, GFP_NOIO, bs);
                 bio_integrity_trim(clone,
                                    bio_sector_offset(bio, idx, offset), len);
         }
@@ -1045,7 +1045,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
         clone->bi_flags &= ~(1 << BIO_SEG_VALID);
 
         if (bio_integrity(bio)) {
-                bio_integrity_clone(clone, bio, GFP_NOIO);
+                bio_integrity_clone(clone, bio, GFP_NOIO, bs);
 
                 if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
                         bio_integrity_trim(clone,
@@ -2203,16 +2203,6 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
                 goto out;
         }
 
-        /*
-         * It is enought that blk_queue_ordered() is called only once when
-         * the first bio-based table is bound.
-         *
-         * This setting should be moved to alloc_dev() when request-based dm
-         * supports barrier.
-         */
-        if (!md->map && dm_table_bio_based(table))
-                blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
-
         __unbind(md);
         r = __bind(md, table, &limits);
 
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 23278ae80f08..a7663eba17e2 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -61,7 +61,6 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits);
 int dm_table_any_busy_target(struct dm_table *t);
 int dm_table_set_type(struct dm_table *t);
 unsigned dm_table_get_type(struct dm_table *t);
-bool dm_table_bio_based(struct dm_table *t);
 bool dm_table_request_based(struct dm_table *t);
 int dm_table_alloc_md_mempools(struct dm_table *t);
 void dm_table_free_md_mempools(struct dm_table *t);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 15c8b7b25a9b..5fe39c2a3d2b 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
                         rdev->sectors = sectors * mddev->chunk_sectors;
                 }
 
-                blk_queue_stack_limits(mddev->queue,
-                                       rdev->bdev->bd_disk->queue);
+                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                  rdev->data_offset << 9);
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, so limit ->max_sector to one PAGE, as
                  * a one page request is never in violation.
@@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev)
         mddev->queue->unplug_fn = linear_unplug;
         mddev->queue->backing_dev_info.congested_fn = linear_congested;
         mddev->queue->backing_dev_info.congested_data = mddev;
+        md_integrity_register(mddev);
         return 0;
 }
 
@@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
         rcu_assign_pointer(mddev->private, newconf);
         md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
         set_capacity(mddev->gendisk, mddev->array_sectors);
+        revalidate_disk(mddev->gendisk);
         call_rcu(&oldconf->rcu, free_conf);
         return 0;
 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 09be637d52cb..9dd872000cec 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
         else
                 new->md_minor = MINOR(unit) >> MdpMinorShift;
 
+        mutex_init(&new->open_mutex);
         mutex_init(&new->reconfig_mutex);
         INIT_LIST_HEAD(&new->disks);
         INIT_LIST_HEAD(&new->all_mddevs);
@@ -1308,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
         }
         if (mddev->level != LEVEL_MULTIPATH) {
                 int role;
-                role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
+                if (rdev->desc_nr < 0 ||
+                    rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
+                        role = 0xffff;
+                        rdev->desc_nr = -1;
+                } else
+                        role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
                 switch(role) {
                 case 0xffff: /* spare */
                         break;
@@ -1394,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
                 if (rdev2->desc_nr+1 > max_dev)
                         max_dev = rdev2->desc_nr+1;
 
-        if (max_dev > le32_to_cpu(sb->max_dev))
+        if (max_dev > le32_to_cpu(sb->max_dev)) {
+                int bmask;
                 sb->max_dev = cpu_to_le32(max_dev);
+                rdev->sb_size = max_dev * 2 + 256;
+                bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
+                if (rdev->sb_size & bmask)
+                        rdev->sb_size = (rdev->sb_size | bmask) + 1;
+        }
         for (i=0; i<max_dev;i++)
                 sb->dev_roles[i] = cpu_to_le16(0xfffe);
 
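A worked example of the round-up above: with max_dev = 512 the superblock needs 512 * 2 + 256 = 1280 bytes. On a device with 512-byte logical blocks, bmask = 511 and 1280 & 511 = 256, so sb_size becomes (1280 | 511) + 1 = 1536, the next multiple of the block size; without this, writing a grown superblock could be rejected by devices that refuse partial-block writes.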
@@ -1487,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
 
 static LIST_HEAD(pending_raid_disks);
 
-static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
+/*
+ * Try to register data integrity profile for an mddev
+ *
+ * This is called when an array is started and after a disk has been kicked
+ * from the array. It only succeeds if all working and active component devices
+ * are integrity capable with matching profiles.
+ */
+int md_integrity_register(mddev_t *mddev)
+{
+        mdk_rdev_t *rdev, *reference = NULL;
+
+        if (list_empty(&mddev->disks))
+                return 0; /* nothing to do */
+        if (blk_get_integrity(mddev->gendisk))
+                return 0; /* already registered */
+        list_for_each_entry(rdev, &mddev->disks, same_set) {
+                /* skip spares and non-functional disks */
+                if (test_bit(Faulty, &rdev->flags))
+                        continue;
+                if (rdev->raid_disk < 0)
+                        continue;
+                /*
+                 * If at least one rdev is not integrity capable, we can not
+                 * enable data integrity for the md device.
+                 */
+                if (!bdev_get_integrity(rdev->bdev))
+                        return -EINVAL;
+                if (!reference) {
+                        /* Use the first rdev as the reference */
+                        reference = rdev;
+                        continue;
+                }
+                /* does this rdev's profile match the reference profile? */
+                if (blk_integrity_compare(reference->bdev->bd_disk,
+                                          rdev->bdev->bd_disk) < 0)
+                        return -EINVAL;
+        }
+        /*
+         * All component devices are integrity capable and have matching
+         * profiles, register the common profile for the md device.
+         */
+        if (blk_integrity_register(mddev->gendisk,
+                                   bdev_get_integrity(reference->bdev)) != 0) {
+                printk(KERN_ERR "md: failed to register integrity for %s\n",
+                       mdname(mddev));
+                return -EINVAL;
+        }
+        printk(KERN_NOTICE "md: data integrity on %s enabled\n",
+               mdname(mddev));
+        return 0;
+}
+EXPORT_SYMBOL(md_integrity_register);
+
+/* Disable data integrity if non-capable/non-matching disk is being added */
+void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
 {
-        struct mdk_personality *pers = mddev->pers;
-        struct gendisk *disk = mddev->gendisk;
         struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
-        struct blk_integrity *bi_mddev = blk_get_integrity(disk);
+        struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
 
-        /* Data integrity passthrough not supported on RAID 4, 5 and 6 */
-        if (pers && pers->level >= 4 && pers->level <= 6)
+        if (!bi_mddev) /* nothing to do */
                 return;
-
-        /* If rdev is integrity capable, register profile for mddev */
-        if (!bi_mddev && bi_rdev) {
-                if (blk_integrity_register(disk, bi_rdev))
-                        printk(KERN_ERR "%s: %s Could not register integrity!\n",
-                               __func__, disk->disk_name);
-                else
-                        printk(KERN_NOTICE "Enabling data integrity on %s\n",
-                               disk->disk_name);
+        if (rdev->raid_disk < 0) /* skip spares */
                 return;
-        }
-
-        /* Check that mddev and rdev have matching profiles */
-        if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
-                printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
-                       disk->disk_name, rdev->bdev->bd_disk->disk_name);
-                printk(KERN_NOTICE "Disabling data integrity on %s\n",
-                       disk->disk_name);
-                blk_integrity_unregister(disk);
-        }
+        if (bi_rdev && blk_integrity_compare(mddev->gendisk,
+                                             rdev->bdev->bd_disk) >= 0)
+                return;
+        printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
+        blk_integrity_unregister(mddev->gendisk);
 }
+EXPORT_SYMBOL(md_integrity_add_rdev);
 
 static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 {
@@ -1591,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
         /* May as well allow recovery to be retried once */
         mddev->recovery_disabled = 0;
 
-        md_integrity_check(rdev, mddev);
         return 0;
 
 fail:
@@ -1756,9 +1806,10 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
         __u8 *uuid;
 
         uuid = sb->set_uuid;
-        printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
-               ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
-               KERN_INFO "md: Name: \"%s\" CT:%llu\n",
+        printk(KERN_INFO
+               "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
+               ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
+               "md: Name: \"%s\" CT:%llu\n",
                le32_to_cpu(sb->major_version),
                le32_to_cpu(sb->feature_map),
                uuid[0], uuid[1], uuid[2], uuid[3],
@@ -1770,12 +1821,13 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
                & MD_SUPERBLOCK_1_TIME_SEC_MASK);
 
         uuid = sb->device_uuid;
-        printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
+        printk(KERN_INFO
+               "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
                " RO:%llu\n"
-               KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+               "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
                ":%02x%02x%02x%02x%02x%02x\n"
-               KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
-               KERN_INFO "md: (MaxDev:%u) \n",
+               "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
+               "md: (MaxDev:%u) \n",
                le32_to_cpu(sb->level),
                (unsigned long long)le64_to_cpu(sb->size),
                le32_to_cpu(sb->raid_disks),
@@ -1923,17 +1975,14 @@ repeat:
                 /* otherwise we have to go forward and ... */
                 mddev->events ++;
                 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
-                        /* .. if the array isn't clean, insist on an odd 'events' */
-                        if ((mddev->events&1)==0) {
-                                mddev->events++;
+                        /* .. if the array isn't clean, an 'even' event must also go
+                         * to spares. */
+                        if ((mddev->events&1)==0)
                                 nospares = 0;
-                        }
                 } else {
-                        /* otherwise insist on an even 'events' (for clean states) */
-                        if ((mddev->events&1)) {
-                                mddev->events++;
+                        /* otherwise an 'odd' event must go to spares */
+                        if ((mddev->events&1))
                                 nospares = 0;
-                        }
                 }
         }
 
@@ -2655,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
         ssize_t rv = len;
         struct mdk_personality *pers;
         void *priv;
+        mdk_rdev_t *rdev;
 
         if (mddev->pers == NULL) {
                 if (len == 0)
@@ -2734,6 +2784,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
         mddev_suspend(mddev);
         mddev->pers->stop(mddev);
         module_put(mddev->pers->owner);
+        /* Invalidate devices that are now superfluous */
+        list_for_each_entry(rdev, &mddev->disks, same_set)
+                if (rdev->raid_disk >= mddev->raid_disks) {
+                        rdev->raid_disk = -1;
+                        clear_bit(In_sync, &rdev->flags);
+                }
         mddev->pers = pers;
         mddev->private = priv;
         strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
@@ -3543,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
                 if (max < mddev->resync_min)
                         return -EINVAL;
                 if (max < mddev->resync_max &&
+                    mddev->ro == 0 &&
                     test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                         return -EBUSY;
 
@@ -3573,7 +3630,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
         char *e;
         unsigned long long new = simple_strtoull(buf, &e, 10);
 
-        if (mddev->pers->quiesce == NULL)
+        if (mddev->pers == NULL ||
+            mddev->pers->quiesce == NULL)
                 return -EINVAL;
         if (buf == e || (*e && *e != '\n'))
                 return -EINVAL;
@@ -3601,7 +3659,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
         char *e;
         unsigned long long new = simple_strtoull(buf, &e, 10);
 
-        if (mddev->pers->quiesce == NULL)
+        if (mddev->pers == NULL ||
+            mddev->pers->quiesce == NULL)
                 return -EINVAL;
         if (buf == e || (*e && *e != '\n'))
                 return -EINVAL;
@@ -3681,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
 
         mddev->array_sectors = sectors;
         set_capacity(mddev->gendisk, mddev->array_sectors);
-        if (mddev->pers) {
-                struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
-
-                if (bdev) {
-                        mutex_lock(&bdev->bd_inode->i_mutex);
-                        i_size_write(bdev->bd_inode,
-                                     (loff_t)mddev->array_sectors << 9);
-                        mutex_unlock(&bdev->bd_inode->i_mutex);
-                        bdput(bdev);
-                }
-        }
+        if (mddev->pers)
+                revalidate_disk(mddev->gendisk);
 
         return len;
 }
@@ -3844,11 +3894,9 @@ static int md_alloc(dev_t dev, char *name)
         flush_scheduled_work();
 
         mutex_lock(&disks_mutex);
-        if (mddev->gendisk) {
-                mutex_unlock(&disks_mutex);
-                mddev_put(mddev);
-                return -EEXIST;
-        }
+        error = -EEXIST;
+        if (mddev->gendisk)
+                goto abort;
 
         if (name) {
                 /* Need to ensure that 'name' is not a duplicate.
@@ -3860,17 +3908,15 @@ static int md_alloc(dev_t dev, char *name)
                         if (mddev2->gendisk &&
                             strcmp(mddev2->gendisk->disk_name, name) == 0) {
                                 spin_unlock(&all_mddevs_lock);
-                                return -EEXIST;
+                                goto abort;
                         }
                 spin_unlock(&all_mddevs_lock);
         }
 
+        error = -ENOMEM;
         mddev->queue = blk_alloc_queue(GFP_KERNEL);
-        if (!mddev->queue) {
-                mutex_unlock(&disks_mutex);
-                mddev_put(mddev);
-                return -ENOMEM;
-        }
+        if (!mddev->queue)
+                goto abort;
         mddev->queue->queuedata = mddev;
 
         /* Can be unlocked because the queue is new: no concurrency */
@@ -3880,11 +3926,9 @@ static int md_alloc(dev_t dev, char *name)
 
         disk = alloc_disk(1 << shift);
         if (!disk) {
-                mutex_unlock(&disks_mutex);
                 blk_cleanup_queue(mddev->queue);
                 mddev->queue = NULL;
-                mddev_put(mddev);
-                return -ENOMEM;
+                goto abort;
         }
         disk->major = MAJOR(mddev->unit);
         disk->first_minor = unit << shift;
@@ -3906,16 +3950,22 @@ static int md_alloc(dev_t dev, char *name)
         mddev->gendisk = disk;
         error = kobject_init_and_add(&mddev->kobj, &md_ktype,
                                      &disk_to_dev(disk)->kobj, "%s", "md");
-        mutex_unlock(&disks_mutex);
-        if (error)
+        if (error) {
+                /* This isn't possible, but as kobject_init_and_add is marked
+                 * __must_check, we must do something with the result
+                 */
                 printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
                        disk->disk_name);
-        else {
+                error = 0;
+        }
+ abort:
+        mutex_unlock(&disks_mutex);
+        if (!error) {
                 kobject_uevent(&mddev->kobj, KOBJ_ADD);
                 mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
         }
         mddev_put(mddev);
-        return 0;
+        return error;
 }
 
 static struct kobject *md_probe(dev_t dev, int *part, void *data)
@@ -4044,10 +4094,6 @@ static int do_md_run(mddev_t * mddev)
         }
         strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
-        if (pers->level >= 4 && pers->level <= 6)
-                /* Cannot support integrity (yet) */
-                blk_integrity_unregister(mddev->gendisk);
-
         if (mddev->reshape_position != MaxSector &&
             pers->start_reshape == NULL) {
                 /* This personality cannot handle reshaping... */
@@ -4185,6 +4231,7 @@ static int do_md_run(mddev_t * mddev)
         md_wakeup_thread(mddev->thread);
         md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
 
+        revalidate_disk(mddev->gendisk);
         mddev->changed = 1;
         md_new_event(mddev);
         sysfs_notify_dirent(mddev->sysfs_state);
@@ -4256,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
         struct gendisk *disk = mddev->gendisk;
         mdk_rdev_t *rdev;
 
+        mutex_lock(&mddev->open_mutex);
         if (atomic_read(&mddev->openers) > is_open) {
                 printk("md: %s still in use.\n",mdname(mddev));
-                return -EBUSY;
-        }
-
-        if (mddev->pers) {
+                err = -EBUSY;
+        } else if (mddev->pers) {
 
                 if (mddev->sync_thread) {
                         set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4318,8 +4364,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
                 if (mode == 1)
                         set_disk_ro(disk, 1);
                 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+                err = 0;
         }
-
+out:
+        mutex_unlock(&mddev->open_mutex);
+        if (err)
+                return err;
         /*
          * Free resources if final stop
          */
@@ -4385,7 +4435,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
         blk_integrity_unregister(disk);
         md_new_event(mddev);
         sysfs_notify_dirent(mddev->sysfs_state);
-out:
         return err;
 }
 
@@ -5083,18 +5132,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
                 return -ENOSPC;
         }
         rv = mddev->pers->resize(mddev, num_sectors);
-        if (!rv) {
-                struct block_device *bdev;
-
-                bdev = bdget_disk(mddev->gendisk, 0);
-                if (bdev) {
-                        mutex_lock(&bdev->bd_inode->i_mutex);
-                        i_size_write(bdev->bd_inode,
-                                     (loff_t)mddev->array_sectors << 9);
-                        mutex_unlock(&bdev->bd_inode->i_mutex);
-                        bdput(bdev);
-                }
-        }
+        if (!rv)
+                revalidate_disk(mddev->gendisk);
         return rv;
 }
 
@@ -5480,12 +5519,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
         }
         BUG_ON(mddev != bdev->bd_disk->private_data);
 
-        if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
+        if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
                 goto out;
 
         err = 0;
         atomic_inc(&mddev->openers);
-        mddev_unlock(mddev);
+        mutex_unlock(&mddev->open_mutex);
 
         check_disk_change(bdev);
 out:
@@ -6334,10 +6373,16 @@ void md_do_sync(mddev_t *mddev)
                         sysfs_notify(&mddev->kobj, NULL, "sync_completed");
                 }
 
-                if (j >= mddev->resync_max)
-                        wait_event(mddev->recovery_wait,
-                                   mddev->resync_max > j
-                                   || kthread_should_stop());
+                while (j >= mddev->resync_max && !kthread_should_stop()) {
+                        /* As this condition is controlled by user-space,
+                         * we can block indefinitely, so use '_interruptible'
+                         * to avoid triggering warnings.
+                         */
+                        flush_signals(current); /* just in case */
+                        wait_event_interruptible(mddev->recovery_wait,
+                                                 mddev->resync_max > j
+                                                 || kthread_should_stop());
+                }
 
                 if (kthread_should_stop())
                         goto interrupted;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 9430a110db93..f8fc188bc762 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
                                                  * so we don't loop trying */
 
         int                     in_sync;        /* know to not need resync */
+        /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
+         * that we are never stopping an array while it is open.
+         * 'reconfig_mutex' protects all other reconfiguration.
+         * These locks are separate due to conflicting interactions
+         * with bdev->bd_mutex.
+         * Lock ordering is:
+         *  reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
+         *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
+         */
+        struct mutex            open_mutex;
         struct mutex            reconfig_mutex;
         atomic_t                active;         /* general refcount */
         atomic_t                openers;        /* number of active opens */
@@ -431,5 +441,7 @@ extern int md_allow_write(mddev_t *mddev);
 extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
 extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
 extern int md_check_no_bitmap(mddev_t *mddev);
+extern int md_integrity_register(mddev_t *mddev);
+void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
 
 #endif /* _MD_MD_H */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index cbe368fa6598..7140909f6662 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
         for (path = first; path <= last; path++)
                 if ((p=conf->multipaths+path)->rdev == NULL) {
                         q = rdev->bdev->bd_disk->queue;
-                        blk_queue_stack_limits(mddev->queue, q);
+                        disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                          rdev->data_offset << 9);
 
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, so limit ->max_sector to one PAGE, as
@@ -312,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                         set_bit(In_sync, &rdev->flags);
                         rcu_assign_pointer(p->rdev, rdev);
                         err = 0;
+                        md_integrity_add_rdev(rdev, mddev);
                         break;
                 }
 
@@ -344,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
                         /* lost the race, try later */
                         err = -EBUSY;
                         p->rdev = rdev;
+                        goto abort;
                 }
+                md_integrity_register(mddev);
         }
 abort:
 
@@ -463,9 +467,9 @@ static int multipath_run (mddev_t *mddev)
 
                 disk = conf->multipaths + disk_idx;
                 disk->rdev = rdev;
+                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                  rdev->data_offset << 9);
 
-                blk_queue_stack_limits(mddev->queue,
-                                       rdev->bdev->bd_disk->queue);
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, not that we ever expect a device with
                  * a merge_bvec_fn to be involved in multipath */
@@ -518,7 +522,7 @@ static int multipath_run (mddev_t *mddev)
         mddev->queue->unplug_fn = multipath_unplug;
         mddev->queue->backing_dev_info.congested_fn = multipath_congested;
         mddev->queue->backing_dev_info.congested_data = mddev;
-
+        md_integrity_register(mddev);
         return 0;
 
 out_free_conf:
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ab4a489d8695..898e2bdfee47 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev)
                 }
                 dev[j] = rdev1;
 
-                blk_queue_stack_limits(mddev->queue,
-                                       rdev1->bdev->bd_disk->queue);
+                disk_stack_limits(mddev->gendisk, rdev1->bdev,
+                                  rdev1->data_offset << 9);
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, so limit ->max_sector to one PAGE, as
                  * a one page request is never in violation.
@@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev)
                          mddev->chunk_sectors << 9);
                 goto abort;
         }
+
+        blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
+        blk_queue_io_opt(mddev->queue,
+                         (mddev->chunk_sectors << 9) * mddev->raid_disks);
+
         printk(KERN_INFO "raid0: done.\n");
         mddev->private = conf;
         return 0;
@@ -346,6 +351,7 @@ static int raid0_run(mddev_t *mddev)
 
         blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
         dump_zones(mddev);
+        md_integrity_register(mddev);
         return 0;
 }
 
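The new hints expose the raid0 geometry to the block layer and to tools like mkfs: io_min is one chunk and io_opt one full stripe. For a hypothetical 4-disk raid0 with 64 KiB chunks (chunk_sectors = 128), that advertises io_min = 65536 and io_opt = 65536 * 4 = 262144 bytes.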
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 89939a7aef57..8726fd7ebce5 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
         for (mirror = first; mirror <= last; mirror++)
                 if ( !(p=conf->mirrors+mirror)->rdev) {
 
-                        blk_queue_stack_limits(mddev->queue,
-                                               rdev->bdev->bd_disk->queue);
+                        disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                          rdev->data_offset << 9);
                         /* as we don't honour merge_bvec_fn, we must never risk
                          * violating it, so limit ->max_sector to one PAGE, as
                          * a one page request is never in violation.
@@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                         rcu_assign_pointer(p->rdev, rdev);
                         break;
                 }
-
+        md_integrity_add_rdev(rdev, mddev);
         print_conf(conf);
         return err;
 }
@@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
                         /* lost the race, try later */
                         err = -EBUSY;
                         p->rdev = rdev;
+                        goto abort;
                 }
+                md_integrity_register(mddev);
         }
 abort:
 
@@ -1988,9 +1990,8 @@ static int run(mddev_t *mddev)
                 disk = conf->mirrors + disk_idx;
 
                 disk->rdev = rdev;
-
-                blk_queue_stack_limits(mddev->queue,
-                                       rdev->bdev->bd_disk->queue);
+                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                  rdev->data_offset << 9);
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, so limit ->max_sector to one PAGE, as
                  * a one page request is never in violation.
@@ -2068,7 +2069,7 @@ static int run(mddev_t *mddev)
         mddev->queue->unplug_fn = raid1_unplug;
         mddev->queue->backing_dev_info.congested_fn = raid1_congested;
         mddev->queue->backing_dev_info.congested_data = mddev;
-
+        md_integrity_register(mddev);
         return 0;
 
 out_no_mem:
@@ -2133,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors)
                 return -EINVAL;
         set_capacity(mddev->gendisk, mddev->array_sectors);
         mddev->changed = 1;
+        revalidate_disk(mddev->gendisk);
         if (sectors > mddev->dev_sectors &&
             mddev->recovery_cp == MaxSector) {
                 mddev->recovery_cp = mddev->dev_sectors;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ae12ceafe10c..3d9020cf6f6e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
         for ( ; mirror <= last ; mirror++)
                 if ( !(p=conf->mirrors+mirror)->rdev) {
 
-                        blk_queue_stack_limits(mddev->queue,
-                                               rdev->bdev->bd_disk->queue);
+                        disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                          rdev->data_offset << 9);
                         /* as we don't honour merge_bvec_fn, we must never risk
                          * violating it, so limit ->max_sector to one PAGE, as
                          * a one page request is never in violation.
@@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                         break;
                 }
 
+        md_integrity_add_rdev(rdev, mddev);
         print_conf(conf);
         return err;
 }
@@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
                         /* lost the race, try later */
                         err = -EBUSY;
                         p->rdev = rdev;
+                        goto abort;
                 }
+                md_integrity_register(mddev);
         }
 abort:
 
@@ -2044,7 +2047,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 static int run(mddev_t *mddev)
 {
         conf_t *conf;
-        int i, disk_idx;
+        int i, disk_idx, chunk_size;
         mirror_info_t *disk;
         mdk_rdev_t *rdev;
         int nc, fc, fo;
@@ -2130,6 +2133,14 @@ static int run(mddev_t *mddev)
         spin_lock_init(&conf->device_lock);
         mddev->queue->queue_lock = &conf->device_lock;
 
+        chunk_size = mddev->chunk_sectors << 9;
+        blk_queue_io_min(mddev->queue, chunk_size);
+        if (conf->raid_disks % conf->near_copies)
+                blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks);
+        else
+                blk_queue_io_opt(mddev->queue, chunk_size *
+                                 (conf->raid_disks / conf->near_copies));
+
         list_for_each_entry(rdev, &mddev->disks, same_set) {
                 disk_idx = rdev->raid_disk;
                 if (disk_idx >= mddev->raid_disks
@@ -2138,9 +2149,8 @@ static int run(mddev_t *mddev)
                 disk = conf->mirrors + disk_idx;
 
                 disk->rdev = rdev;
-
-                blk_queue_stack_limits(mddev->queue,
-                                       rdev->bdev->bd_disk->queue);
+                disk_stack_limits(mddev->gendisk, rdev->bdev,
+                                  rdev->data_offset << 9);
                 /* as we don't honour merge_bvec_fn, we must never risk
                  * violating it, so limit ->max_sector to one PAGE, as
                  * a one page request is never in violation.
@@ -2218,6 +2228,7 @@ static int run(mddev_t *mddev)
 
         if (conf->near_copies < mddev->raid_disks)
                 blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec);
+        md_integrity_register(mddev);
         return 0;
 
 out_free_conf:
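raid10's variant of the same hints accounts for mirroring: when raid_disks is a multiple of near_copies, each stripe holds only raid_disks / near_copies chunks of unique data, so a hypothetical 4-disk near-2 layout with 64 KiB chunks advertises io_min = 65536 and io_opt = 65536 * (4 / 2) = 131072 bytes. Layouts where raid_disks % near_copies != 0 fall back to a full raid_disks-wide stripe for io_opt.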
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index cac6f4d3a143..9b00a229015a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3911,13 +3911,21 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3911 | goto retry; | 3911 | goto retry; |
3912 | } | 3912 | } |
3913 | } | 3913 | } |
3914 | /* FIXME what if we get a false positive because these | 3914 | |
3915 | * are being updated. | 3915 | if (bio_data_dir(bi) == WRITE && |
3916 | */ | 3916 | logical_sector >= mddev->suspend_lo && |
3917 | if (logical_sector >= mddev->suspend_lo && | ||
3918 | logical_sector < mddev->suspend_hi) { | 3917 | logical_sector < mddev->suspend_hi) { |
3919 | release_stripe(sh); | 3918 | release_stripe(sh); |
3920 | schedule(); | 3919 | /* As the suspend_* range is controlled by |
3920 | * userspace, we want an interruptible | ||
3921 | * wait. | ||
3922 | */ | ||
3923 | flush_signals(current); | ||
3924 | prepare_to_wait(&conf->wait_for_overlap, | ||
3925 | &w, TASK_INTERRUPTIBLE); | ||
3926 | if (logical_sector >= mddev->suspend_lo && | ||
3927 | logical_sector < mddev->suspend_hi) | ||
3928 | schedule(); | ||
3921 | goto retry; | 3929 | goto retry; |
3922 | } | 3930 | } |
3923 | 3931 | ||
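The rewritten wait above replaces a bare schedule() (which could miss a wakeup and sleep uninterruptibly) with the canonical wait-queue pattern: join the queue first, re-check the condition, and only then sleep, in TASK_INTERRUPTIBLE state because suspend_lo/suspend_hi are driven from user space. The generic shape, using real kernel primitives but a hypothetical still_suspended() stand-in for the range test (the full code also calls finish_wait() once it leaves the retry loop):

DEFINE_WAIT(w);

prepare_to_wait(&conf->wait_for_overlap, &w, TASK_INTERRUPTIBLE);
if (still_suspended())          /* re-check after queueing: no lost wakeups */
        schedule();             /* ends on wake_up() or a signal */
finish_wait(&conf->wait_for_overlap, &w);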
@@ -3989,7 +3997,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3989 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { | 3997 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { |
3990 | sector_nr = raid5_size(mddev, 0, 0) | 3998 | sector_nr = raid5_size(mddev, 0, 0) |
3991 | - conf->reshape_progress; | 3999 | - conf->reshape_progress; |
3992 | } else if (mddev->delta_disks > 0 && | 4000 | } else if (mddev->delta_disks >= 0 && |
3993 | conf->reshape_progress > 0) | 4001 | conf->reshape_progress > 0) |
3994 | sector_nr = conf->reshape_progress; | 4002 | sector_nr = conf->reshape_progress; |
3995 | sector_div(sector_nr, new_data_disks); | 4003 | sector_div(sector_nr, new_data_disks); |
@@ -4203,6 +4211,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
4203 | return 0; | 4211 | return 0; |
4204 | } | 4212 | } |
4205 | 4213 | ||
4214 | /* Allow raid5_quiesce to complete */ | ||
4215 | wait_event(conf->wait_for_overlap, conf->quiesce != 2); | ||
4216 | |||
4206 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 4217 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
4207 | return reshape_request(mddev, sector_nr, skipped); | 4218 | return reshape_request(mddev, sector_nr, skipped); |
4208 | 4219 | ||
@@ -4803,7 +4814,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
4803 | static int run(mddev_t *mddev) | 4814 | static int run(mddev_t *mddev) |
4804 | { | 4815 | { |
4805 | raid5_conf_t *conf; | 4816 | raid5_conf_t *conf; |
4806 | int working_disks = 0; | 4817 | int working_disks = 0, chunk_size; |
4807 | mdk_rdev_t *rdev; | 4818 | mdk_rdev_t *rdev; |
4808 | 4819 | ||
4809 | if (mddev->recovery_cp != MaxSector) | 4820 | if (mddev->recovery_cp != MaxSector) |
@@ -4844,7 +4855,26 @@ static int run(mddev_t *mddev) | |||
4844 | (old_disks-max_degraded)); | 4855 | (old_disks-max_degraded)); |
4845 | /* here_old is the first stripe that we might need to read | 4856 | /* here_old is the first stripe that we might need to read |
4846 | * from */ | 4857 | * from */ |
4847 | if (here_new >= here_old) { | 4858 | if (mddev->delta_disks == 0) { |
4859 | /* We cannot be sure it is safe to start an in-place | ||
4860 | * reshape. It is only safe if user-space is monitoring | ||
4861 | * and taking constant backups. | ||
4862 | * mdadm always starts a situation like this in | ||
4863 | * readonly mode so it can take control before | ||
4864 | * allowing any writes. So just check for that. | ||
4865 | */ | ||
4866 | if ((here_new * mddev->new_chunk_sectors != | ||
4867 | here_old * mddev->chunk_sectors) || | ||
4868 | mddev->ro == 0) { | ||
4869 | printk(KERN_ERR "raid5: in-place reshape must be started" | ||
4870 | " in read-only mode - aborting\n"); | ||
4871 | return -EINVAL; | ||
4872 | } | ||
4873 | } else if (mddev->delta_disks < 0 | ||
4874 | ? (here_new * mddev->new_chunk_sectors <= | ||
4875 | here_old * mddev->chunk_sectors) | ||
4876 | : (here_new * mddev->new_chunk_sectors >= | ||
4877 | here_old * mddev->chunk_sectors)) { | ||
4848 | /* Reading from the same stripe as writing to - bad */ | 4878 | /* Reading from the same stripe as writing to - bad */ |
4849 | printk(KERN_ERR "raid5: reshape_position too early for " | 4879 | printk(KERN_ERR "raid5: reshape_position too early for " |
4850 | "auto-recovery - aborting.\n"); | 4880 | "auto-recovery - aborting.\n"); |
@@ -4958,6 +4988,14 @@ static int run(mddev_t *mddev) | |||
4958 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 4988 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
4959 | 4989 | ||
4960 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | 4990 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); |
4991 | chunk_size = mddev->chunk_sectors << 9; | ||
4992 | blk_queue_io_min(mddev->queue, chunk_size); | ||
4993 | blk_queue_io_opt(mddev->queue, chunk_size * | ||
4994 | (conf->raid_disks - conf->max_degraded)); | ||
4995 | |||
4996 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
4997 | disk_stack_limits(mddev->gendisk, rdev->bdev, | ||
4998 | rdev->data_offset << 9); | ||
4961 | 4999 | ||
4962 | return 0; | 5000 | return 0; |
4963 | abort: | 5001 | abort: |
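For raid5/6 the topology hints are simpler: io_min is again one chunk, and io_opt is a full stripe of data, chunk size times (raid_disks - max_degraded), since a write of exactly that size and alignment can compute parity without a read-modify-write cycle. With a hypothetical 6-drive RAID5 and 64 KiB chunks:

/* Hypothetical geometry, for illustration only. */
unsigned int chunk_size = 64 * 1024;            /* bytes */
unsigned int data_disks = 6 - 1;                /* raid_disks - max_degraded */
unsigned int io_opt = chunk_size * data_disks;  /* 320 KiB = one full stripe */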
@@ -5185,6 +5223,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
5185 | return -EINVAL; | 5223 | return -EINVAL; |
5186 | set_capacity(mddev->gendisk, mddev->array_sectors); | 5224 | set_capacity(mddev->gendisk, mddev->array_sectors); |
5187 | mddev->changed = 1; | 5225 | mddev->changed = 1; |
5226 | revalidate_disk(mddev->gendisk); | ||
5188 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { | 5227 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { |
5189 | mddev->recovery_cp = mddev->dev_sectors; | 5228 | mddev->recovery_cp = mddev->dev_sectors; |
5190 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5229 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
@@ -5330,7 +5369,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
5330 | spin_unlock_irqrestore(&conf->device_lock, flags); | 5369 | spin_unlock_irqrestore(&conf->device_lock, flags); |
5331 | } | 5370 | } |
5332 | mddev->raid_disks = conf->raid_disks; | 5371 | mddev->raid_disks = conf->raid_disks; |
5333 | mddev->reshape_position = 0; | 5372 | mddev->reshape_position = conf->reshape_progress; |
5334 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 5373 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
5335 | 5374 | ||
5336 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 5375 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
@@ -5385,7 +5424,6 @@ static void end_reshape(raid5_conf_t *conf) | |||
5385 | */ | 5424 | */ |
5386 | static void raid5_finish_reshape(mddev_t *mddev) | 5425 | static void raid5_finish_reshape(mddev_t *mddev) |
5387 | { | 5426 | { |
5388 | struct block_device *bdev; | ||
5389 | raid5_conf_t *conf = mddev->private; | 5427 | raid5_conf_t *conf = mddev->private; |
5390 | 5428 | ||
5391 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5429 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
@@ -5394,15 +5432,7 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
5394 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5432 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
5395 | set_capacity(mddev->gendisk, mddev->array_sectors); | 5433 | set_capacity(mddev->gendisk, mddev->array_sectors); |
5396 | mddev->changed = 1; | 5434 | mddev->changed = 1; |
5397 | 5435 | revalidate_disk(mddev->gendisk); | |
5398 | bdev = bdget_disk(mddev->gendisk, 0); | ||
5399 | if (bdev) { | ||
5400 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
5401 | i_size_write(bdev->bd_inode, | ||
5402 | (loff_t)mddev->array_sectors << 9); | ||
5403 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
5404 | bdput(bdev); | ||
5405 | } | ||
5406 | } else { | 5436 | } else { |
5407 | int d; | 5437 | int d; |
5408 | mddev->degraded = conf->raid_disks; | 5438 | mddev->degraded = conf->raid_disks; |
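The deleted block above is essentially an open-coded size refresh: look up block device 0 for the gendisk, update its inode size under i_mutex, and drop the reference. revalidate_disk() performs the equivalent refresh (plus a call into the driver's revalidate hook) in one line, so both raid5_resize() and this reshape-completion path now use it. A simplified sketch of the part that matters here, built from the same calls the old code used (not the actual kernel implementation):

/* Simplified: propagate a gendisk's capacity to its block device. */
static int refresh_bdev_size(struct gendisk *disk)
{
        struct block_device *bdev = bdget_disk(disk, 0);

        if (!bdev)
                return -ENODEV;
        mutex_lock(&bdev->bd_inode->i_mutex);
        i_size_write(bdev->bd_inode, (loff_t)get_capacity(disk) << 9);
        mutex_unlock(&bdev->bd_inode->i_mutex);
        bdput(bdev);
        return 0;
}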
@@ -5413,8 +5443,15 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
5413 | mddev->degraded--; | 5443 | mddev->degraded--; |
5414 | for (d = conf->raid_disks ; | 5444 | for (d = conf->raid_disks ; |
5415 | d < conf->raid_disks - mddev->delta_disks; | 5445 | d < conf->raid_disks - mddev->delta_disks; |
5416 | d++) | 5446 | d++) { |
5417 | raid5_remove_disk(mddev, d); | 5447 | mdk_rdev_t *rdev = conf->disks[d].rdev; |
5448 | if (rdev && raid5_remove_disk(mddev, d) == 0) { | ||
5449 | char nm[20]; | ||
5450 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
5451 | sysfs_remove_link(&mddev->kobj, nm); | ||
5452 | rdev->raid_disk = -1; | ||
5453 | } | ||
5454 | } | ||
5418 | } | 5455 | } |
5419 | mddev->layout = conf->algorithm; | 5456 | mddev->layout = conf->algorithm; |
5420 | mddev->chunk_sectors = conf->chunk_sectors; | 5457 | mddev->chunk_sectors = conf->chunk_sectors; |
@@ -5434,12 +5471,18 @@ static void raid5_quiesce(mddev_t *mddev, int state) | |||
5434 | 5471 | ||
5435 | case 1: /* stop all writes */ | 5472 | case 1: /* stop all writes */ |
5436 | spin_lock_irq(&conf->device_lock); | 5473 | spin_lock_irq(&conf->device_lock); |
5437 | conf->quiesce = 1; | 5474 | /* '2' tells resync/reshape to pause so that all |
5475 | * active stripes can drain | ||
5476 | */ | ||
5477 | conf->quiesce = 2; | ||
5438 | wait_event_lock_irq(conf->wait_for_stripe, | 5478 | wait_event_lock_irq(conf->wait_for_stripe, |
5439 | atomic_read(&conf->active_stripes) == 0 && | 5479 | atomic_read(&conf->active_stripes) == 0 && |
5440 | atomic_read(&conf->active_aligned_reads) == 0, | 5480 | atomic_read(&conf->active_aligned_reads) == 0, |
5441 | conf->device_lock, /* nothing */); | 5481 | conf->device_lock, /* nothing */); |
5482 | conf->quiesce = 1; | ||
5442 | spin_unlock_irq(&conf->device_lock); | 5483 | spin_unlock_irq(&conf->device_lock); |
5484 | /* allow reshape to continue */ | ||
5485 | wake_up(&conf->wait_for_overlap); | ||
5443 | break; | 5486 | break; |
5444 | 5487 | ||
5445 | case 0: /* re-enable writes */ | 5488 | case 0: /* re-enable writes */ |
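With this change conf->quiesce is a three-state handshake rather than a boolean: 0 means normal I/O, 1 means fully quiesced, and the transient 2 tells resync/reshape to pause (via the wait_event(conf->wait_for_overlap, conf->quiesce != 2) added earlier in this diff) so that active stripes can drain before the state settles at 1. Summarized as an illustrative model using the field names from the diff:

/* Illustrative model of the quiesce handshake after this patch. */
enum { QUIESCE_NONE = 0, QUIESCE_FULL = 1, QUIESCE_DRAIN = 2 };

/* raid5_quiesce(mddev, 1):            sync_request():
 *   quiesce = QUIESCE_DRAIN;            wait_event(wait_for_overlap,
 *   wait until no active stripes;                  quiesce != QUIESCE_DRAIN);
 *   quiesce = QUIESCE_FULL;             ...proceed with resync/reshape...
 *   wake_up(&wait_for_overlap);
 */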