diff options
| author | Dan Williams <dan.j.williams@intel.com> | 2009-09-08 20:55:54 -0400 |
|---|---|---|
| committer | Dan Williams <dan.j.williams@intel.com> | 2009-09-08 20:55:54 -0400 |
| commit | 9134d02bc0af4a8747d448d1f811ec5f8eb96df6 (patch) | |
| tree | 704c3e5dcc10f360815c4868a74711f82fb62e27 /drivers/md | |
| parent | bbb20089a3275a19e475dbc21320c3742e3ca423 (diff) | |
| parent | 80ffb3cceaefa405f2ecd46d66500ed8d53efe74 (diff) | |
Merge commit 'md/for-linus' into async-tx-next
Conflicts:
drivers/md/raid5.c
Diffstat (limited to 'drivers/md')
| -rw-r--r-- | drivers/md/dm-crypt.c | 4 | ||||
| -rw-r--r-- | drivers/md/dm-delay.c | 4 | ||||
| -rw-r--r-- | drivers/md/dm-exception-store.c | 9 | ||||
| -rw-r--r-- | drivers/md/dm-linear.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-mpath.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-raid1.c | 3 | ||||
| -rw-r--r-- | drivers/md/dm-stripe.c | 7 | ||||
| -rw-r--r-- | drivers/md/dm-table.c | 17 | ||||
| -rw-r--r-- | drivers/md/dm.c | 14 | ||||
| -rw-r--r-- | drivers/md/dm.h | 1 | ||||
| -rw-r--r-- | drivers/md/linear.c | 6 | ||||
| -rw-r--r-- | drivers/md/md.c | 251 | ||||
| -rw-r--r-- | drivers/md/md.h | 12 | ||||
| -rw-r--r-- | drivers/md/multipath.c | 12 | ||||
| -rw-r--r-- | drivers/md/raid0.c | 10 | ||||
| -rw-r--r-- | drivers/md/raid1.c | 16 | ||||
| -rw-r--r-- | drivers/md/raid10.c | 23 | ||||
| -rw-r--r-- | drivers/md/raid5.c | 87 |
18 files changed, 297 insertions, 183 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 9933eb861c71..ed1038164019 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c | |||
| @@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io) | |||
| 776 | * But don't wait if split was due to the io size restriction | 776 | * But don't wait if split was due to the io size restriction |
| 777 | */ | 777 | */ |
| 778 | if (unlikely(out_of_pages)) | 778 | if (unlikely(out_of_pages)) |
| 779 | congestion_wait(WRITE, HZ/100); | 779 | congestion_wait(BLK_RW_ASYNC, HZ/100); |
| 780 | 780 | ||
| 781 | /* | 781 | /* |
| 782 | * With async crypto it is unsafe to share the crypto context | 782 | * With async crypto it is unsafe to share the crypto context |
| @@ -1318,7 +1318,7 @@ static int crypt_iterate_devices(struct dm_target *ti, | |||
| 1318 | { | 1318 | { |
| 1319 | struct crypt_config *cc = ti->private; | 1319 | struct crypt_config *cc = ti->private; |
| 1320 | 1320 | ||
| 1321 | return fn(ti, cc->dev, cc->start, data); | 1321 | return fn(ti, cc->dev, cc->start, ti->len, data); |
| 1322 | } | 1322 | } |
| 1323 | 1323 | ||
| 1324 | static struct target_type crypt_target = { | 1324 | static struct target_type crypt_target = { |
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 4e5b843cd4d7..ebe7381f47c8 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c | |||
| @@ -324,12 +324,12 @@ static int delay_iterate_devices(struct dm_target *ti, | |||
| 324 | struct delay_c *dc = ti->private; | 324 | struct delay_c *dc = ti->private; |
| 325 | int ret = 0; | 325 | int ret = 0; |
| 326 | 326 | ||
| 327 | ret = fn(ti, dc->dev_read, dc->start_read, data); | 327 | ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data); |
| 328 | if (ret) | 328 | if (ret) |
| 329 | goto out; | 329 | goto out; |
| 330 | 330 | ||
| 331 | if (dc->dev_write) | 331 | if (dc->dev_write) |
| 332 | ret = fn(ti, dc->dev_write, dc->start_write, data); | 332 | ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data); |
| 333 | 333 | ||
| 334 | out: | 334 | out: |
| 335 | return ret; | 335 | return ret; |
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index c3ae51584b12..3710ff88fc10 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c | |||
| @@ -195,7 +195,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, | |||
| 195 | struct dm_exception_store **store) | 195 | struct dm_exception_store **store) |
| 196 | { | 196 | { |
| 197 | int r = 0; | 197 | int r = 0; |
| 198 | struct dm_exception_store_type *type; | 198 | struct dm_exception_store_type *type = NULL; |
| 199 | struct dm_exception_store *tmp_store; | 199 | struct dm_exception_store *tmp_store; |
| 200 | char persistent; | 200 | char persistent; |
| 201 | 201 | ||
| @@ -211,12 +211,15 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, | |||
| 211 | } | 211 | } |
| 212 | 212 | ||
| 213 | persistent = toupper(*argv[1]); | 213 | persistent = toupper(*argv[1]); |
| 214 | if (persistent != 'P' && persistent != 'N') { | 214 | if (persistent == 'P') |
| 215 | type = get_type("P"); | ||
| 216 | else if (persistent == 'N') | ||
| 217 | type = get_type("N"); | ||
| 218 | else { | ||
| 215 | ti->error = "Persistent flag is not P or N"; | 219 | ti->error = "Persistent flag is not P or N"; |
| 216 | return -EINVAL; | 220 | return -EINVAL; |
| 217 | } | 221 | } |
| 218 | 222 | ||
| 219 | type = get_type(&persistent); | ||
| 220 | if (!type) { | 223 | if (!type) { |
| 221 | ti->error = "Exception store type not recognised"; | 224 | ti->error = "Exception store type not recognised"; |
| 222 | r = -EINVAL; | 225 | r = -EINVAL; |
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 9184b6deb868..82f7d6e6b1ea 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c | |||
| @@ -139,7 +139,7 @@ static int linear_iterate_devices(struct dm_target *ti, | |||
| 139 | { | 139 | { |
| 140 | struct linear_c *lc = ti->private; | 140 | struct linear_c *lc = ti->private; |
| 141 | 141 | ||
| 142 | return fn(ti, lc->dev, lc->start, data); | 142 | return fn(ti, lc->dev, lc->start, ti->len, data); |
| 143 | } | 143 | } |
| 144 | 144 | ||
| 145 | static struct target_type linear_target = { | 145 | static struct target_type linear_target = { |
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c70604a20897..6f0d90d4a541 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c | |||
| @@ -1453,7 +1453,7 @@ static int multipath_iterate_devices(struct dm_target *ti, | |||
| 1453 | 1453 | ||
| 1454 | list_for_each_entry(pg, &m->priority_groups, list) { | 1454 | list_for_each_entry(pg, &m->priority_groups, list) { |
| 1455 | list_for_each_entry(p, &pg->pgpaths, list) { | 1455 | list_for_each_entry(p, &pg->pgpaths, list) { |
| 1456 | ret = fn(ti, p->path.dev, ti->begin, data); | 1456 | ret = fn(ti, p->path.dev, ti->begin, ti->len, data); |
| 1457 | if (ret) | 1457 | if (ret) |
| 1458 | goto out; | 1458 | goto out; |
| 1459 | } | 1459 | } |
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index ce8868c768cc..9726577cde49 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c | |||
| @@ -638,6 +638,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) | |||
| 638 | spin_lock_irq(&ms->lock); | 638 | spin_lock_irq(&ms->lock); |
| 639 | bio_list_merge(&ms->writes, &requeue); | 639 | bio_list_merge(&ms->writes, &requeue); |
| 640 | spin_unlock_irq(&ms->lock); | 640 | spin_unlock_irq(&ms->lock); |
| 641 | delayed_wake(ms); | ||
| 641 | } | 642 | } |
| 642 | 643 | ||
| 643 | /* | 644 | /* |
| @@ -1292,7 +1293,7 @@ static int mirror_iterate_devices(struct dm_target *ti, | |||
| 1292 | 1293 | ||
| 1293 | for (i = 0; !ret && i < ms->nr_mirrors; i++) | 1294 | for (i = 0; !ret && i < ms->nr_mirrors; i++) |
| 1294 | ret = fn(ti, ms->mirror[i].dev, | 1295 | ret = fn(ti, ms->mirror[i].dev, |
| 1295 | ms->mirror[i].offset, data); | 1296 | ms->mirror[i].offset, ti->len, data); |
| 1296 | 1297 | ||
| 1297 | return ret; | 1298 | return ret; |
| 1298 | } | 1299 | } |
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index b240e85ae39a..4e0e5937e42a 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c | |||
| @@ -320,10 +320,11 @@ static int stripe_iterate_devices(struct dm_target *ti, | |||
| 320 | int ret = 0; | 320 | int ret = 0; |
| 321 | unsigned i = 0; | 321 | unsigned i = 0; |
| 322 | 322 | ||
| 323 | do | 323 | do { |
| 324 | ret = fn(ti, sc->stripe[i].dev, | 324 | ret = fn(ti, sc->stripe[i].dev, |
| 325 | sc->stripe[i].physical_start, data); | 325 | sc->stripe[i].physical_start, |
| 326 | while (!ret && ++i < sc->stripes); | 326 | sc->stripe_width, data); |
| 327 | } while (!ret && ++i < sc->stripes); | ||
| 327 | 328 | ||
| 328 | return ret; | 329 | return ret; |
| 329 | } | 330 | } |
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4899ebe767c8..d952b3441913 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c | |||
| @@ -346,7 +346,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) | |||
| 346 | * If possible, this checks an area of a destination device is valid. | 346 | * If possible, this checks an area of a destination device is valid. |
| 347 | */ | 347 | */ |
| 348 | static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, | 348 | static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, |
| 349 | sector_t start, void *data) | 349 | sector_t start, sector_t len, void *data) |
| 350 | { | 350 | { |
| 351 | struct queue_limits *limits = data; | 351 | struct queue_limits *limits = data; |
| 352 | struct block_device *bdev = dev->bdev; | 352 | struct block_device *bdev = dev->bdev; |
| @@ -359,7 +359,7 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, | |||
| 359 | if (!dev_size) | 359 | if (!dev_size) |
| 360 | return 1; | 360 | return 1; |
| 361 | 361 | ||
| 362 | if ((start >= dev_size) || (start + ti->len > dev_size)) { | 362 | if ((start >= dev_size) || (start + len > dev_size)) { |
| 363 | DMWARN("%s: %s too small for target", | 363 | DMWARN("%s: %s too small for target", |
| 364 | dm_device_name(ti->table->md), bdevname(bdev, b)); | 364 | dm_device_name(ti->table->md), bdevname(bdev, b)); |
| 365 | return 0; | 365 | return 0; |
| @@ -377,11 +377,11 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev, | |||
| 377 | return 0; | 377 | return 0; |
| 378 | } | 378 | } |
| 379 | 379 | ||
| 380 | if (ti->len & (logical_block_size_sectors - 1)) { | 380 | if (len & (logical_block_size_sectors - 1)) { |
| 381 | DMWARN("%s: len=%llu not aligned to h/w " | 381 | DMWARN("%s: len=%llu not aligned to h/w " |
| 382 | "logical block size %hu of %s", | 382 | "logical block size %hu of %s", |
| 383 | dm_device_name(ti->table->md), | 383 | dm_device_name(ti->table->md), |
| 384 | (unsigned long long)ti->len, | 384 | (unsigned long long)len, |
| 385 | limits->logical_block_size, bdevname(bdev, b)); | 385 | limits->logical_block_size, bdevname(bdev, b)); |
| 386 | return 0; | 386 | return 0; |
| 387 | } | 387 | } |
| @@ -482,7 +482,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti, | |||
| 482 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) | 482 | #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) |
| 483 | 483 | ||
| 484 | int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, | 484 | int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, |
| 485 | sector_t start, void *data) | 485 | sector_t start, sector_t len, void *data) |
| 486 | { | 486 | { |
| 487 | struct queue_limits *limits = data; | 487 | struct queue_limits *limits = data; |
| 488 | struct block_device *bdev = dev->bdev; | 488 | struct block_device *bdev = dev->bdev; |
| @@ -495,7 +495,7 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, | |||
| 495 | return 0; | 495 | return 0; |
| 496 | } | 496 | } |
| 497 | 497 | ||
| 498 | if (blk_stack_limits(limits, &q->limits, start) < 0) | 498 | if (blk_stack_limits(limits, &q->limits, start << 9) < 0) |
| 499 | DMWARN("%s: target device %s is misaligned", | 499 | DMWARN("%s: target device %s is misaligned", |
| 500 | dm_device_name(ti->table->md), bdevname(bdev, b)); | 500 | dm_device_name(ti->table->md), bdevname(bdev, b)); |
| 501 | 501 | ||
| @@ -830,11 +830,6 @@ unsigned dm_table_get_type(struct dm_table *t) | |||
| 830 | return t->type; | 830 | return t->type; |
| 831 | } | 831 | } |
| 832 | 832 | ||
| 833 | bool dm_table_bio_based(struct dm_table *t) | ||
| 834 | { | ||
| 835 | return dm_table_get_type(t) == DM_TYPE_BIO_BASED; | ||
| 836 | } | ||
| 837 | |||
| 838 | bool dm_table_request_based(struct dm_table *t) | 833 | bool dm_table_request_based(struct dm_table *t) |
| 839 | { | 834 | { |
| 840 | return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; | 835 | return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 3c6d4ee8921d..8a311ea0d441 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
| @@ -1017,7 +1017,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
| 1017 | clone->bi_flags |= 1 << BIO_CLONED; | 1017 | clone->bi_flags |= 1 << BIO_CLONED; |
| 1018 | 1018 | ||
| 1019 | if (bio_integrity(bio)) { | 1019 | if (bio_integrity(bio)) { |
| 1020 | bio_integrity_clone(clone, bio, GFP_NOIO); | 1020 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); |
| 1021 | bio_integrity_trim(clone, | 1021 | bio_integrity_trim(clone, |
| 1022 | bio_sector_offset(bio, idx, offset), len); | 1022 | bio_sector_offset(bio, idx, offset), len); |
| 1023 | } | 1023 | } |
| @@ -1045,7 +1045,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
| 1045 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); | 1045 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); |
| 1046 | 1046 | ||
| 1047 | if (bio_integrity(bio)) { | 1047 | if (bio_integrity(bio)) { |
| 1048 | bio_integrity_clone(clone, bio, GFP_NOIO); | 1048 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); |
| 1049 | 1049 | ||
| 1050 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) | 1050 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) |
| 1051 | bio_integrity_trim(clone, | 1051 | bio_integrity_trim(clone, |
| @@ -2203,16 +2203,6 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table) | |||
| 2203 | goto out; | 2203 | goto out; |
| 2204 | } | 2204 | } |
| 2205 | 2205 | ||
| 2206 | /* | ||
| 2207 | * It is enought that blk_queue_ordered() is called only once when | ||
| 2208 | * the first bio-based table is bound. | ||
| 2209 | * | ||
| 2210 | * This setting should be moved to alloc_dev() when request-based dm | ||
| 2211 | * supports barrier. | ||
| 2212 | */ | ||
| 2213 | if (!md->map && dm_table_bio_based(table)) | ||
| 2214 | blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL); | ||
| 2215 | |||
| 2216 | __unbind(md); | 2206 | __unbind(md); |
| 2217 | r = __bind(md, table, &limits); | 2207 | r = __bind(md, table, &limits); |
| 2218 | 2208 | ||
diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 23278ae80f08..a7663eba17e2 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h | |||
| @@ -61,7 +61,6 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits); | |||
| 61 | int dm_table_any_busy_target(struct dm_table *t); | 61 | int dm_table_any_busy_target(struct dm_table *t); |
| 62 | int dm_table_set_type(struct dm_table *t); | 62 | int dm_table_set_type(struct dm_table *t); |
| 63 | unsigned dm_table_get_type(struct dm_table *t); | 63 | unsigned dm_table_get_type(struct dm_table *t); |
| 64 | bool dm_table_bio_based(struct dm_table *t); | ||
| 65 | bool dm_table_request_based(struct dm_table *t); | 64 | bool dm_table_request_based(struct dm_table *t); |
| 66 | int dm_table_alloc_md_mempools(struct dm_table *t); | 65 | int dm_table_alloc_md_mempools(struct dm_table *t); |
| 67 | void dm_table_free_md_mempools(struct dm_table *t); | 66 | void dm_table_free_md_mempools(struct dm_table *t); |
diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 15c8b7b25a9b..5fe39c2a3d2b 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c | |||
| @@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) | |||
| 166 | rdev->sectors = sectors * mddev->chunk_sectors; | 166 | rdev->sectors = sectors * mddev->chunk_sectors; |
| 167 | } | 167 | } |
| 168 | 168 | ||
| 169 | blk_queue_stack_limits(mddev->queue, | 169 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 170 | rdev->bdev->bd_disk->queue); | 170 | rdev->data_offset << 9); |
| 171 | /* as we don't honour merge_bvec_fn, we must never risk | 171 | /* as we don't honour merge_bvec_fn, we must never risk |
| 172 | * violating it, so limit ->max_sector to one PAGE, as | 172 | * violating it, so limit ->max_sector to one PAGE, as |
| 173 | * a one page request is never in violation. | 173 | * a one page request is never in violation. |
| @@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev) | |||
| 220 | mddev->queue->unplug_fn = linear_unplug; | 220 | mddev->queue->unplug_fn = linear_unplug; |
| 221 | mddev->queue->backing_dev_info.congested_fn = linear_congested; | 221 | mddev->queue->backing_dev_info.congested_fn = linear_congested; |
| 222 | mddev->queue->backing_dev_info.congested_data = mddev; | 222 | mddev->queue->backing_dev_info.congested_data = mddev; |
| 223 | md_integrity_register(mddev); | ||
| 223 | return 0; | 224 | return 0; |
| 224 | } | 225 | } |
| 225 | 226 | ||
| @@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 256 | rcu_assign_pointer(mddev->private, newconf); | 257 | rcu_assign_pointer(mddev->private, newconf); |
| 257 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); | 258 | md_set_array_sectors(mddev, linear_size(mddev, 0, 0)); |
| 258 | set_capacity(mddev->gendisk, mddev->array_sectors); | 259 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 260 | revalidate_disk(mddev->gendisk); | ||
| 259 | call_rcu(&oldconf->rcu, free_conf); | 261 | call_rcu(&oldconf->rcu, free_conf); |
| 260 | return 0; | 262 | return 0; |
| 261 | } | 263 | } |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 09be637d52cb..9dd872000cec 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
| 359 | else | 359 | else |
| 360 | new->md_minor = MINOR(unit) >> MdpMinorShift; | 360 | new->md_minor = MINOR(unit) >> MdpMinorShift; |
| 361 | 361 | ||
| 362 | mutex_init(&new->open_mutex); | ||
| 362 | mutex_init(&new->reconfig_mutex); | 363 | mutex_init(&new->reconfig_mutex); |
| 363 | INIT_LIST_HEAD(&new->disks); | 364 | INIT_LIST_HEAD(&new->disks); |
| 364 | INIT_LIST_HEAD(&new->all_mddevs); | 365 | INIT_LIST_HEAD(&new->all_mddevs); |
| @@ -1308,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1308 | } | 1309 | } |
| 1309 | if (mddev->level != LEVEL_MULTIPATH) { | 1310 | if (mddev->level != LEVEL_MULTIPATH) { |
| 1310 | int role; | 1311 | int role; |
| 1311 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | 1312 | if (rdev->desc_nr < 0 || |
| 1313 | rdev->desc_nr >= le32_to_cpu(sb->max_dev)) { | ||
| 1314 | role = 0xffff; | ||
| 1315 | rdev->desc_nr = -1; | ||
| 1316 | } else | ||
| 1317 | role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); | ||
| 1312 | switch(role) { | 1318 | switch(role) { |
| 1313 | case 0xffff: /* spare */ | 1319 | case 0xffff: /* spare */ |
| 1314 | break; | 1320 | break; |
| @@ -1394,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1394 | if (rdev2->desc_nr+1 > max_dev) | 1400 | if (rdev2->desc_nr+1 > max_dev) |
| 1395 | max_dev = rdev2->desc_nr+1; | 1401 | max_dev = rdev2->desc_nr+1; |
| 1396 | 1402 | ||
| 1397 | if (max_dev > le32_to_cpu(sb->max_dev)) | 1403 | if (max_dev > le32_to_cpu(sb->max_dev)) { |
| 1404 | int bmask; | ||
| 1398 | sb->max_dev = cpu_to_le32(max_dev); | 1405 | sb->max_dev = cpu_to_le32(max_dev); |
| 1406 | rdev->sb_size = max_dev * 2 + 256; | ||
| 1407 | bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1; | ||
| 1408 | if (rdev->sb_size & bmask) | ||
| 1409 | rdev->sb_size = (rdev->sb_size | bmask) + 1; | ||
| 1410 | } | ||
| 1399 | for (i=0; i<max_dev;i++) | 1411 | for (i=0; i<max_dev;i++) |
| 1400 | sb->dev_roles[i] = cpu_to_le16(0xfffe); | 1412 | sb->dev_roles[i] = cpu_to_le16(0xfffe); |
| 1401 | 1413 | ||
| @@ -1487,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) | |||
| 1487 | 1499 | ||
| 1488 | static LIST_HEAD(pending_raid_disks); | 1500 | static LIST_HEAD(pending_raid_disks); |
| 1489 | 1501 | ||
| 1490 | static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev) | 1502 | /* |
| 1503 | * Try to register data integrity profile for an mddev | ||
| 1504 | * | ||
| 1505 | * This is called when an array is started and after a disk has been kicked | ||
| 1506 | * from the array. It only succeeds if all working and active component devices | ||
| 1507 | * are integrity capable with matching profiles. | ||
| 1508 | */ | ||
| 1509 | int md_integrity_register(mddev_t *mddev) | ||
| 1510 | { | ||
| 1511 | mdk_rdev_t *rdev, *reference = NULL; | ||
| 1512 | |||
| 1513 | if (list_empty(&mddev->disks)) | ||
| 1514 | return 0; /* nothing to do */ | ||
| 1515 | if (blk_get_integrity(mddev->gendisk)) | ||
| 1516 | return 0; /* already registered */ | ||
| 1517 | list_for_each_entry(rdev, &mddev->disks, same_set) { | ||
| 1518 | /* skip spares and non-functional disks */ | ||
| 1519 | if (test_bit(Faulty, &rdev->flags)) | ||
| 1520 | continue; | ||
| 1521 | if (rdev->raid_disk < 0) | ||
| 1522 | continue; | ||
| 1523 | /* | ||
| 1524 | * If at least one rdev is not integrity capable, we can not | ||
| 1525 | * enable data integrity for the md device. | ||
| 1526 | */ | ||
| 1527 | if (!bdev_get_integrity(rdev->bdev)) | ||
| 1528 | return -EINVAL; | ||
| 1529 | if (!reference) { | ||
| 1530 | /* Use the first rdev as the reference */ | ||
| 1531 | reference = rdev; | ||
| 1532 | continue; | ||
| 1533 | } | ||
| 1534 | /* does this rdev's profile match the reference profile? */ | ||
| 1535 | if (blk_integrity_compare(reference->bdev->bd_disk, | ||
| 1536 | rdev->bdev->bd_disk) < 0) | ||
| 1537 | return -EINVAL; | ||
| 1538 | } | ||
| 1539 | /* | ||
| 1540 | * All component devices are integrity capable and have matching | ||
| 1541 | * profiles, register the common profile for the md device. | ||
| 1542 | */ | ||
| 1543 | if (blk_integrity_register(mddev->gendisk, | ||
| 1544 | bdev_get_integrity(reference->bdev)) != 0) { | ||
| 1545 | printk(KERN_ERR "md: failed to register integrity for %s\n", | ||
| 1546 | mdname(mddev)); | ||
| 1547 | return -EINVAL; | ||
| 1548 | } | ||
| 1549 | printk(KERN_NOTICE "md: data integrity on %s enabled\n", | ||
| 1550 | mdname(mddev)); | ||
| 1551 | return 0; | ||
| 1552 | } | ||
| 1553 | EXPORT_SYMBOL(md_integrity_register); | ||
| 1554 | |||
| 1555 | /* Disable data integrity if non-capable/non-matching disk is being added */ | ||
| 1556 | void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev) | ||
| 1491 | { | 1557 | { |
| 1492 | struct mdk_personality *pers = mddev->pers; | ||
| 1493 | struct gendisk *disk = mddev->gendisk; | ||
| 1494 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); | 1558 | struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev); |
| 1495 | struct blk_integrity *bi_mddev = blk_get_integrity(disk); | 1559 | struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk); |
| 1496 | 1560 | ||
| 1497 | /* Data integrity passthrough not supported on RAID 4, 5 and 6 */ | 1561 | if (!bi_mddev) /* nothing to do */ |
| 1498 | if (pers && pers->level >= 4 && pers->level <= 6) | ||
| 1499 | return; | 1562 | return; |
| 1500 | 1563 | if (rdev->raid_disk < 0) /* skip spares */ | |
| 1501 | /* If rdev is integrity capable, register profile for mddev */ | ||
| 1502 | if (!bi_mddev && bi_rdev) { | ||
| 1503 | if (blk_integrity_register(disk, bi_rdev)) | ||
| 1504 | printk(KERN_ERR "%s: %s Could not register integrity!\n", | ||
| 1505 | __func__, disk->disk_name); | ||
| 1506 | else | ||
| 1507 | printk(KERN_NOTICE "Enabling data integrity on %s\n", | ||
| 1508 | disk->disk_name); | ||
| 1509 | return; | 1564 | return; |
| 1510 | } | 1565 | if (bi_rdev && blk_integrity_compare(mddev->gendisk, |
| 1511 | 1566 | rdev->bdev->bd_disk) >= 0) | |
| 1512 | /* Check that mddev and rdev have matching profiles */ | 1567 | return; |
| 1513 | if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) { | 1568 | printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev)); |
| 1514 | printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__, | 1569 | blk_integrity_unregister(mddev->gendisk); |
| 1515 | disk->disk_name, rdev->bdev->bd_disk->disk_name); | ||
| 1516 | printk(KERN_NOTICE "Disabling data integrity on %s\n", | ||
| 1517 | disk->disk_name); | ||
| 1518 | blk_integrity_unregister(disk); | ||
| 1519 | } | ||
| 1520 | } | 1570 | } |
| 1571 | EXPORT_SYMBOL(md_integrity_add_rdev); | ||
| 1521 | 1572 | ||
| 1522 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | 1573 | static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) |
| 1523 | { | 1574 | { |
| @@ -1591,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) | |||
| 1591 | /* May as well allow recovery to be retried once */ | 1642 | /* May as well allow recovery to be retried once */ |
| 1592 | mddev->recovery_disabled = 0; | 1643 | mddev->recovery_disabled = 0; |
| 1593 | 1644 | ||
| 1594 | md_integrity_check(rdev, mddev); | ||
| 1595 | return 0; | 1645 | return 0; |
| 1596 | 1646 | ||
| 1597 | fail: | 1647 | fail: |
| @@ -1756,9 +1806,10 @@ static void print_sb_1(struct mdp_superblock_1 *sb) | |||
| 1756 | __u8 *uuid; | 1806 | __u8 *uuid; |
| 1757 | 1807 | ||
| 1758 | uuid = sb->set_uuid; | 1808 | uuid = sb->set_uuid; |
| 1759 | printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" | 1809 | printk(KERN_INFO |
| 1760 | ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" | 1810 | "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x" |
| 1761 | KERN_INFO "md: Name: \"%s\" CT:%llu\n", | 1811 | ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n" |
| 1812 | "md: Name: \"%s\" CT:%llu\n", | ||
| 1762 | le32_to_cpu(sb->major_version), | 1813 | le32_to_cpu(sb->major_version), |
| 1763 | le32_to_cpu(sb->feature_map), | 1814 | le32_to_cpu(sb->feature_map), |
| 1764 | uuid[0], uuid[1], uuid[2], uuid[3], | 1815 | uuid[0], uuid[1], uuid[2], uuid[3], |
| @@ -1770,12 +1821,13 @@ static void print_sb_1(struct mdp_superblock_1 *sb) | |||
| 1770 | & MD_SUPERBLOCK_1_TIME_SEC_MASK); | 1821 | & MD_SUPERBLOCK_1_TIME_SEC_MASK); |
| 1771 | 1822 | ||
| 1772 | uuid = sb->device_uuid; | 1823 | uuid = sb->device_uuid; |
| 1773 | printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" | 1824 | printk(KERN_INFO |
| 1825 | "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu" | ||
| 1774 | " RO:%llu\n" | 1826 | " RO:%llu\n" |
| 1775 | KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" | 1827 | "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x" |
| 1776 | ":%02x%02x%02x%02x%02x%02x\n" | 1828 | ":%02x%02x%02x%02x%02x%02x\n" |
| 1777 | KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" | 1829 | "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n" |
| 1778 | KERN_INFO "md: (MaxDev:%u) \n", | 1830 | "md: (MaxDev:%u) \n", |
| 1779 | le32_to_cpu(sb->level), | 1831 | le32_to_cpu(sb->level), |
| 1780 | (unsigned long long)le64_to_cpu(sb->size), | 1832 | (unsigned long long)le64_to_cpu(sb->size), |
| 1781 | le32_to_cpu(sb->raid_disks), | 1833 | le32_to_cpu(sb->raid_disks), |
| @@ -1923,17 +1975,14 @@ repeat: | |||
| 1923 | /* otherwise we have to go forward and ... */ | 1975 | /* otherwise we have to go forward and ... */ |
| 1924 | mddev->events ++; | 1976 | mddev->events ++; |
| 1925 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ | 1977 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ |
| 1926 | /* .. if the array isn't clean, insist on an odd 'events' */ | 1978 | /* .. if the array isn't clean, an 'even' event must also go |
| 1927 | if ((mddev->events&1)==0) { | 1979 | * to spares. */ |
| 1928 | mddev->events++; | 1980 | if ((mddev->events&1)==0) |
| 1929 | nospares = 0; | 1981 | nospares = 0; |
| 1930 | } | ||
| 1931 | } else { | 1982 | } else { |
| 1932 | /* otherwise insist on an even 'events' (for clean states) */ | 1983 | /* otherwise an 'odd' event must go to spares */ |
| 1933 | if ((mddev->events&1)) { | 1984 | if ((mddev->events&1)) |
| 1934 | mddev->events++; | ||
| 1935 | nospares = 0; | 1985 | nospares = 0; |
| 1936 | } | ||
| 1937 | } | 1986 | } |
| 1938 | } | 1987 | } |
| 1939 | 1988 | ||
| @@ -2655,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 2655 | ssize_t rv = len; | 2704 | ssize_t rv = len; |
| 2656 | struct mdk_personality *pers; | 2705 | struct mdk_personality *pers; |
| 2657 | void *priv; | 2706 | void *priv; |
| 2707 | mdk_rdev_t *rdev; | ||
| 2658 | 2708 | ||
| 2659 | if (mddev->pers == NULL) { | 2709 | if (mddev->pers == NULL) { |
| 2660 | if (len == 0) | 2710 | if (len == 0) |
| @@ -2734,6 +2784,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 2734 | mddev_suspend(mddev); | 2784 | mddev_suspend(mddev); |
| 2735 | mddev->pers->stop(mddev); | 2785 | mddev->pers->stop(mddev); |
| 2736 | module_put(mddev->pers->owner); | 2786 | module_put(mddev->pers->owner); |
| 2787 | /* Invalidate devices that are now superfluous */ | ||
| 2788 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
| 2789 | if (rdev->raid_disk >= mddev->raid_disks) { | ||
| 2790 | rdev->raid_disk = -1; | ||
| 2791 | clear_bit(In_sync, &rdev->flags); | ||
| 2792 | } | ||
| 2737 | mddev->pers = pers; | 2793 | mddev->pers = pers; |
| 2738 | mddev->private = priv; | 2794 | mddev->private = priv; |
| 2739 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 2795 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
| @@ -3543,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 3543 | if (max < mddev->resync_min) | 3599 | if (max < mddev->resync_min) |
| 3544 | return -EINVAL; | 3600 | return -EINVAL; |
| 3545 | if (max < mddev->resync_max && | 3601 | if (max < mddev->resync_max && |
| 3602 | mddev->ro == 0 && | ||
| 3546 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 3603 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
| 3547 | return -EBUSY; | 3604 | return -EBUSY; |
| 3548 | 3605 | ||
| @@ -3573,7 +3630,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 3573 | char *e; | 3630 | char *e; |
| 3574 | unsigned long long new = simple_strtoull(buf, &e, 10); | 3631 | unsigned long long new = simple_strtoull(buf, &e, 10); |
| 3575 | 3632 | ||
| 3576 | if (mddev->pers->quiesce == NULL) | 3633 | if (mddev->pers == NULL || |
| 3634 | mddev->pers->quiesce == NULL) | ||
| 3577 | return -EINVAL; | 3635 | return -EINVAL; |
| 3578 | if (buf == e || (*e && *e != '\n')) | 3636 | if (buf == e || (*e && *e != '\n')) |
| 3579 | return -EINVAL; | 3637 | return -EINVAL; |
| @@ -3601,7 +3659,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 3601 | char *e; | 3659 | char *e; |
| 3602 | unsigned long long new = simple_strtoull(buf, &e, 10); | 3660 | unsigned long long new = simple_strtoull(buf, &e, 10); |
| 3603 | 3661 | ||
| 3604 | if (mddev->pers->quiesce == NULL) | 3662 | if (mddev->pers == NULL || |
| 3663 | mddev->pers->quiesce == NULL) | ||
| 3605 | return -EINVAL; | 3664 | return -EINVAL; |
| 3606 | if (buf == e || (*e && *e != '\n')) | 3665 | if (buf == e || (*e && *e != '\n')) |
| 3607 | return -EINVAL; | 3666 | return -EINVAL; |
| @@ -3681,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
| 3681 | 3740 | ||
| 3682 | mddev->array_sectors = sectors; | 3741 | mddev->array_sectors = sectors; |
| 3683 | set_capacity(mddev->gendisk, mddev->array_sectors); | 3742 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 3684 | if (mddev->pers) { | 3743 | if (mddev->pers) |
| 3685 | struct block_device *bdev = bdget_disk(mddev->gendisk, 0); | 3744 | revalidate_disk(mddev->gendisk); |
| 3686 | |||
| 3687 | if (bdev) { | ||
| 3688 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
| 3689 | i_size_write(bdev->bd_inode, | ||
| 3690 | (loff_t)mddev->array_sectors << 9); | ||
| 3691 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
| 3692 | bdput(bdev); | ||
| 3693 | } | ||
| 3694 | } | ||
| 3695 | 3745 | ||
| 3696 | return len; | 3746 | return len; |
| 3697 | } | 3747 | } |
| @@ -3844,11 +3894,9 @@ static int md_alloc(dev_t dev, char *name) | |||
| 3844 | flush_scheduled_work(); | 3894 | flush_scheduled_work(); |
| 3845 | 3895 | ||
| 3846 | mutex_lock(&disks_mutex); | 3896 | mutex_lock(&disks_mutex); |
| 3847 | if (mddev->gendisk) { | 3897 | error = -EEXIST; |
| 3848 | mutex_unlock(&disks_mutex); | 3898 | if (mddev->gendisk) |
| 3849 | mddev_put(mddev); | 3899 | goto abort; |
| 3850 | return -EEXIST; | ||
| 3851 | } | ||
| 3852 | 3900 | ||
| 3853 | if (name) { | 3901 | if (name) { |
| 3854 | /* Need to ensure that 'name' is not a duplicate. | 3902 | /* Need to ensure that 'name' is not a duplicate. |
| @@ -3860,17 +3908,15 @@ static int md_alloc(dev_t dev, char *name) | |||
| 3860 | if (mddev2->gendisk && | 3908 | if (mddev2->gendisk && |
| 3861 | strcmp(mddev2->gendisk->disk_name, name) == 0) { | 3909 | strcmp(mddev2->gendisk->disk_name, name) == 0) { |
| 3862 | spin_unlock(&all_mddevs_lock); | 3910 | spin_unlock(&all_mddevs_lock); |
| 3863 | return -EEXIST; | 3911 | goto abort; |
| 3864 | } | 3912 | } |
| 3865 | spin_unlock(&all_mddevs_lock); | 3913 | spin_unlock(&all_mddevs_lock); |
| 3866 | } | 3914 | } |
| 3867 | 3915 | ||
| 3916 | error = -ENOMEM; | ||
| 3868 | mddev->queue = blk_alloc_queue(GFP_KERNEL); | 3917 | mddev->queue = blk_alloc_queue(GFP_KERNEL); |
| 3869 | if (!mddev->queue) { | 3918 | if (!mddev->queue) |
| 3870 | mutex_unlock(&disks_mutex); | 3919 | goto abort; |
| 3871 | mddev_put(mddev); | ||
| 3872 | return -ENOMEM; | ||
| 3873 | } | ||
| 3874 | mddev->queue->queuedata = mddev; | 3920 | mddev->queue->queuedata = mddev; |
| 3875 | 3921 | ||
| 3876 | /* Can be unlocked because the queue is new: no concurrency */ | 3922 | /* Can be unlocked because the queue is new: no concurrency */ |
| @@ -3880,11 +3926,9 @@ static int md_alloc(dev_t dev, char *name) | |||
| 3880 | 3926 | ||
| 3881 | disk = alloc_disk(1 << shift); | 3927 | disk = alloc_disk(1 << shift); |
| 3882 | if (!disk) { | 3928 | if (!disk) { |
| 3883 | mutex_unlock(&disks_mutex); | ||
| 3884 | blk_cleanup_queue(mddev->queue); | 3929 | blk_cleanup_queue(mddev->queue); |
| 3885 | mddev->queue = NULL; | 3930 | mddev->queue = NULL; |
| 3886 | mddev_put(mddev); | 3931 | goto abort; |
| 3887 | return -ENOMEM; | ||
| 3888 | } | 3932 | } |
| 3889 | disk->major = MAJOR(mddev->unit); | 3933 | disk->major = MAJOR(mddev->unit); |
| 3890 | disk->first_minor = unit << shift; | 3934 | disk->first_minor = unit << shift; |
| @@ -3906,16 +3950,22 @@ static int md_alloc(dev_t dev, char *name) | |||
| 3906 | mddev->gendisk = disk; | 3950 | mddev->gendisk = disk; |
| 3907 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, | 3951 | error = kobject_init_and_add(&mddev->kobj, &md_ktype, |
| 3908 | &disk_to_dev(disk)->kobj, "%s", "md"); | 3952 | &disk_to_dev(disk)->kobj, "%s", "md"); |
| 3909 | mutex_unlock(&disks_mutex); | 3953 | if (error) { |
| 3910 | if (error) | 3954 | /* This isn't possible, but as kobject_init_and_add is marked |
| 3955 | * __must_check, we must do something with the result | ||
| 3956 | */ | ||
| 3911 | printk(KERN_WARNING "md: cannot register %s/md - name in use\n", | 3957 | printk(KERN_WARNING "md: cannot register %s/md - name in use\n", |
| 3912 | disk->disk_name); | 3958 | disk->disk_name); |
| 3913 | else { | 3959 | error = 0; |
| 3960 | } | ||
| 3961 | abort: | ||
| 3962 | mutex_unlock(&disks_mutex); | ||
| 3963 | if (!error) { | ||
| 3914 | kobject_uevent(&mddev->kobj, KOBJ_ADD); | 3964 | kobject_uevent(&mddev->kobj, KOBJ_ADD); |
| 3915 | mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); | 3965 | mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state"); |
| 3916 | } | 3966 | } |
| 3917 | mddev_put(mddev); | 3967 | mddev_put(mddev); |
| 3918 | return 0; | 3968 | return error; |
| 3919 | } | 3969 | } |
| 3920 | 3970 | ||
| 3921 | static struct kobject *md_probe(dev_t dev, int *part, void *data) | 3971 | static struct kobject *md_probe(dev_t dev, int *part, void *data) |
| @@ -4044,10 +4094,6 @@ static int do_md_run(mddev_t * mddev) | |||
| 4044 | } | 4094 | } |
| 4045 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 4095 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
| 4046 | 4096 | ||
| 4047 | if (pers->level >= 4 && pers->level <= 6) | ||
| 4048 | /* Cannot support integrity (yet) */ | ||
| 4049 | blk_integrity_unregister(mddev->gendisk); | ||
| 4050 | |||
| 4051 | if (mddev->reshape_position != MaxSector && | 4097 | if (mddev->reshape_position != MaxSector && |
| 4052 | pers->start_reshape == NULL) { | 4098 | pers->start_reshape == NULL) { |
| 4053 | /* This personality cannot handle reshaping... */ | 4099 | /* This personality cannot handle reshaping... */ |
| @@ -4185,6 +4231,7 @@ static int do_md_run(mddev_t * mddev) | |||
| 4185 | md_wakeup_thread(mddev->thread); | 4231 | md_wakeup_thread(mddev->thread); |
| 4186 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ | 4232 | md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */ |
| 4187 | 4233 | ||
| 4234 | revalidate_disk(mddev->gendisk); | ||
| 4188 | mddev->changed = 1; | 4235 | mddev->changed = 1; |
| 4189 | md_new_event(mddev); | 4236 | md_new_event(mddev); |
| 4190 | sysfs_notify_dirent(mddev->sysfs_state); | 4237 | sysfs_notify_dirent(mddev->sysfs_state); |
| @@ -4256,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
| 4256 | struct gendisk *disk = mddev->gendisk; | 4303 | struct gendisk *disk = mddev->gendisk; |
| 4257 | mdk_rdev_t *rdev; | 4304 | mdk_rdev_t *rdev; |
| 4258 | 4305 | ||
| 4306 | mutex_lock(&mddev->open_mutex); | ||
| 4259 | if (atomic_read(&mddev->openers) > is_open) { | 4307 | if (atomic_read(&mddev->openers) > is_open) { |
| 4260 | printk("md: %s still in use.\n",mdname(mddev)); | 4308 | printk("md: %s still in use.\n",mdname(mddev)); |
| 4261 | return -EBUSY; | 4309 | err = -EBUSY; |
| 4262 | } | 4310 | } else if (mddev->pers) { |
| 4263 | |||
| 4264 | if (mddev->pers) { | ||
| 4265 | 4311 | ||
| 4266 | if (mddev->sync_thread) { | 4312 | if (mddev->sync_thread) { |
| 4267 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4313 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
| @@ -4318,8 +4364,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
| 4318 | if (mode == 1) | 4364 | if (mode == 1) |
| 4319 | set_disk_ro(disk, 1); | 4365 | set_disk_ro(disk, 1); |
| 4320 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4366 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
| 4367 | err = 0; | ||
| 4321 | } | 4368 | } |
| 4322 | 4369 | out: | |
| 4370 | mutex_unlock(&mddev->open_mutex); | ||
| 4371 | if (err) | ||
| 4372 | return err; | ||
| 4323 | /* | 4373 | /* |
| 4324 | * Free resources if final stop | 4374 | * Free resources if final stop |
| 4325 | */ | 4375 | */ |
| @@ -4385,7 +4435,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
| 4385 | blk_integrity_unregister(disk); | 4435 | blk_integrity_unregister(disk); |
| 4386 | md_new_event(mddev); | 4436 | md_new_event(mddev); |
| 4387 | sysfs_notify_dirent(mddev->sysfs_state); | 4437 | sysfs_notify_dirent(mddev->sysfs_state); |
| 4388 | out: | ||
| 4389 | return err; | 4438 | return err; |
| 4390 | } | 4439 | } |
| 4391 | 4440 | ||
| @@ -5083,18 +5132,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors) | |||
| 5083 | return -ENOSPC; | 5132 | return -ENOSPC; |
| 5084 | } | 5133 | } |
| 5085 | rv = mddev->pers->resize(mddev, num_sectors); | 5134 | rv = mddev->pers->resize(mddev, num_sectors); |
| 5086 | if (!rv) { | 5135 | if (!rv) |
| 5087 | struct block_device *bdev; | 5136 | revalidate_disk(mddev->gendisk); |
| 5088 | |||
| 5089 | bdev = bdget_disk(mddev->gendisk, 0); | ||
| 5090 | if (bdev) { | ||
| 5091 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
| 5092 | i_size_write(bdev->bd_inode, | ||
| 5093 | (loff_t)mddev->array_sectors << 9); | ||
| 5094 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
| 5095 | bdput(bdev); | ||
| 5096 | } | ||
| 5097 | } | ||
| 5098 | return rv; | 5137 | return rv; |
| 5099 | } | 5138 | } |
| 5100 | 5139 | ||
| @@ -5480,12 +5519,12 @@ static int md_open(struct block_device *bdev, fmode_t mode) | |||
| 5480 | } | 5519 | } |
| 5481 | BUG_ON(mddev != bdev->bd_disk->private_data); | 5520 | BUG_ON(mddev != bdev->bd_disk->private_data); |
| 5482 | 5521 | ||
| 5483 | if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) | 5522 | if ((err = mutex_lock_interruptible(&mddev->open_mutex))) |
| 5484 | goto out; | 5523 | goto out; |
| 5485 | 5524 | ||
| 5486 | err = 0; | 5525 | err = 0; |
| 5487 | atomic_inc(&mddev->openers); | 5526 | atomic_inc(&mddev->openers); |
| 5488 | mddev_unlock(mddev); | 5527 | mutex_unlock(&mddev->open_mutex); |
| 5489 | 5528 | ||
| 5490 | check_disk_change(bdev); | 5529 | check_disk_change(bdev); |
| 5491 | out: | 5530 | out: |
| @@ -6334,10 +6373,16 @@ void md_do_sync(mddev_t *mddev) | |||
| 6334 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); | 6373 | sysfs_notify(&mddev->kobj, NULL, "sync_completed"); |
| 6335 | } | 6374 | } |
| 6336 | 6375 | ||
| 6337 | if (j >= mddev->resync_max) | 6376 | while (j >= mddev->resync_max && !kthread_should_stop()) { |
| 6338 | wait_event(mddev->recovery_wait, | 6377 | /* As this condition is controlled by user-space, |
| 6339 | mddev->resync_max > j | 6378 | * we can block indefinitely, so use '_interruptible' |
| 6340 | || kthread_should_stop()); | 6379 | * to avoid triggering warnings. |
| 6380 | */ | ||
| 6381 | flush_signals(current); /* just in case */ | ||
| 6382 | wait_event_interruptible(mddev->recovery_wait, | ||
| 6383 | mddev->resync_max > j | ||
| 6384 | || kthread_should_stop()); | ||
| 6385 | } | ||
| 6341 | 6386 | ||
| 6342 | if (kthread_should_stop()) | 6387 | if (kthread_should_stop()) |
| 6343 | goto interrupted; | 6388 | goto interrupted; |
diff --git a/drivers/md/md.h b/drivers/md/md.h index 9430a110db93..f8fc188bc762 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
| @@ -223,6 +223,16 @@ struct mddev_s | |||
| 223 | * so we don't loop trying */ | 223 | * so we don't loop trying */ |
| 224 | 224 | ||
| 225 | int in_sync; /* know to not need resync */ | 225 | int in_sync; /* know to not need resync */ |
| 226 | /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so | ||
| 227 | * that we are never stopping an array while it is open. | ||
| 228 | * 'reconfig_mutex' protects all other reconfiguration. | ||
| 229 | * These locks are separate due to conflicting interactions | ||
| 230 | * with bdev->bd_mutex. | ||
| 231 | * Lock ordering is: | ||
| 232 | * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk | ||
| 233 | * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open | ||
| 234 | */ | ||
| 235 | struct mutex open_mutex; | ||
| 226 | struct mutex reconfig_mutex; | 236 | struct mutex reconfig_mutex; |
| 227 | atomic_t active; /* general refcount */ | 237 | atomic_t active; /* general refcount */ |
| 228 | atomic_t openers; /* number of active opens */ | 238 | atomic_t openers; /* number of active opens */ |
| @@ -431,5 +441,7 @@ extern int md_allow_write(mddev_t *mddev); | |||
| 431 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | 441 | extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); |
| 432 | extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); | 442 | extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); |
| 433 | extern int md_check_no_bitmap(mddev_t *mddev); | 443 | extern int md_check_no_bitmap(mddev_t *mddev); |
| 444 | extern int md_integrity_register(mddev_t *mddev); | ||
| 445 | void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev); | ||
| 434 | 446 | ||
| 435 | #endif /* _MD_MD_H */ | 447 | #endif /* _MD_MD_H */ |
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c index cbe368fa6598..7140909f6662 100644 --- a/drivers/md/multipath.c +++ b/drivers/md/multipath.c | |||
| @@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 294 | for (path = first; path <= last; path++) | 294 | for (path = first; path <= last; path++) |
| 295 | if ((p=conf->multipaths+path)->rdev == NULL) { | 295 | if ((p=conf->multipaths+path)->rdev == NULL) { |
| 296 | q = rdev->bdev->bd_disk->queue; | 296 | q = rdev->bdev->bd_disk->queue; |
| 297 | blk_queue_stack_limits(mddev->queue, q); | 297 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 298 | rdev->data_offset << 9); | ||
| 298 | 299 | ||
| 299 | /* as we don't honour merge_bvec_fn, we must never risk | 300 | /* as we don't honour merge_bvec_fn, we must never risk |
| 300 | * violating it, so limit ->max_sector to one PAGE, as | 301 | * violating it, so limit ->max_sector to one PAGE, as |
| @@ -312,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 312 | set_bit(In_sync, &rdev->flags); | 313 | set_bit(In_sync, &rdev->flags); |
| 313 | rcu_assign_pointer(p->rdev, rdev); | 314 | rcu_assign_pointer(p->rdev, rdev); |
| 314 | err = 0; | 315 | err = 0; |
| 316 | md_integrity_add_rdev(rdev, mddev); | ||
| 315 | break; | 317 | break; |
| 316 | } | 318 | } |
| 317 | 319 | ||
| @@ -344,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number) | |||
| 344 | /* lost the race, try later */ | 346 | /* lost the race, try later */ |
| 345 | err = -EBUSY; | 347 | err = -EBUSY; |
| 346 | p->rdev = rdev; | 348 | p->rdev = rdev; |
| 349 | goto abort; | ||
| 347 | } | 350 | } |
| 351 | md_integrity_register(mddev); | ||
| 348 | } | 352 | } |
| 349 | abort: | 353 | abort: |
| 350 | 354 | ||
| @@ -463,9 +467,9 @@ static int multipath_run (mddev_t *mddev) | |||
| 463 | 467 | ||
| 464 | disk = conf->multipaths + disk_idx; | 468 | disk = conf->multipaths + disk_idx; |
| 465 | disk->rdev = rdev; | 469 | disk->rdev = rdev; |
| 470 | disk_stack_limits(mddev->gendisk, rdev->bdev, | ||
| 471 | rdev->data_offset << 9); | ||
| 466 | 472 | ||
| 467 | blk_queue_stack_limits(mddev->queue, | ||
| 468 | rdev->bdev->bd_disk->queue); | ||
| 469 | /* as we don't honour merge_bvec_fn, we must never risk | 473 | /* as we don't honour merge_bvec_fn, we must never risk |
| 470 | * violating it, not that we ever expect a device with | 474 | * violating it, not that we ever expect a device with |
| 471 | * a merge_bvec_fn to be involved in multipath */ | 475 | * a merge_bvec_fn to be involved in multipath */ |
| @@ -518,7 +522,7 @@ static int multipath_run (mddev_t *mddev) | |||
| 518 | mddev->queue->unplug_fn = multipath_unplug; | 522 | mddev->queue->unplug_fn = multipath_unplug; |
| 519 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; | 523 | mddev->queue->backing_dev_info.congested_fn = multipath_congested; |
| 520 | mddev->queue->backing_dev_info.congested_data = mddev; | 524 | mddev->queue->backing_dev_info.congested_data = mddev; |
| 521 | 525 | md_integrity_register(mddev); | |
| 522 | return 0; | 526 | return 0; |
| 523 | 527 | ||
| 524 | out_free_conf: | 528 | out_free_conf: |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index ab4a489d8695..898e2bdfee47 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
| @@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev) | |||
| 170 | } | 170 | } |
| 171 | dev[j] = rdev1; | 171 | dev[j] = rdev1; |
| 172 | 172 | ||
| 173 | blk_queue_stack_limits(mddev->queue, | 173 | disk_stack_limits(mddev->gendisk, rdev1->bdev, |
| 174 | rdev1->bdev->bd_disk->queue); | 174 | rdev1->data_offset << 9); |
| 175 | /* as we don't honour merge_bvec_fn, we must never risk | 175 | /* as we don't honour merge_bvec_fn, we must never risk |
| 176 | * violating it, so limit ->max_sector to one PAGE, as | 176 | * violating it, so limit ->max_sector to one PAGE, as |
| 177 | * a one page request is never in violation. | 177 | * a one page request is never in violation. |
| @@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev) | |||
| 250 | mddev->chunk_sectors << 9); | 250 | mddev->chunk_sectors << 9); |
| 251 | goto abort; | 251 | goto abort; |
| 252 | } | 252 | } |
| 253 | |||
| 254 | blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); | ||
| 255 | blk_queue_io_opt(mddev->queue, | ||
| 256 | (mddev->chunk_sectors << 9) * mddev->raid_disks); | ||
| 257 | |||
| 253 | printk(KERN_INFO "raid0: done.\n"); | 258 | printk(KERN_INFO "raid0: done.\n"); |
| 254 | mddev->private = conf; | 259 | mddev->private = conf; |
| 255 | return 0; | 260 | return 0; |
| @@ -346,6 +351,7 @@ static int raid0_run(mddev_t *mddev) | |||
| 346 | 351 | ||
| 347 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); | 352 | blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec); |
| 348 | dump_zones(mddev); | 353 | dump_zones(mddev); |
| 354 | md_integrity_register(mddev); | ||
| 349 | return 0; | 355 | return 0; |
| 350 | } | 356 | } |
| 351 | 357 | ||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 89939a7aef57..8726fd7ebce5 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
| @@ -1123,8 +1123,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1123 | for (mirror = first; mirror <= last; mirror++) | 1123 | for (mirror = first; mirror <= last; mirror++) |
| 1124 | if ( !(p=conf->mirrors+mirror)->rdev) { | 1124 | if ( !(p=conf->mirrors+mirror)->rdev) { |
| 1125 | 1125 | ||
| 1126 | blk_queue_stack_limits(mddev->queue, | 1126 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 1127 | rdev->bdev->bd_disk->queue); | 1127 | rdev->data_offset << 9); |
| 1128 | /* as we don't honour merge_bvec_fn, we must never risk | 1128 | /* as we don't honour merge_bvec_fn, we must never risk |
| 1129 | * violating it, so limit ->max_sector to one PAGE, as | 1129 | * violating it, so limit ->max_sector to one PAGE, as |
| 1130 | * a one page request is never in violation. | 1130 | * a one page request is never in violation. |
| @@ -1144,7 +1144,7 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1144 | rcu_assign_pointer(p->rdev, rdev); | 1144 | rcu_assign_pointer(p->rdev, rdev); |
| 1145 | break; | 1145 | break; |
| 1146 | } | 1146 | } |
| 1147 | 1147 | md_integrity_add_rdev(rdev, mddev); | |
| 1148 | print_conf(conf); | 1148 | print_conf(conf); |
| 1149 | return err; | 1149 | return err; |
| 1150 | } | 1150 | } |
| @@ -1178,7 +1178,9 @@ static int raid1_remove_disk(mddev_t *mddev, int number) | |||
| 1178 | /* lost the race, try later */ | 1178 | /* lost the race, try later */ |
| 1179 | err = -EBUSY; | 1179 | err = -EBUSY; |
| 1180 | p->rdev = rdev; | 1180 | p->rdev = rdev; |
| 1181 | goto abort; | ||
| 1181 | } | 1182 | } |
| 1183 | md_integrity_register(mddev); | ||
| 1182 | } | 1184 | } |
| 1183 | abort: | 1185 | abort: |
| 1184 | 1186 | ||
| @@ -1988,9 +1990,8 @@ static int run(mddev_t *mddev) | |||
| 1988 | disk = conf->mirrors + disk_idx; | 1990 | disk = conf->mirrors + disk_idx; |
| 1989 | 1991 | ||
| 1990 | disk->rdev = rdev; | 1992 | disk->rdev = rdev; |
| 1991 | 1993 | disk_stack_limits(mddev->gendisk, rdev->bdev, | |
| 1992 | blk_queue_stack_limits(mddev->queue, | 1994 | rdev->data_offset << 9); |
| 1993 | rdev->bdev->bd_disk->queue); | ||
| 1994 | /* as we don't honour merge_bvec_fn, we must never risk | 1995 | /* as we don't honour merge_bvec_fn, we must never risk |
| 1995 | * violating it, so limit ->max_sector to one PAGE, as | 1996 | * violating it, so limit ->max_sector to one PAGE, as |
| 1996 | * a one page request is never in violation. | 1997 | * a one page request is never in violation. |
| @@ -2068,7 +2069,7 @@ static int run(mddev_t *mddev) | |||
| 2068 | mddev->queue->unplug_fn = raid1_unplug; | 2069 | mddev->queue->unplug_fn = raid1_unplug; |
| 2069 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; | 2070 | mddev->queue->backing_dev_info.congested_fn = raid1_congested; |
| 2070 | mddev->queue->backing_dev_info.congested_data = mddev; | 2071 | mddev->queue->backing_dev_info.congested_data = mddev; |
| 2071 | 2072 | md_integrity_register(mddev); | |
| 2072 | return 0; | 2073 | return 0; |
| 2073 | 2074 | ||
| 2074 | out_no_mem: | 2075 | out_no_mem: |
| @@ -2133,6 +2134,7 @@ static int raid1_resize(mddev_t *mddev, sector_t sectors) | |||
| 2133 | return -EINVAL; | 2134 | return -EINVAL; |
| 2134 | set_capacity(mddev->gendisk, mddev->array_sectors); | 2135 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 2135 | mddev->changed = 1; | 2136 | mddev->changed = 1; |
| 2137 | revalidate_disk(mddev->gendisk); | ||
| 2136 | if (sectors > mddev->dev_sectors && | 2138 | if (sectors > mddev->dev_sectors && |
| 2137 | mddev->recovery_cp == MaxSector) { | 2139 | mddev->recovery_cp == MaxSector) { |
| 2138 | mddev->recovery_cp = mddev->dev_sectors; | 2140 | mddev->recovery_cp = mddev->dev_sectors; |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index ae12ceafe10c..3d9020cf6f6e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
| @@ -1151,8 +1151,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1151 | for ( ; mirror <= last ; mirror++) | 1151 | for ( ; mirror <= last ; mirror++) |
| 1152 | if ( !(p=conf->mirrors+mirror)->rdev) { | 1152 | if ( !(p=conf->mirrors+mirror)->rdev) { |
| 1153 | 1153 | ||
| 1154 | blk_queue_stack_limits(mddev->queue, | 1154 | disk_stack_limits(mddev->gendisk, rdev->bdev, |
| 1155 | rdev->bdev->bd_disk->queue); | 1155 | rdev->data_offset << 9); |
| 1156 | /* as we don't honour merge_bvec_fn, we must never risk | 1156 | /* as we don't honour merge_bvec_fn, we must never risk |
| 1157 | * violating it, so limit ->max_sector to one PAGE, as | 1157 | * violating it, so limit ->max_sector to one PAGE, as |
| 1158 | * a one page request is never in violation. | 1158 | * a one page request is never in violation. |
| @@ -1170,6 +1170,7 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
| 1170 | break; | 1170 | break; |
| 1171 | } | 1171 | } |
| 1172 | 1172 | ||
| 1173 | md_integrity_add_rdev(rdev, mddev); | ||
| 1173 | print_conf(conf); | 1174 | print_conf(conf); |
| 1174 | return err; | 1175 | return err; |
| 1175 | } | 1176 | } |
| @@ -1203,7 +1204,9 @@ static int raid10_remove_disk(mddev_t *mddev, int number) | |||
| 1203 | /* lost the race, try later */ | 1204 | /* lost the race, try later */ |
| 1204 | err = -EBUSY; | 1205 | err = -EBUSY; |
| 1205 | p->rdev = rdev; | 1206 | p->rdev = rdev; |
| 1207 | goto abort; | ||
| 1206 | } | 1208 | } |
| 1209 | md_integrity_register(mddev); | ||
| 1207 | } | 1210 | } |
| 1208 | abort: | 1211 | abort: |
| 1209 | 1212 | ||
| @@ -2044,7 +2047,7 @@ raid10_size(mddev_t *mddev, sector_t sectors, int raid_disks) | |||
| 2044 | static int run(mddev_t *mddev) | 2047 | static int run(mddev_t *mddev) |
| 2045 | { | 2048 | { |
| 2046 | conf_t *conf; | 2049 | conf_t *conf; |
| 2047 | int i, disk_idx; | 2050 | int i, disk_idx, chunk_size; |
| 2048 | mirror_info_t *disk; | 2051 | mirror_info_t *disk; |
| 2049 | mdk_rdev_t *rdev; | 2052 | mdk_rdev_t *rdev; |
| 2050 | int nc, fc, fo; | 2053 | int nc, fc, fo; |
| @@ -2130,6 +2133,14 @@ static int run(mddev_t *mddev) | |||
| 2130 | spin_lock_init(&conf->device_lock); | 2133 | spin_lock_init(&conf->device_lock); |
| 2131 | mddev->queue->queue_lock = &conf->device_lock; | 2134 | mddev->queue->queue_lock = &conf->device_lock; |
| 2132 | 2135 | ||
| 2136 | chunk_size = mddev->chunk_sectors << 9; | ||
| 2137 | blk_queue_io_min(mddev->queue, chunk_size); | ||
| 2138 | if (conf->raid_disks % conf->near_copies) | ||
| 2139 | blk_queue_io_opt(mddev->queue, chunk_size * conf->raid_disks); | ||
| 2140 | else | ||
| 2141 | blk_queue_io_opt(mddev->queue, chunk_size * | ||
| 2142 | (conf->raid_disks / conf->near_copies)); | ||
| 2143 | |||
| 2133 | list_for_each_entry(rdev, &mddev->disks, same_set) { | 2144 | list_for_each_entry(rdev, &mddev->disks, same_set) { |
| 2134 | disk_idx = rdev->raid_disk; | 2145 | disk_idx = rdev->raid_disk; |
| 2135 | if (disk_idx >= mddev->raid_disks | 2146 | if (disk_idx >= mddev->raid_disks |
| @@ -2138,9 +2149,8 @@ static int run(mddev_t *mddev) | |||
| 2138 | disk = conf->mirrors + disk_idx; | 2149 | disk = conf->mirrors + disk_idx; |
| 2139 | 2150 | ||
| 2140 | disk->rdev = rdev; | 2151 | disk->rdev = rdev; |
| 2141 | 2152 | disk_stack_limits(mddev->gendisk, rdev->bdev, | |
| 2142 | blk_queue_stack_limits(mddev->queue, | 2153 | rdev->data_offset << 9); |
| 2143 | rdev->bdev->bd_disk->queue); | ||
| 2144 | /* as we don't honour merge_bvec_fn, we must never risk | 2154 | /* as we don't honour merge_bvec_fn, we must never risk |
| 2145 | * violating it, so limit ->max_sector to one PAGE, as | 2155 | * violating it, so limit ->max_sector to one PAGE, as |
| 2146 | * a one page request is never in violation. | 2156 | * a one page request is never in violation. |
| @@ -2218,6 +2228,7 @@ static int run(mddev_t *mddev) | |||
| 2218 | 2228 | ||
| 2219 | if (conf->near_copies < mddev->raid_disks) | 2229 | if (conf->near_copies < mddev->raid_disks) |
| 2220 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); | 2230 | blk_queue_merge_bvec(mddev->queue, raid10_mergeable_bvec); |
| 2231 | md_integrity_register(mddev); | ||
| 2221 | return 0; | 2232 | return 0; |
| 2222 | 2233 | ||
| 2223 | out_free_conf: | 2234 | out_free_conf: |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index cac6f4d3a143..9b00a229015a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -3911,13 +3911,21 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
| 3911 | goto retry; | 3911 | goto retry; |
| 3912 | } | 3912 | } |
| 3913 | } | 3913 | } |
| 3914 | /* FIXME what if we get a false positive because these | 3914 | |
| 3915 | * are being updated. | 3915 | if (bio_data_dir(bi) == WRITE && |
| 3916 | */ | 3916 | logical_sector >= mddev->suspend_lo && |
| 3917 | if (logical_sector >= mddev->suspend_lo && | ||
| 3918 | logical_sector < mddev->suspend_hi) { | 3917 | logical_sector < mddev->suspend_hi) { |
| 3919 | release_stripe(sh); | 3918 | release_stripe(sh); |
| 3920 | schedule(); | 3919 | /* As the suspend_* range is controlled by |
| 3920 | * userspace, we want an interruptible | ||
| 3921 | * wait. | ||
| 3922 | */ | ||
| 3923 | flush_signals(current); | ||
| 3924 | prepare_to_wait(&conf->wait_for_overlap, | ||
| 3925 | &w, TASK_INTERRUPTIBLE); | ||
| 3926 | if (logical_sector >= mddev->suspend_lo && | ||
| 3927 | logical_sector < mddev->suspend_hi) | ||
| 3928 | schedule(); | ||
| 3921 | goto retry; | 3929 | goto retry; |
| 3922 | } | 3930 | } |
| 3923 | 3931 | ||
| @@ -3989,7 +3997,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
| 3989 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { | 3997 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { |
| 3990 | sector_nr = raid5_size(mddev, 0, 0) | 3998 | sector_nr = raid5_size(mddev, 0, 0) |
| 3991 | - conf->reshape_progress; | 3999 | - conf->reshape_progress; |
| 3992 | } else if (mddev->delta_disks > 0 && | 4000 | } else if (mddev->delta_disks >= 0 && |
| 3993 | conf->reshape_progress > 0) | 4001 | conf->reshape_progress > 0) |
| 3994 | sector_nr = conf->reshape_progress; | 4002 | sector_nr = conf->reshape_progress; |
| 3995 | sector_div(sector_nr, new_data_disks); | 4003 | sector_div(sector_nr, new_data_disks); |
| @@ -4203,6 +4211,9 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski | |||
| 4203 | return 0; | 4211 | return 0; |
| 4204 | } | 4212 | } |
| 4205 | 4213 | ||
| 4214 | /* Allow raid5_quiesce to complete */ | ||
| 4215 | wait_event(conf->wait_for_overlap, conf->quiesce != 2); | ||
| 4216 | |||
| 4206 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) | 4217 | if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) |
| 4207 | return reshape_request(mddev, sector_nr, skipped); | 4218 | return reshape_request(mddev, sector_nr, skipped); |
| 4208 | 4219 | ||
| @@ -4803,7 +4814,7 @@ static raid5_conf_t *setup_conf(mddev_t *mddev) | |||
| 4803 | static int run(mddev_t *mddev) | 4814 | static int run(mddev_t *mddev) |
| 4804 | { | 4815 | { |
| 4805 | raid5_conf_t *conf; | 4816 | raid5_conf_t *conf; |
| 4806 | int working_disks = 0; | 4817 | int working_disks = 0, chunk_size; |
| 4807 | mdk_rdev_t *rdev; | 4818 | mdk_rdev_t *rdev; |
| 4808 | 4819 | ||
| 4809 | if (mddev->recovery_cp != MaxSector) | 4820 | if (mddev->recovery_cp != MaxSector) |
| @@ -4844,7 +4855,26 @@ static int run(mddev_t *mddev) | |||
| 4844 | (old_disks-max_degraded)); | 4855 | (old_disks-max_degraded)); |
| 4845 | /* here_old is the first stripe that we might need to read | 4856 | /* here_old is the first stripe that we might need to read |
| 4846 | * from */ | 4857 | * from */ |
| 4847 | if (here_new >= here_old) { | 4858 | if (mddev->delta_disks == 0) { |
| 4859 | /* We cannot be sure it is safe to start an in-place | ||
| 4860 | * reshape. It is only safe if user-space if monitoring | ||
| 4861 | * and taking constant backups. | ||
| 4862 | * mdadm always starts a situation like this in | ||
| 4863 | * readonly mode so it can take control before | ||
| 4864 | * allowing any writes. So just check for that. | ||
| 4865 | */ | ||
| 4866 | if ((here_new * mddev->new_chunk_sectors != | ||
| 4867 | here_old * mddev->chunk_sectors) || | ||
| 4868 | mddev->ro == 0) { | ||
| 4869 | printk(KERN_ERR "raid5: in-place reshape must be started" | ||
| 4870 | " in read-only mode - aborting\n"); | ||
| 4871 | return -EINVAL; | ||
| 4872 | } | ||
| 4873 | } else if (mddev->delta_disks < 0 | ||
| 4874 | ? (here_new * mddev->new_chunk_sectors <= | ||
| 4875 | here_old * mddev->chunk_sectors) | ||
| 4876 | : (here_new * mddev->new_chunk_sectors >= | ||
| 4877 | here_old * mddev->chunk_sectors)) { | ||
| 4848 | /* Reading from the same stripe as writing to - bad */ | 4878 | /* Reading from the same stripe as writing to - bad */ |
| 4849 | printk(KERN_ERR "raid5: reshape_position too early for " | 4879 | printk(KERN_ERR "raid5: reshape_position too early for " |
| 4850 | "auto-recovery - aborting.\n"); | 4880 | "auto-recovery - aborting.\n"); |
| @@ -4958,6 +4988,14 @@ static int run(mddev_t *mddev) | |||
| 4958 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 4988 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
| 4959 | 4989 | ||
| 4960 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | 4990 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); |
| 4991 | chunk_size = mddev->chunk_sectors << 9; | ||
| 4992 | blk_queue_io_min(mddev->queue, chunk_size); | ||
| 4993 | blk_queue_io_opt(mddev->queue, chunk_size * | ||
| 4994 | (conf->raid_disks - conf->max_degraded)); | ||
| 4995 | |||
| 4996 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
| 4997 | disk_stack_limits(mddev->gendisk, rdev->bdev, | ||
| 4998 | rdev->data_offset << 9); | ||
| 4961 | 4999 | ||
| 4962 | return 0; | 5000 | return 0; |
| 4963 | abort: | 5001 | abort: |
| @@ -5185,6 +5223,7 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
| 5185 | return -EINVAL; | 5223 | return -EINVAL; |
| 5186 | set_capacity(mddev->gendisk, mddev->array_sectors); | 5224 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 5187 | mddev->changed = 1; | 5225 | mddev->changed = 1; |
| 5226 | revalidate_disk(mddev->gendisk); | ||
| 5188 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { | 5227 | if (sectors > mddev->dev_sectors && mddev->recovery_cp == MaxSector) { |
| 5189 | mddev->recovery_cp = mddev->dev_sectors; | 5228 | mddev->recovery_cp = mddev->dev_sectors; |
| 5190 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); | 5229 | set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); |
| @@ -5330,7 +5369,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
| 5330 | spin_unlock_irqrestore(&conf->device_lock, flags); | 5369 | spin_unlock_irqrestore(&conf->device_lock, flags); |
| 5331 | } | 5370 | } |
| 5332 | mddev->raid_disks = conf->raid_disks; | 5371 | mddev->raid_disks = conf->raid_disks; |
| 5333 | mddev->reshape_position = 0; | 5372 | mddev->reshape_position = conf->reshape_progress; |
| 5334 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 5373 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
| 5335 | 5374 | ||
| 5336 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 5375 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
| @@ -5385,7 +5424,6 @@ static void end_reshape(raid5_conf_t *conf) | |||
| 5385 | */ | 5424 | */ |
| 5386 | static void raid5_finish_reshape(mddev_t *mddev) | 5425 | static void raid5_finish_reshape(mddev_t *mddev) |
| 5387 | { | 5426 | { |
| 5388 | struct block_device *bdev; | ||
| 5389 | raid5_conf_t *conf = mddev->private; | 5427 | raid5_conf_t *conf = mddev->private; |
| 5390 | 5428 | ||
| 5391 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { | 5429 | if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { |
| @@ -5394,15 +5432,7 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
| 5394 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); | 5432 | md_set_array_sectors(mddev, raid5_size(mddev, 0, 0)); |
| 5395 | set_capacity(mddev->gendisk, mddev->array_sectors); | 5433 | set_capacity(mddev->gendisk, mddev->array_sectors); |
| 5396 | mddev->changed = 1; | 5434 | mddev->changed = 1; |
| 5397 | 5435 | revalidate_disk(mddev->gendisk); | |
| 5398 | bdev = bdget_disk(mddev->gendisk, 0); | ||
| 5399 | if (bdev) { | ||
| 5400 | mutex_lock(&bdev->bd_inode->i_mutex); | ||
| 5401 | i_size_write(bdev->bd_inode, | ||
| 5402 | (loff_t)mddev->array_sectors << 9); | ||
| 5403 | mutex_unlock(&bdev->bd_inode->i_mutex); | ||
| 5404 | bdput(bdev); | ||
| 5405 | } | ||
| 5406 | } else { | 5436 | } else { |
| 5407 | int d; | 5437 | int d; |
| 5408 | mddev->degraded = conf->raid_disks; | 5438 | mddev->degraded = conf->raid_disks; |
| @@ -5413,8 +5443,15 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
| 5413 | mddev->degraded--; | 5443 | mddev->degraded--; |
| 5414 | for (d = conf->raid_disks ; | 5444 | for (d = conf->raid_disks ; |
| 5415 | d < conf->raid_disks - mddev->delta_disks; | 5445 | d < conf->raid_disks - mddev->delta_disks; |
| 5416 | d++) | 5446 | d++) { |
| 5417 | raid5_remove_disk(mddev, d); | 5447 | mdk_rdev_t *rdev = conf->disks[d].rdev; |
| 5448 | if (rdev && raid5_remove_disk(mddev, d) == 0) { | ||
| 5449 | char nm[20]; | ||
| 5450 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
| 5451 | sysfs_remove_link(&mddev->kobj, nm); | ||
| 5452 | rdev->raid_disk = -1; | ||
| 5453 | } | ||
| 5454 | } | ||
| 5418 | } | 5455 | } |
| 5419 | mddev->layout = conf->algorithm; | 5456 | mddev->layout = conf->algorithm; |
| 5420 | mddev->chunk_sectors = conf->chunk_sectors; | 5457 | mddev->chunk_sectors = conf->chunk_sectors; |
| @@ -5434,12 +5471,18 @@ static void raid5_quiesce(mddev_t *mddev, int state) | |||
| 5434 | 5471 | ||
| 5435 | case 1: /* stop all writes */ | 5472 | case 1: /* stop all writes */ |
| 5436 | spin_lock_irq(&conf->device_lock); | 5473 | spin_lock_irq(&conf->device_lock); |
| 5437 | conf->quiesce = 1; | 5474 | /* '2' tells resync/reshape to pause so that all |
| 5475 | * active stripes can drain | ||
| 5476 | */ | ||
| 5477 | conf->quiesce = 2; | ||
| 5438 | wait_event_lock_irq(conf->wait_for_stripe, | 5478 | wait_event_lock_irq(conf->wait_for_stripe, |
| 5439 | atomic_read(&conf->active_stripes) == 0 && | 5479 | atomic_read(&conf->active_stripes) == 0 && |
| 5440 | atomic_read(&conf->active_aligned_reads) == 0, | 5480 | atomic_read(&conf->active_aligned_reads) == 0, |
| 5441 | conf->device_lock, /* nothing */); | 5481 | conf->device_lock, /* nothing */); |
| 5482 | conf->quiesce = 1; | ||
| 5442 | spin_unlock_irq(&conf->device_lock); | 5483 | spin_unlock_irq(&conf->device_lock); |
| 5484 | /* allow reshape to continue */ | ||
| 5485 | wake_up(&conf->wait_for_overlap); | ||
| 5443 | break; | 5486 | break; |
| 5444 | 5487 | ||
| 5445 | case 0: /* re-enable writes */ | 5488 | case 0: /* re-enable writes */ |
