Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-raid1.c                                 |   3
-rw-r--r--  drivers/md/dm-region-hash.c                           |   5
-rw-r--r--  drivers/md/dm-thin.c                                  |  13
-rw-r--r--  drivers/md/md.c                                       |  45
-rw-r--r--  drivers/md/multipath.c                                |   3
-rw-r--r--  drivers/md/persistent-data/dm-space-map-checker.c     |  54
-rw-r--r--  drivers/md/persistent-data/dm-space-map-disk.c        |  11
-rw-r--r--  drivers/md/persistent-data/dm-transaction-manager.c   |  11
-rw-r--r--  drivers/md/raid1.c                                    |  30
-rw-r--r--  drivers/md/raid10.c                                   |  30
-rw-r--r--  drivers/md/raid5.c                                    |  67
11 files changed, 185 insertions, 87 deletions
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d039de8322f0..b58b7a33914a 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1084,6 +1084,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 	ti->split_io = dm_rh_get_region_size(ms->rh);
 	ti->num_flush_requests = 1;
 	ti->num_discard_requests = 1;
+	ti->discard_zeroes_data_unsupported = 1;
 
 	ms->kmirrord_wq = alloc_workqueue("kmirrord",
 					  WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
@@ -1214,7 +1215,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 	 * We need to dec pending if this was a write.
 	 */
 	if (rw == WRITE) {
-		if (!(bio->bi_rw & REQ_FLUSH))
+		if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
 			dm_rh_dec(ms->rh, map_context->ll);
 		return error;
 	}
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 7771ed212182..69732e03eb34 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
 		return;
 	}
 
+	if (bio->bi_rw & REQ_DISCARD)
+		return;
+
 	/* We must inform the log that the sync count has changed. */
 	log->type->set_region_sync(log, region, 0);
 
@@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
 	struct bio *bio;
 
 	for (bio = bios->head; bio; bio = bio->bi_next) {
-		if (bio->bi_rw & REQ_FLUSH)
+		if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
 			continue;
 		rh_inc(rh, dm_rh_bio_to_region(rh, bio));
 	}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 37fdaf81bd1f..68694da0d21d 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1245,7 +1245,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
 
 			cell_release_singleton(cell, bio);
 			cell_release_singleton(cell2, bio);
-			remap_and_issue(tc, bio, lookup_result.block);
+			if ((!lookup_result.shared) && pool->pf.discard_passdown)
+				remap_and_issue(tc, bio, lookup_result.block);
+			else
+				bio_endio(bio, 0);
 		}
 		break;
 
@@ -2292,6 +2295,13 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct
 	if (r)
 		return r;
 
+	r = dm_pool_commit_metadata(pool->pmd);
+	if (r) {
+		DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
+		      __func__, r);
+		return r;
+	}
+
 	r = dm_pool_reserve_metadata_snap(pool->pmd);
 	if (r)
 		DMWARN("reserve_metadata_snap message failed.");
@@ -2621,6 +2631,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	if (tc->pool->pf.discard_enabled) {
 		ti->discards_supported = 1;
 		ti->num_discard_requests = 1;
+		ti->discard_zeroes_data_unsupported = 1;
 	}
 
 	dm_put(pool_md);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1c2f9048e1ae..d5ab4493c8be 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2931,6 +2931,7 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len)
 		 * can be sane */
 		return -EBUSY;
 	rdev->data_offset = offset;
+	rdev->new_data_offset = offset;
 	return len;
 }
 
@@ -3926,8 +3927,8 @@ array_state_show(struct mddev *mddev, char *page)
 	return sprintf(page, "%s\n", array_states[st]);
 }
 
-static int do_md_stop(struct mddev * mddev, int ro, int is_open);
-static int md_set_readonly(struct mddev * mddev, int is_open);
+static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
+static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
 static int do_md_run(struct mddev * mddev);
 static int restart_array(struct mddev *mddev);
 
@@ -3943,14 +3944,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 		/* stopping an active array */
 		if (atomic_read(&mddev->openers) > 0)
 			return -EBUSY;
-		err = do_md_stop(mddev, 0, 0);
+		err = do_md_stop(mddev, 0, NULL);
 		break;
 	case inactive:
 		/* stopping an active array */
 		if (mddev->pers) {
 			if (atomic_read(&mddev->openers) > 0)
 				return -EBUSY;
-			err = do_md_stop(mddev, 2, 0);
+			err = do_md_stop(mddev, 2, NULL);
 		} else
 			err = 0; /* already inactive */
 		break;
@@ -3958,7 +3959,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 		break; /* not supported yet */
 	case readonly:
 		if (mddev->pers)
-			err = md_set_readonly(mddev, 0);
+			err = md_set_readonly(mddev, NULL);
 		else {
 			mddev->ro = 1;
 			set_disk_ro(mddev->gendisk, 1);
@@ -3968,7 +3969,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 	case read_auto:
 		if (mddev->pers) {
 			if (mddev->ro == 0)
-				err = md_set_readonly(mddev, 0);
+				err = md_set_readonly(mddev, NULL);
 			else if (mddev->ro == 1)
 				err = restart_array(mddev);
 			if (err == 0) {
@@ -5351,15 +5352,17 @@ void md_stop(struct mddev *mddev)
 }
 EXPORT_SYMBOL_GPL(md_stop);
 
-static int md_set_readonly(struct mddev *mddev, int is_open)
+static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
 {
 	int err = 0;
 	mutex_lock(&mddev->open_mutex);
-	if (atomic_read(&mddev->openers) > is_open) {
+	if (atomic_read(&mddev->openers) > !!bdev) {
 		printk("md: %s still in use.\n",mdname(mddev));
 		err = -EBUSY;
 		goto out;
 	}
+	if (bdev)
+		sync_blockdev(bdev);
 	if (mddev->pers) {
 		__md_stop_writes(mddev);
 
@@ -5381,18 +5384,26 @@ out:
  * 0 - completely stop and dis-assemble array
  * 2 - stop but do not disassemble array
  */
-static int do_md_stop(struct mddev * mddev, int mode, int is_open)
+static int do_md_stop(struct mddev * mddev, int mode,
+		      struct block_device *bdev)
 {
 	struct gendisk *disk = mddev->gendisk;
 	struct md_rdev *rdev;
 
 	mutex_lock(&mddev->open_mutex);
-	if (atomic_read(&mddev->openers) > is_open ||
+	if (atomic_read(&mddev->openers) > !!bdev ||
 	    mddev->sysfs_active) {
 		printk("md: %s still in use.\n",mdname(mddev));
 		mutex_unlock(&mddev->open_mutex);
 		return -EBUSY;
 	}
+	if (bdev)
+		/* It is possible IO was issued on some other
+		 * open file which was closed before we took ->open_mutex.
+		 * As that was not the last close __blkdev_put will not
+		 * have called sync_blockdev, so we must.
+		 */
+		sync_blockdev(bdev);
 
 	if (mddev->pers) {
 		if (mddev->ro)
@@ -5466,7 +5477,7 @@ static void autorun_array(struct mddev *mddev)
 	err = do_md_run(mddev);
 	if (err) {
 		printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
-		do_md_stop(mddev, 0, 0);
+		do_md_stop(mddev, 0, NULL);
 	}
 }
 
@@ -5784,8 +5795,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
 			super_types[mddev->major_version].
 				validate_super(mddev, rdev);
 			if ((info->state & (1<<MD_DISK_SYNC)) &&
-			    (!test_bit(In_sync, &rdev->flags) ||
-			     rdev->raid_disk != info->raid_disk)) {
+			    rdev->raid_disk != info->raid_disk) {
 				/* This was a hot-add request, but events doesn't
 				 * match, so reject it.
 				 */
@@ -6482,11 +6492,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 		goto done_unlock;
 
 	case STOP_ARRAY:
-		err = do_md_stop(mddev, 0, 1);
+		err = do_md_stop(mddev, 0, bdev);
 		goto done_unlock;
 
 	case STOP_ARRAY_RO:
-		err = md_set_readonly(mddev, 1);
+		err = md_set_readonly(mddev, bdev);
 		goto done_unlock;
 
 	case BLKROSET:
@@ -6751,7 +6761,7 @@ struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev
 	thread->tsk = kthread_run(md_thread, thread,
 				  "%s_%s",
 				  mdname(thread->mddev),
-				  name ?: mddev->pers->name);
+				  name);
 	if (IS_ERR(thread->tsk)) {
 		kfree(thread);
 		return NULL;
@@ -7298,6 +7308,7 @@ void md_do_sync(struct mddev *mddev)
 	int skipped = 0;
 	struct md_rdev *rdev;
 	char *desc;
+	struct blk_plug plug;
 
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7447,6 +7458,7 @@ void md_do_sync(struct mddev *mddev)
 	}
 	mddev->curr_resync_completed = j;
 
+	blk_start_plug(&plug);
 	while (j < max_sectors) {
 		sector_t sectors;
 
@@ -7552,6 +7564,7 @@ void md_do_sync(struct mddev *mddev)
 	 * this also signals 'finished resyncing' to md_stop
 	 */
  out:
+	blk_finish_plug(&plug);
 	wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
 
 	/* tell personality that we are finished */
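The md_do_sync() hunks above declare a struct blk_plug and bracket the main resync loop with blk_start_plug()/blk_finish_plug(), so the sync requests are batched before being released to the lower layers. A minimal sketch of that plugging pattern (illustrative only; the two helpers stand in for the real resync work and are not part of this patch):

	struct blk_plug plug;

	blk_start_plug(&plug);
	while (more_resync_io_pending())	/* hypothetical helper */
		submit_next_resync_bio();	/* hypothetical helper */
	blk_finish_plug(&plug);			/* flush everything queued while plugged */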
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 9339e67fcc79..61a1833ebaf3 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -474,7 +474,8 @@ static int multipath_run (struct mddev *mddev)
 	}
 
 	{
-		mddev->thread = md_register_thread(multipathd, mddev, NULL);
+		mddev->thread = md_register_thread(multipathd, mddev,
+						   "multipath");
 		if (!mddev->thread) {
 			printk(KERN_ERR "multipath: couldn't allocate thread"
 				" for %s\n", mdname(mddev));
diff --git a/drivers/md/persistent-data/dm-space-map-checker.c b/drivers/md/persistent-data/dm-space-map-checker.c
index 50ed53bf4aa2..fc90c11620ad 100644
--- a/drivers/md/persistent-data/dm-space-map-checker.c
+++ b/drivers/md/persistent-data/dm-space-map-checker.c
@@ -8,6 +8,7 @@
 
 #include <linux/device-mapper.h>
 #include <linux/export.h>
+#include <linux/vmalloc.h>
 
 #ifdef CONFIG_DM_DEBUG_SPACE_MAPS
 
@@ -89,13 +90,23 @@ static int ca_create(struct count_array *ca, struct dm_space_map *sm)
 
 	ca->nr = nr_blocks;
 	ca->nr_free = nr_blocks;
-	ca->counts = kzalloc(sizeof(*ca->counts) * nr_blocks, GFP_KERNEL);
-	if (!ca->counts)
-		return -ENOMEM;
+
+	if (!nr_blocks)
+		ca->counts = NULL;
+	else {
+		ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks);
+		if (!ca->counts)
+			return -ENOMEM;
+	}
 
 	return 0;
 }
 
+static void ca_destroy(struct count_array *ca)
+{
+	vfree(ca->counts);
+}
+
 static int ca_load(struct count_array *ca, struct dm_space_map *sm)
 {
 	int r;
@@ -126,12 +137,14 @@ static int ca_load(struct count_array *ca, struct dm_space_map *sm)
 static int ca_extend(struct count_array *ca, dm_block_t extra_blocks)
 {
 	dm_block_t nr_blocks = ca->nr + extra_blocks;
-	uint32_t *counts = kzalloc(sizeof(*counts) * nr_blocks, GFP_KERNEL);
+	uint32_t *counts = vzalloc(sizeof(*counts) * nr_blocks);
 	if (!counts)
 		return -ENOMEM;
 
-	memcpy(counts, ca->counts, sizeof(*counts) * ca->nr);
-	kfree(ca->counts);
+	if (ca->counts) {
+		memcpy(counts, ca->counts, sizeof(*counts) * ca->nr);
+		ca_destroy(ca);
+	}
 	ca->nr = nr_blocks;
 	ca->nr_free += extra_blocks;
 	ca->counts = counts;
@@ -151,11 +164,6 @@ static int ca_commit(struct count_array *old, struct count_array *new)
 	return 0;
 }
 
-static void ca_destroy(struct count_array *ca)
-{
-	kfree(ca->counts);
-}
-
 /*----------------------------------------------------------------*/
 
 struct sm_checker {
@@ -343,25 +351,25 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
 	int r;
 	struct sm_checker *smc;
 
-	if (!sm)
-		return NULL;
+	if (IS_ERR_OR_NULL(sm))
+		return ERR_PTR(-EINVAL);
 
 	smc = kmalloc(sizeof(*smc), GFP_KERNEL);
 	if (!smc)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	memcpy(&smc->sm, &ops_, sizeof(smc->sm));
 	r = ca_create(&smc->old_counts, sm);
 	if (r) {
 		kfree(smc);
-		return NULL;
+		return ERR_PTR(r);
 	}
 
 	r = ca_create(&smc->counts, sm);
 	if (r) {
 		ca_destroy(&smc->old_counts);
 		kfree(smc);
-		return NULL;
+		return ERR_PTR(r);
 	}
 
 	smc->real_sm = sm;
@@ -371,7 +379,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
 		ca_destroy(&smc->counts);
 		ca_destroy(&smc->old_counts);
 		kfree(smc);
-		return NULL;
+		return ERR_PTR(r);
 	}
 
 	r = ca_commit(&smc->old_counts, &smc->counts);
@@ -379,7 +387,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
 		ca_destroy(&smc->counts);
 		ca_destroy(&smc->old_counts);
 		kfree(smc);
-		return NULL;
+		return ERR_PTR(r);
 	}
 
 	return &smc->sm;
@@ -391,25 +399,25 @@ struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm)
 	int r;
 	struct sm_checker *smc;
 
-	if (!sm)
-		return NULL;
+	if (IS_ERR_OR_NULL(sm))
+		return ERR_PTR(-EINVAL);
 
 	smc = kmalloc(sizeof(*smc), GFP_KERNEL);
 	if (!smc)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	memcpy(&smc->sm, &ops_, sizeof(smc->sm));
 	r = ca_create(&smc->old_counts, sm);
 	if (r) {
 		kfree(smc);
-		return NULL;
+		return ERR_PTR(r);
 	}
 
 	r = ca_create(&smc->counts, sm);
 	if (r) {
 		ca_destroy(&smc->old_counts);
 		kfree(smc);
-		return NULL;
+		return ERR_PTR(r);
 	}
 
 	smc->real_sm = sm;
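With this change the space-map checker constructors report failures through the kernel's ERR_PTR() convention instead of returning NULL, so callers must test with IS_ERR() and decode the errno with PTR_ERR(). A minimal sketch of the caller side (illustrative only; the dm-transaction-manager hunks further below do exactly this):

	#include <linux/err.h>

	struct dm_space_map *sm;

	sm = dm_sm_checker_create(inner);
	if (IS_ERR(sm))
		return PTR_ERR(sm);	/* propagate the encoded errno */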
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index fc469ba9f627..3d0ed5332883 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -290,7 +290,16 @@ struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
 				       dm_block_t nr_blocks)
 {
 	struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks);
-	return dm_sm_checker_create_fresh(sm);
+	struct dm_space_map *smc;
+
+	if (IS_ERR_OR_NULL(sm))
+		return sm;
+
+	smc = dm_sm_checker_create_fresh(sm);
+	if (IS_ERR(smc))
+		dm_sm_destroy(sm);
+
+	return smc;
 }
 EXPORT_SYMBOL_GPL(dm_sm_disk_create);
 
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 400fe144c0cd..e5604b32d91f 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -138,6 +138,9 @@ EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone);
 
 void dm_tm_destroy(struct dm_transaction_manager *tm)
 {
+	if (!tm->is_clone)
+		wipe_shadow_table(tm);
+
 	kfree(tm);
 }
 EXPORT_SYMBOL_GPL(dm_tm_destroy);
@@ -344,8 +347,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm,
 		}
 
 		*sm = dm_sm_checker_create(inner);
-		if (!*sm)
+		if (IS_ERR(*sm)) {
+			r = PTR_ERR(*sm);
 			goto bad2;
+		}
 
 	} else {
 		r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location,
@@ -364,8 +369,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm,
 		}
 
 		*sm = dm_sm_checker_create(inner);
-		if (!*sm)
+		if (IS_ERR(*sm)) {
+			r = PTR_ERR(*sm);
 			goto bad2;
+		}
 	}
 
 	return 0;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 835de7168cd3..cacd008d6864 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -517,8 +517,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 		int bad_sectors;
 
 		int disk = start_disk + i;
-		if (disk >= conf->raid_disks)
-			disk -= conf->raid_disks;
+		if (disk >= conf->raid_disks * 2)
+			disk -= conf->raid_disks * 2;
 
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 		if (r1_bio->bios[disk] == IO_BLOCKED
@@ -883,7 +883,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
 	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	struct md_rdev *blocked_rdev;
-	int plugged;
 	int first_clone;
 	int sectors_handled;
 	int max_sectors;
@@ -1034,7 +1033,6 @@ read_again:
 	 * the bad blocks. Each set of writes gets it's own r1bio
 	 * with a set of bios attached.
 	 */
-	plugged = mddev_check_plugged(mddev);
 
 	disks = conf->raid_disks * 2;
  retry_write:
@@ -1191,6 +1189,8 @@ read_again:
 		bio_list_add(&conf->pending_bio_list, mbio);
 		conf->pending_count++;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
+		if (!mddev_check_plugged(mddev))
+			md_wakeup_thread(mddev->thread);
 	}
 	/* Mustn't call r1_bio_write_done before this next test,
 	 * as it could result in the bio being freed.
@@ -1213,9 +1213,6 @@ read_again:
 
 	/* In case raid1d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
-
-	if (do_sync || !bitmap || !plugged)
-		md_wakeup_thread(mddev->thread);
 }
 
 static void status(struct seq_file *seq, struct mddev *mddev)
@@ -1821,8 +1818,14 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
 
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
 		/* if we're here, all write(s) have completed, so clean up */
-		md_done_sync(mddev, r1_bio->sectors, 1);
-		put_buf(r1_bio);
+		int s = r1_bio->sectors;
+		if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+		    test_bit(R1BIO_WriteError, &r1_bio->state))
+			reschedule_retry(r1_bio);
+		else {
+			put_buf(r1_bio);
+			md_done_sync(mddev, s, 1);
+		}
 	}
 }
 
@@ -2488,9 +2491,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
 	 */
 	if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
 		atomic_set(&r1_bio->remaining, read_targets);
-		for (i = 0; i < conf->raid_disks * 2; i++) {
+		for (i = 0; i < conf->raid_disks * 2 && read_targets; i++) {
 			bio = r1_bio->bios[i];
 			if (bio->bi_end_io == end_sync_read) {
+				read_targets--;
 				md_sync_acct(bio->bi_bdev, nr_sectors);
 				generic_make_request(bio);
 			}
@@ -2550,6 +2554,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 	err = -EINVAL;
 	spin_lock_init(&conf->device_lock);
 	rdev_for_each(rdev, mddev) {
+		struct request_queue *q;
 		int disk_idx = rdev->raid_disk;
 		if (disk_idx >= mddev->raid_disks
 		    || disk_idx < 0)
@@ -2562,6 +2567,9 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 		if (disk->rdev)
 			goto abort;
 		disk->rdev = rdev;
+		q = bdev_get_queue(rdev->bdev);
+		if (q->merge_bvec_fn)
+			mddev->merge_check_needed = 1;
 
 		disk->head_position = 0;
 	}
@@ -2617,7 +2625,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
 		goto abort;
 	}
 	err = -ENOMEM;
-	conf->thread = md_register_thread(raid1d, mddev, NULL);
+	conf->thread = md_register_thread(raid1d, mddev, "raid1");
 	if (!conf->thread) {
 		printk(KERN_ERR
 		       "md/raid1:%s: couldn't allocate thread\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 987db37cb875..8da6282254c3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1039,7 +1039,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
 	const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
 	unsigned long flags;
 	struct md_rdev *blocked_rdev;
-	int plugged;
 	int sectors_handled;
 	int max_sectors;
 	int sectors;
@@ -1239,7 +1238,6 @@ read_again:
 	 * of r10_bios is recored in bio->bi_phys_segments just as with
 	 * the read case.
 	 */
-	plugged = mddev_check_plugged(mddev);
 
 	r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
 	raid10_find_phys(conf, r10_bio);
@@ -1396,6 +1394,8 @@ retry_write:
 		bio_list_add(&conf->pending_bio_list, mbio);
 		conf->pending_count++;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
+		if (!mddev_check_plugged(mddev))
+			md_wakeup_thread(mddev->thread);
 
 		if (!r10_bio->devs[i].repl_bio)
 			continue;
@@ -1423,6 +1423,8 @@ retry_write:
 		bio_list_add(&conf->pending_bio_list, mbio);
 		conf->pending_count++;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
+		if (!mddev_check_plugged(mddev))
+			md_wakeup_thread(mddev->thread);
 	}
 
 	/* Don't remove the bias on 'remaining' (one_write_done) until
@@ -1448,9 +1450,6 @@ retry_write:
 
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
-
-	if (do_sync || !mddev->bitmap || !plugged)
-		md_wakeup_thread(mddev->thread);
 }
 
 static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2310,7 +2309,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			if (r10_sync_page_io(rdev,
 					     r10_bio->devs[sl].addr +
 					     sect,
-					     s<<9, conf->tmppage, WRITE)
+					     s, conf->tmppage, WRITE)
 			    == 0) {
 				/* Well, this device is dead */
 				printk(KERN_NOTICE
@@ -2349,7 +2348,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			switch (r10_sync_page_io(rdev,
 					     r10_bio->devs[sl].addr +
 					     sect,
-					     s<<9, conf->tmppage,
+					     s, conf->tmppage,
 						 READ)) {
 			case 0:
 				/* Well, this device is dead */
@@ -2512,7 +2511,7 @@ read_more:
 	slot = r10_bio->read_slot;
 	printk_ratelimited(
 		KERN_ERR
-		"md/raid10:%s: %s: redirecting"
+		"md/raid10:%s: %s: redirecting "
 		"sector %llu to another mirror\n",
 		mdname(mddev),
 		bdevname(rdev->bdev, b),
@@ -2661,7 +2660,8 @@ static void raid10d(struct mddev *mddev)
 	blk_start_plug(&plug);
 	for (;;) {
 
-		flush_pending_writes(conf);
+		if (atomic_read(&mddev->plug_cnt) == 0)
+			flush_pending_writes(conf);
 
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (list_empty(head)) {
@@ -2890,6 +2890,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 			/* want to reconstruct this device */
 			rb2 = r10_bio;
 			sect = raid10_find_virt(conf, sector_nr, i);
+			if (sect >= mddev->resync_max_sectors) {
+				/* last stripe is not complete - don't
+				 * try to recover this sector.
+				 */
+				continue;
+			}
 			/* Unless we are doing a full sync, or a replacement
 			 * we only need to recover the block if it is set in
 			 * the bitmap
@@ -3421,7 +3427,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
 	spin_lock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
 
-	conf->thread = md_register_thread(raid10d, mddev, NULL);
+	conf->thread = md_register_thread(raid10d, mddev, "raid10");
 	if (!conf->thread)
 		goto out;
 
@@ -3475,6 +3481,7 @@ static int run(struct mddev *mddev)
 
 	rdev_for_each(rdev, mddev) {
 		long long diff;
+		struct request_queue *q;
 
 		disk_idx = rdev->raid_disk;
 		if (disk_idx < 0)
@@ -3493,6 +3500,9 @@ static int run(struct mddev *mddev)
 				goto out_free_conf;
 			disk->rdev = rdev;
 		}
+		q = bdev_get_queue(rdev->bdev);
+		if (q->merge_bvec_fn)
+			mddev->merge_check_needed = 1;
 		diff = (rdev->new_data_offset - rdev->data_offset);
 		if (!mddev->reshape_backwards)
 			diff = -diff;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d26767246d26..04348d76bb30 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
 	BUG_ON(!list_empty(&sh->lru));
 	BUG_ON(atomic_read(&conf->active_stripes)==0);
 	if (test_bit(STRIPE_HANDLE, &sh->state)) {
-		if (test_bit(STRIPE_DELAYED, &sh->state))
+		if (test_bit(STRIPE_DELAYED, &sh->state) &&
+		    !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 			list_add_tail(&sh->lru, &conf->delayed_list);
 		else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
 			 sh->bm_seq - conf->seq_write > 0)
 			list_add_tail(&sh->lru, &conf->bitmap_list);
 		else {
+			clear_bit(STRIPE_DELAYED, &sh->state);
 			clear_bit(STRIPE_BIT_DELAY, &sh->state);
 			list_add_tail(&sh->lru, &conf->handle_list);
 		}
@@ -606,6 +608,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
 				 * a chance*/
 				md_check_recovery(conf->mddev);
 			}
+			/*
+			 * Because md_wait_for_blocked_rdev
+			 * will dec nr_pending, we must
+			 * increment it first.
+			 */
+			atomic_inc(&rdev->nr_pending);
 			md_wait_for_blocked_rdev(rdev, conf->mddev);
 		} else {
 			/* Acknowledged bad block - skip the write */
@@ -1737,6 +1745,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 	} else {
 		const char *bdn = bdevname(rdev->bdev, b);
 		int retry = 0;
+		int set_bad = 0;
 
 		clear_bit(R5_UPTODATE, &sh->dev[i].flags);
 		atomic_inc(&rdev->read_errors);
@@ -1748,7 +1757,8 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				mdname(conf->mddev),
 				(unsigned long long)s,
 				bdn);
-		else if (conf->mddev->degraded >= conf->max_degraded)
+		else if (conf->mddev->degraded >= conf->max_degraded) {
+			set_bad = 1;
 			printk_ratelimited(
 				KERN_WARNING
 				"md/raid:%s: read error not correctable "
@@ -1756,8 +1766,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				mdname(conf->mddev),
 				(unsigned long long)s,
 				bdn);
-		else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
+		} else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
 			/* Oh, no!!! */
+			set_bad = 1;
 			printk_ratelimited(
 				KERN_WARNING
 				"md/raid:%s: read error NOT corrected!! "
@@ -1765,7 +1776,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
 				mdname(conf->mddev),
 				(unsigned long long)s,
 				bdn);
-		else if (atomic_read(&rdev->read_errors)
+		} else if (atomic_read(&rdev->read_errors)
 			 > conf->max_nr_stripes)
 			printk(KERN_WARNING
 			       "md/raid:%s: Too many read errors, failing device %s.\n",
@@ -1777,7 +1788,11 @@ static void raid5_end_read_request(struct bio * bi, int error)
 		else {
 			clear_bit(R5_ReadError, &sh->dev[i].flags);
 			clear_bit(R5_ReWrite, &sh->dev[i].flags);
-			md_error(conf->mddev, rdev);
+			if (!(set_bad
+			      && test_bit(In_sync, &rdev->flags)
+			      && rdev_set_badblocks(
+				      rdev, sh->sector, STRIPE_SECTORS, 0)))
+				md_error(conf->mddev, rdev);
 		}
 	}
 	rdev_dec_pending(rdev, conf->mddev);
@@ -3582,8 +3597,18 @@ static void handle_stripe(struct stripe_head *sh)
 
 finish:
 	/* wait for this device to become unblocked */
-	if (conf->mddev->external && unlikely(s.blocked_rdev))
-		md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
+	if (unlikely(s.blocked_rdev)) {
+		if (conf->mddev->external)
+			md_wait_for_blocked_rdev(s.blocked_rdev,
+						 conf->mddev);
+		else
+			/* Internal metadata will immediately
+			 * be written by raid5d, so we don't
+			 * need to wait here.
+			 */
+			rdev_dec_pending(s.blocked_rdev,
+					 conf->mddev);
+	}
 
 	if (s.handle_bad_blocks)
 		for (i = disks; i--; ) {
@@ -3881,8 +3906,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		raid_bio->bi_next = (void*)rdev;
 		align_bi->bi_bdev = rdev->bdev;
 		align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
-		/* No reshape active, so we can trust rdev->data_offset */
-		align_bi->bi_sector += rdev->data_offset;
 
 		if (!bio_fits_rdev(align_bi) ||
 		    is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
@@ -3893,6 +3916,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 			return 0;
 		}
 
+		/* No reshape active, so we can trust rdev->data_offset */
+		align_bi->bi_sector += rdev->data_offset;
+
 		spin_lock_irq(&conf->device_lock);
 		wait_event_lock_irq(conf->wait_for_stripe,
 				    conf->quiesce == 0,
@@ -3971,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	struct stripe_head *sh;
 	const int rw = bio_data_dir(bi);
 	int remaining;
-	int plugged;
 
 	if (unlikely(bi->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bi);
@@ -3990,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 	bi->bi_next = NULL;
 	bi->bi_phys_segments = 1;	/* over-loaded to count active stripes */
 
-	plugged = mddev_check_plugged(mddev);
 	for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
 		DEFINE_WAIT(w);
 		int previous;
@@ -4092,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			if ((bi->bi_rw & REQ_SYNC) &&
 			    !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
 				atomic_inc(&conf->preread_active_stripes);
+			mddev_check_plugged(mddev);
 			release_stripe(sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
@@ -4099,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
 			finish_wait(&conf->wait_for_overlap, &w);
 			break;
 		}
-
 	}
-	if (!plugged)
-		md_wakeup_thread(mddev->thread);
 
 	spin_lock_irq(&conf->device_lock);
 	remaining = raid5_dec_bi_phys_segments(bi);
@@ -4823,6 +4845,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	int raid_disk, memory, max_disks;
 	struct md_rdev *rdev;
 	struct disk_info *disk;
+	char pers_name[6];
 
 	if (mddev->new_level != 5
 	    && mddev->new_level != 4
@@ -4946,7 +4969,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 		printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
 		       mdname(mddev), memory);
 
-	conf->thread = md_register_thread(raid5d, mddev, NULL);
+	sprintf(pers_name, "raid%d", mddev->new_level);
+	conf->thread = md_register_thread(raid5d, mddev, pers_name);
 	if (!conf->thread) {
 		printk(KERN_ERR
 		       "md/raid:%s: couldn't allocate thread.\n",
@@ -5465,10 +5489,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 	if (rdev->saved_raid_disk >= 0 &&
 	    rdev->saved_raid_disk >= first &&
 	    conf->disks[rdev->saved_raid_disk].rdev == NULL)
-		disk = rdev->saved_raid_disk;
-	else
-		disk = first;
-	for ( ; disk <= last ; disk++) {
+		first = rdev->saved_raid_disk;
+
+	for (disk = first; disk <= last; disk++) {
 		p = conf->disks + disk;
 		if (p->rdev == NULL) {
 			clear_bit(In_sync, &rdev->flags);
@@ -5477,8 +5500,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			if (rdev->saved_raid_disk != disk)
 				conf->fullsync = 1;
 			rcu_assign_pointer(p->rdev, rdev);
-			break;
+			goto out;
 		}
+	}
+	for (disk = first; disk <= last; disk++) {
+		p = conf->disks + disk;
 		if (test_bit(WantReplacement, &p->rdev->flags) &&
 		    p->replacement == NULL) {
 			clear_bit(In_sync, &rdev->flags);
@@ -5490,6 +5516,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 			break;
 		}
 	}
+out:
 	print_raid5_conf(conf);
 	return err;
 }
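Across these files md_register_thread() no longer falls back to mddev->pers->name, so every personality now passes an explicit thread name, which md_register_thread() combines with mdname() via the "%s_%s" format shown in the md.c hunk (giving task names such as "md0_raid5"). A minimal illustration of the raid5 call, mirroring the setup_conf() hunk above:

	char pers_name[6];

	sprintf(pers_name, "raid%d", mddev->new_level);
	conf->thread = md_register_thread(raid5d, mddev, pers_name);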