author		Linus Torvalds <torvalds@linux-foundation.org>	2018-06-09 15:01:36 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-06-09 15:01:36 -0400
commit		d60dafdca4b463405e5586df923f05b10e9ac2f9 (patch)
tree		eb87cb614a64615eb0a1a0e32f62ad28e705e68c
parent		1329c20433fb1298dd0bf94b59c1a11f27a1f2e2 (diff)
parent		5a409b4f56d50b212334f338cb8465d65550cd85 (diff)
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li:
"A few fixes of MD for this merge window. Mostly bug fixes:
- raid5 stripe batch fix from Amy
- Read error handling for raid1 FailFast device from Gioh
- raid10 recovery NULL pointer dereference fix from Guoqing
- Support write hint for raid5 stripe cache from Mariusz
- Fixes for device hot add/remove from Neil and Yufen
- Improve flush bio scalability from Xiao"
* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
MD: fix lock contention for flush bios
md/raid5: Assigning NULL to sh->batch_head before testing bit R5_Overlap of a stripe
md/raid1: add error handling of read error from FailFast device
md: fix NULL dereference of mddev->pers in remove_and_add_spares()
raid5: copy write hint from origin bio to stripe
md: fix two problems with setting the "re-add" device state.
raid10: check bio in r10buf_pool_free to void NULL pointer dereference
md: fix an error code format and remove unsed bio_sector
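
The headline change is Xiao Ni's flush rework ("MD: fix lock contention for flush bios"). Before it, an mddev carried a single flush_bio slot: every REQ_PREFLUSH bio took mddev->lock in md_flush_request() and waited for the previous flush to drain, so concurrent flushers serialized on one slot. Now each flush request gets its own struct flush_info from a mempool and tracks its outstanding per-device flushes with a private counter, letting independent flushes proceed in parallel. A minimal sketch of that completion-counting pattern (simplified from the md.c hunks below; the names and omissions here are mine, not the kernel's):

/* Sketch only: per-request flush context with a reference count.
 * The real code is in the drivers/md/md.c diff below; locking, RCU
 * and error paths are omitted.
 */
struct flush_ctx {
        struct bio *orig_bio;      /* flush bio as submitted by the caller */
        atomic_t pending;          /* 1 + one ref per member-device flush */
        struct work_struct work;   /* resubmits orig_bio minus REQ_PREFLUSH */
};

static void flush_ctx_put(struct flush_ctx *ctx)
{
        /* Whoever drops the last reference finishes the request: an
         * empty barrier completes immediately, a flush with a payload
         * is handed to a worker for normal submission. */
        if (atomic_dec_and_test(&ctx->pending)) {
                if (ctx->orig_bio->bi_iter.bi_size == 0)
                        bio_endio(ctx->orig_bio);
                else
                        queue_work(md_wq, &ctx->work);
        }
}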
-rw-r--r--	drivers/md/md.c		169
-rw-r--r--	drivers/md/md.h		 22
-rw-r--r--	drivers/md/raid1.c	  4
-rw-r--r--	drivers/md/raid10.c	 10
-rw-r--r--	drivers/md/raid5.c	 12
-rw-r--r--	drivers/md/raid5.h	  1
6 files changed, 148 insertions(+), 70 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 22203eba1e6e..29b0cd9ec951 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -132,6 +132,24 @@ static inline int speed_max(struct mddev *mddev)
                 mddev->sync_speed_max : sysctl_speed_limit_max;
 }
 
+static void * flush_info_alloc(gfp_t gfp_flags, void *data)
+{
+        return kzalloc(sizeof(struct flush_info), gfp_flags);
+}
+static void flush_info_free(void *flush_info, void *data)
+{
+        kfree(flush_info);
+}
+
+static void * flush_bio_alloc(gfp_t gfp_flags, void *data)
+{
+        return kzalloc(sizeof(struct flush_bio), gfp_flags);
+}
+static void flush_bio_free(void *flush_bio, void *data)
+{
+        kfree(flush_bio);
+}
+
 static struct ctl_table_header *raid_table_header;
 
 static struct ctl_table raid_table[] = {
@@ -414,30 +432,53 @@ static int md_congested(void *data, int bits)
 /*
  * Generic flush handling for md
  */
+static void submit_flushes(struct work_struct *ws)
+{
+        struct flush_info *fi = container_of(ws, struct flush_info, flush_work);
+        struct mddev *mddev = fi->mddev;
+        struct bio *bio = fi->bio;
+
+        bio->bi_opf &= ~REQ_PREFLUSH;
+        md_handle_request(mddev, bio);
+
+        mempool_free(fi, mddev->flush_pool);
+}
 
-static void md_end_flush(struct bio *bio)
+static void md_end_flush(struct bio *fbio)
 {
-        struct md_rdev *rdev = bio->bi_private;
-        struct mddev *mddev = rdev->mddev;
+        struct flush_bio *fb = fbio->bi_private;
+        struct md_rdev *rdev = fb->rdev;
+        struct flush_info *fi = fb->fi;
+        struct bio *bio = fi->bio;
+        struct mddev *mddev = fi->mddev;
 
         rdev_dec_pending(rdev, mddev);
 
-        if (atomic_dec_and_test(&mddev->flush_pending)) {
-                /* The pre-request flush has finished */
-                queue_work(md_wq, &mddev->flush_work);
+        if (atomic_dec_and_test(&fi->flush_pending)) {
+                if (bio->bi_iter.bi_size == 0)
+                        /* an empty barrier - all done */
+                        bio_endio(bio);
+                else {
+                        INIT_WORK(&fi->flush_work, submit_flushes);
+                        queue_work(md_wq, &fi->flush_work);
+                }
         }
-        bio_put(bio);
-}
 
-static void md_submit_flush_data(struct work_struct *ws);
+        mempool_free(fb, mddev->flush_bio_pool);
+        bio_put(fbio);
+}
 
-static void submit_flushes(struct work_struct *ws)
+void md_flush_request(struct mddev *mddev, struct bio *bio)
 {
-        struct mddev *mddev = container_of(ws, struct mddev, flush_work);
         struct md_rdev *rdev;
+        struct flush_info *fi;
+
+        fi = mempool_alloc(mddev->flush_pool, GFP_NOIO);
+
+        fi->bio = bio;
+        fi->mddev = mddev;
+        atomic_set(&fi->flush_pending, 1);
 
-        INIT_WORK(&mddev->flush_work, md_submit_flush_data);
-        atomic_set(&mddev->flush_pending, 1);
         rcu_read_lock();
         rdev_for_each_rcu(rdev, mddev)
                 if (rdev->raid_disk >= 0 &&
@@ -447,59 +488,39 @@ static void submit_flushes(struct work_struct *ws)
                          * we reclaim rcu_read_lock
                          */
                         struct bio *bi;
+                        struct flush_bio *fb;
                         atomic_inc(&rdev->nr_pending);
                         atomic_inc(&rdev->nr_pending);
                         rcu_read_unlock();
+
+                        fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO);
+                        fb->fi = fi;
+                        fb->rdev = rdev;
+
                         bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);
-                        bi->bi_end_io = md_end_flush;
-                        bi->bi_private = rdev;
                         bio_set_dev(bi, rdev->bdev);
+                        bi->bi_end_io = md_end_flush;
+                        bi->bi_private = fb;
                         bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
-                        atomic_inc(&mddev->flush_pending);
+
+                        atomic_inc(&fi->flush_pending);
                         submit_bio(bi);
+
                         rcu_read_lock();
                         rdev_dec_pending(rdev, mddev);
                 }
         rcu_read_unlock();
-        if (atomic_dec_and_test(&mddev->flush_pending))
-                queue_work(md_wq, &mddev->flush_work);
-}
-
-static void md_submit_flush_data(struct work_struct *ws)
-{
-        struct mddev *mddev = container_of(ws, struct mddev, flush_work);
-        struct bio *bio = mddev->flush_bio;
 
-        /*
-         * must reset flush_bio before calling into md_handle_request to avoid a
-         * deadlock, because other bios passed md_handle_request suspend check
-         * could wait for this and below md_handle_request could wait for those
-         * bios because of suspend check
-         */
-        mddev->flush_bio = NULL;
-        wake_up(&mddev->sb_wait);
-
-        if (bio->bi_iter.bi_size == 0)
-                /* an empty barrier - all done */
-                bio_endio(bio);
-        else {
-                bio->bi_opf &= ~REQ_PREFLUSH;
-                md_handle_request(mddev, bio);
+        if (atomic_dec_and_test(&fi->flush_pending)) {
+                if (bio->bi_iter.bi_size == 0)
+                        /* an empty barrier - all done */
+                        bio_endio(bio);
+                else {
+                        INIT_WORK(&fi->flush_work, submit_flushes);
+                        queue_work(md_wq, &fi->flush_work);
+                }
         }
 }
-
-void md_flush_request(struct mddev *mddev, struct bio *bio)
-{
-        spin_lock_irq(&mddev->lock);
-        wait_event_lock_irq(mddev->sb_wait,
-                            !mddev->flush_bio,
-                            mddev->lock);
-        mddev->flush_bio = bio;
-        spin_unlock_irq(&mddev->lock);
-
-        INIT_WORK(&mddev->flush_work, submit_flushes);
-        queue_work(md_wq, &mddev->flush_work);
-}
 EXPORT_SYMBOL(md_flush_request);
 
 static inline struct mddev *mddev_get(struct mddev *mddev)
@@ -546,7 +567,6 @@ void mddev_init(struct mddev *mddev)
         atomic_set(&mddev->openers, 0);
         atomic_set(&mddev->active_io, 0);
         spin_lock_init(&mddev->lock);
-        atomic_set(&mddev->flush_pending, 0);
         init_waitqueue_head(&mddev->sb_wait);
         init_waitqueue_head(&mddev->recovery_wait);
         mddev->reshape_position = MaxSector;
@@ -2844,7 +2864,8 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
                         err = 0;
                 }
         } else if (cmd_match(buf, "re-add")) {
-                if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1)) {
+                if (test_bit(Faulty, &rdev->flags) && (rdev->raid_disk == -1) &&
+                    rdev->saved_raid_disk >= 0) {
                         /* clear_bit is performed _after_ all the devices
                          * have their local Faulty bit cleared. If any writes
                          * happen in the meantime in the local node, they
@@ -5499,6 +5520,22 @@ int md_run(struct mddev *mddev)
                 if (err)
                         return err;
         }
+        if (mddev->flush_pool == NULL) {
+                mddev->flush_pool = mempool_create(NR_FLUSH_INFOS, flush_info_alloc,
+                                                   flush_info_free, mddev);
+                if (!mddev->flush_pool) {
+                        err = -ENOMEM;
+                        goto abort;
+                }
+        }
+        if (mddev->flush_bio_pool == NULL) {
+                mddev->flush_bio_pool = mempool_create(NR_FLUSH_BIOS, flush_bio_alloc,
+                                                       flush_bio_free, mddev);
+                if (!mddev->flush_bio_pool) {
+                        err = -ENOMEM;
+                        goto abort;
+                }
+        }
 
         spin_lock(&pers_lock);
         pers = find_pers(mddev->level, mddev->clevel);
@@ -5654,6 +5691,18 @@ int md_run(struct mddev *mddev)
         sysfs_notify_dirent_safe(mddev->sysfs_action);
         sysfs_notify(&mddev->kobj, NULL, "degraded");
         return 0;
+
+abort:
+        if (mddev->flush_bio_pool) {
+                mempool_destroy(mddev->flush_bio_pool);
+                mddev->flush_bio_pool = NULL;
+        }
+        if (mddev->flush_pool) {
+                mempool_destroy(mddev->flush_pool);
+                mddev->flush_pool = NULL;
+        }
+
+        return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
 
@@ -5864,6 +5913,14 @@ void md_stop(struct mddev *mddev)
          * This is called from dm-raid
          */
         __md_stop(mddev);
+        if (mddev->flush_bio_pool) {
+                mempool_destroy(mddev->flush_bio_pool);
+                mddev->flush_bio_pool = NULL;
+        }
+        if (mddev->flush_pool) {
+                mempool_destroy(mddev->flush_pool);
+                mddev->flush_pool = NULL;
+        }
         bioset_exit(&mddev->bio_set);
         bioset_exit(&mddev->sync_set);
 }
@@ -6494,6 +6551,9 @@ static int hot_remove_disk(struct mddev *mddev, dev_t dev)
         char b[BDEVNAME_SIZE];
         struct md_rdev *rdev;
 
+        if (!mddev->pers)
+                return -ENODEV;
+
         rdev = find_rdev(mddev, dev);
         if (!rdev)
                 return -ENXIO;
@@ -8611,6 +8671,7 @@ static int remove_and_add_spares(struct mddev *mddev,
                         if (mddev->pers->hot_remove_disk(
                                     mddev, rdev) == 0) {
                                 sysfs_unlink_rdev(mddev, rdev);
+                                rdev->saved_raid_disk = rdev->raid_disk;
                                 rdev->raid_disk = -1;
                                 removed++;
                         }
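
Both pools created in md_run() above follow the standard mempool contract: mempool_create(min_nr, alloc_fn, free_fn, pool_data) preallocates min_nr elements, so a later mempool_alloc() with a sleeping mask such as GFP_NOIO waits for a reserved element instead of failing — exactly what an I/O path that must make forward progress under memory pressure needs. A self-contained usage sketch (the demo_* identifiers are illustrative, not from the patch):

#include <linux/mempool.h>
#include <linux/slab.h>

/* Illustrative element type and callbacks; the driver's real ones are
 * flush_info_alloc()/flush_info_free() in the hunk above. */
struct demo_elem { int payload; };

static void *demo_alloc(gfp_t gfp_flags, void *pool_data)
{
        return kzalloc(sizeof(struct demo_elem), gfp_flags);
}

static void demo_free(void *element, void *pool_data)
{
        kfree(element);
}

static int demo_init(void)
{
        struct demo_elem *e;
        mempool_t *pool = mempool_create(8, demo_alloc, demo_free, NULL);

        if (!pool)
                return -ENOMEM;

        /* With a sleeping gfp mask such as GFP_NOIO, mempool_alloc()
         * waits for a reserved element rather than returning NULL. */
        e = mempool_alloc(pool, GFP_NOIO);
        mempool_free(e, pool);
        mempool_destroy(pool);
        return 0;
}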
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 3507cab22cb6..2d148bdaba74 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -252,6 +252,19 @@ enum mddev_sb_flags {
         MD_SB_NEED_REWRITE, /* metadata write needs to be repeated */
 };
 
+#define NR_FLUSH_INFOS 8
+#define NR_FLUSH_BIOS 64
+struct flush_info {
+        struct bio              *bio;
+        struct mddev            *mddev;
+        struct work_struct      flush_work;
+        atomic_t                flush_pending;
+};
+struct flush_bio {
+        struct flush_info *fi;
+        struct md_rdev *rdev;
+};
+
 struct mddev {
         void                            *private;
         struct md_personality           *pers;
@@ -457,13 +470,8 @@ struct mddev {
                                          * metadata and bitmap writes
                                          */
 
-        /* Generic flush handling.
-         * The last to finish preflush schedules a worker to submit
-         * the rest of the request (without the REQ_PREFLUSH flag).
-         */
-        struct bio *flush_bio;
-        atomic_t flush_pending;
-        struct work_struct flush_work;
+        mempool_t                       *flush_pool;
+        mempool_t                       *flush_bio_pool;
         struct work_struct event_work;  /* used by dm to report failure event */
         void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
         struct md_cluster_info          *cluster_info;
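
For orientation on these two structures: one flush_info is allocated per flush request (NR_FLUSH_INFOS of them preallocated), and each fans out into one flush_bio per member device (backed by NR_FLUSH_BIOS), with the flush_bio carrying a back-pointer so md_end_flush() can find the shared counter. A condensed sketch of the fan-out performed by md_flush_request() (RCU and rdev reference counting deliberately elided):

/* Fan-out sketch, condensed from the md.c diff above; not the literal
 * driver code — the real loop also manages RCU and rdev refcounts. */
struct flush_info *fi = mempool_alloc(mddev->flush_pool, GFP_NOIO);

fi->bio = bio;
fi->mddev = mddev;
atomic_set(&fi->flush_pending, 1);      /* self-reference until fan-out ends */

rdev_for_each_rcu(rdev, mddev) {
        struct flush_bio *fb = mempool_alloc(mddev->flush_bio_pool, GFP_NOIO);
        struct bio *bi = bio_alloc_mddev(GFP_NOIO, 0, mddev);

        fb->fi = fi;                    /* back-pointer for md_end_flush() */
        fb->rdev = rdev;
        bio_set_dev(bi, rdev->bdev);
        bi->bi_end_io = md_end_flush;
        bi->bi_private = fb;
        bi->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;

        atomic_inc(&fi->flush_pending); /* one ref per member-device flush */
        submit_bio(bi);
}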
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index bad28520719b..0b344d087581 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2449,7 +2449,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
         struct mddev *mddev = conf->mddev;
         struct bio *bio;
         struct md_rdev *rdev;
-        sector_t bio_sector;
 
         clear_bit(R1BIO_ReadError, &r1_bio->state);
         /* we got a read error. Maybe the drive is bad. Maybe just
@@ -2462,7 +2461,6 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
          */
 
         bio = r1_bio->bios[r1_bio->read_disk];
-        bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector;
         bio_put(bio);
         r1_bio->bios[r1_bio->read_disk] = NULL;
 
@@ -2473,6 +2471,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
                 fix_read_error(conf, r1_bio->read_disk,
                                r1_bio->sector, r1_bio->sectors);
                 unfreeze_array(conf);
+        } else if (mddev->ro == 0 && test_bit(FailFast, &rdev->flags)) {
+                md_error(mddev, rdev);
         } else {
                 r1_bio->bios[r1_bio->read_disk] = IO_BLOCKED;
         }
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 37d4b236b81b..1147ae59e3b6 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -255,9 +255,11 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
         for (j = conf->copies; j--; ) {
                 struct bio *bio = r10bio->devs[j].bio;
 
-                rp = get_resync_pages(bio);
-                resync_free_pages(rp);
-                bio_put(bio);
+                if (bio) {
+                        rp = get_resync_pages(bio);
+                        resync_free_pages(rp);
+                        bio_put(bio);
+                }
 
                 bio = r10bio->devs[j].repl_bio;
                 if (bio)
@@ -2362,7 +2364,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10bio *r10_bio)
 {
         int sect = 0; /* Offset from r10_bio->sector */
         int sectors = r10_bio->sectors;
-        struct md_rdev*rdev;
+        struct md_rdev *rdev;
         int max_read_errors = atomic_read(&mddev->max_corr_read_errors);
         int d = r10_bio->devs[r10_bio->read_slot].devnum;
 
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a2e64989b01f..73489446bbcb 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1139,6 +1139,9 @@ again:
                         bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                         bi->bi_io_vec[0].bv_offset = 0;
                         bi->bi_iter.bi_size = STRIPE_SIZE;
+                        bi->bi_write_hint = sh->dev[i].write_hint;
+                        if (!rrdev)
+                                sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
                         /*
                          * If this is discard request, set bi_vcnt 0. We don't
                          * want to confuse SCSI because SCSI will replace payload
@@ -1190,6 +1193,8 @@ again:
                         rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
                         rbi->bi_io_vec[0].bv_offset = 0;
                         rbi->bi_iter.bi_size = STRIPE_SIZE;
+                        rbi->bi_write_hint = sh->dev[i].write_hint;
+                        sh->dev[i].write_hint = RWF_WRITE_LIFE_NOT_SET;
                         /*
                          * If this is discard request, set bi_vcnt 0. We don't
                          * want to confuse SCSI because SCSI will replace payload
@@ -3204,6 +3209,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx,
                 (unsigned long long)sh->sector);
 
         spin_lock_irq(&sh->stripe_lock);
+        sh->dev[dd_idx].write_hint = bi->bi_write_hint;
         /* Don't allow new IO added to stripes in batch list */
         if (sh->batch_head)
                 goto overlap;
@@ -4614,15 +4620,15 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
 
                 sh->check_state = head_sh->check_state;
                 sh->reconstruct_state = head_sh->reconstruct_state;
+                spin_lock_irq(&sh->stripe_lock);
+                sh->batch_head = NULL;
+                spin_unlock_irq(&sh->stripe_lock);
                 for (i = 0; i < sh->disks; i++) {
                         if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
                                 do_wakeup = 1;
                         sh->dev[i].flags = head_sh->dev[i].flags &
                                 (~((1 << R5_WriteError) | (1 << R5_Overlap)));
                 }
-                spin_lock_irq(&sh->stripe_lock);
-                sh->batch_head = NULL;
-                spin_unlock_irq(&sh->stripe_lock);
                 if (handle_flags == 0 ||
                     sh->state & handle_flags)
                         set_bit(STRIPE_HANDLE, &sh->state);
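
The break_stripe_batch_list() reordering above closes a wakeup race: a thread that finds R5_Overlap set sleeps and, once woken, retries add_stripe_bio(), which bails out while sh->batch_head is still non-NULL. Clearing batch_head only after the R5_Overlap bits were cleared meant the retry could observe the stale pointer and sleep again with nobody left to wake it. The ordering that matters, condensed into a hypothetical helper (the real code aggregates the wakeup via do_wakeup):

/* Hypothetical condensation of the hunk above, not the driver's code. */
static void unbatch_stripe(struct stripe_head *sh)
{
        int i;

        /* Publish "no longer batched" first, under the stripe lock. */
        spin_lock_irq(&sh->stripe_lock);
        sh->batch_head = NULL;
        spin_unlock_irq(&sh->stripe_lock);

        /* Only then clear R5_Overlap and wake waiters: a waiter that
         * retries add_stripe_bio() now sees batch_head == NULL and can
         * proceed instead of going back to sleep. */
        for (i = 0; i < sh->disks; i++)
                if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
                        wake_up(&sh->raid_conf->wait_for_overlap);
}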
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 72e75ba6abf0..8474c224127b 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -257,6 +257,7 @@ struct stripe_head {
                 sector_t        sector;                 /* sector of this page */
                 unsigned long   flags;
                 u32             log_checksum;
+                unsigned short  write_hint;
         } dev[1]; /* allocated with extra space depending of RAID geometry */
 };
 
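Taken together, the raid5 write-hint change is three-stop plumbing: add_stripe_bio() captures bi_write_hint from the incoming bio into the per-device write_hint slot added above, ops_run_io() copies it onto the outgoing data and replacement bios, and the slot is reset to RWF_WRITE_LIFE_NOT_SET once its last consumer has read it (only after the replacement write when one exists, hence the "if (!rrdev)" guard). In miniature (hypothetical helpers, not the driver's functions):

/* 'slot' stands in for sh->dev[i].write_hint; the bios are the
 * incoming request and the outgoing per-device write. */
static void stage_write_hint(unsigned short *slot, struct bio *incoming)
{
        *slot = incoming->bi_write_hint;        /* add_stripe_bio() */
}

static void issue_with_hint(unsigned short *slot, struct bio *outgoing,
                            bool have_replacement)
{
        outgoing->bi_write_hint = *slot;        /* ops_run_io() */
        if (!have_replacement)
                *slot = RWF_WRITE_LIFE_NOT_SET; /* reset after last user */
}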