author     Tejun Heo <tj@kernel.org>            2010-09-03 05:56:18 -0400
committer  Jens Axboe <jaxboe@fusionio.com>     2010-09-10 06:35:38 -0400
commit     e9c7469bb4f502dafc092166201bea1ad5fc0fbf (patch)
tree       04202b0bb88623d3005c909eaafcb280778902da /drivers/md/md.c
parent     7bc9fddab074d6bb630344e1969e28d20b140621 (diff)
md: implement REQ_FLUSH/FUA support
This patch converts md to support REQ_FLUSH/FUA instead of the now deprecated REQ_HARDBARRIER. In the core part (md.c), the following changes are notable.

* Unlike REQ_HARDBARRIER, REQ_FLUSH/FUA don't interfere with the processing of other requests, so there is no reason to mark the queue congested while a FLUSH/FUA is in progress.

* REQ_FLUSH/FUA failures are final and their users don't need retry logic. Retry logic is removed.

* A preflush needs to be issued to all member devices, but FUA writes can be handled the same way as other writes - their processing can be deferred to the request_queue of each member device. md_barrier_request() is renamed to md_flush_request() and simplified accordingly.

For linear, raid0 and multipath, the core changes are enough. raid1, raid5 and raid10 need the following conversions.

* raid1: Handling of FLUSH/FUA bios can simply be deferred to the request_queues of the member devices. Barrier-related logic is removed.

* raid5: Queue draining logic is dropped. The FUA bit is propagated through biodrain and stripe reconstruction such that all the updated parts of the stripe are written out with FUA writes if any of the dirtying writes was FUA. preread_active_stripes handling in make_request() is updated as suggested by Neil Brown.

* raid10: The FUA bit needs to be propagated to write clones.

linear, raid0, raid1, raid5 and raid10 have been tested.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Neil Brown <neilb@suse.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
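For linear, raid0 and multipath, where the core changes are enough, the per-personality conversion amounts to handing REQ_FLUSH bios to the new core helper. A minimal sketch of that call pattern, using a hypothetical example_make_request() that follows the make_request() conventions md personalities used at the time (this is not code from the patch itself):

    static int example_make_request(mddev_t *mddev, struct bio *bio)
    {
            /* Hand preflush sequencing to the md core; md_flush_request()
             * issues a zero-length flush to every member device and then
             * completes or resubmits the original bio itself. */
            if (unlikely(bio->bi_rw & REQ_FLUSH)) {
                    md_flush_request(mddev, bio);
                    return 0;
            }

            /* ... map the bio onto a member device as usual ... */
            return 1;       /* non-zero: the block layer resubmits the remapped bio */
    }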
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--  drivers/md/md.c | 117
1 file changed, 25 insertions(+), 92 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c148b6302154..3640f025cb72 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -226,12 +226,12 @@ static int md_make_request(struct request_queue *q, struct bio *bio)
                 return 0;
         }
         rcu_read_lock();
-        if (mddev->suspended || mddev->barrier) {
+        if (mddev->suspended) {
                 DEFINE_WAIT(__wait);
                 for (;;) {
                         prepare_to_wait(&mddev->sb_wait, &__wait,
                                         TASK_UNINTERRUPTIBLE);
-                        if (!mddev->suspended && !mddev->barrier)
+                        if (!mddev->suspended)
                                 break;
                         rcu_read_unlock();
                         schedule();
@@ -282,40 +282,29 @@ EXPORT_SYMBOL_GPL(mddev_resume);
 
 int mddev_congested(mddev_t *mddev, int bits)
 {
-        if (mddev->barrier)
-                return 1;
         return mddev->suspended;
 }
 EXPORT_SYMBOL(mddev_congested);
 
 /*
- * Generic barrier handling for md
+ * Generic flush handling for md
  */
 
-#define POST_REQUEST_BARRIER ((void*)1)
-
-static void md_end_barrier(struct bio *bio, int err)
+static void md_end_flush(struct bio *bio, int err)
 {
         mdk_rdev_t *rdev = bio->bi_private;
         mddev_t *mddev = rdev->mddev;
-        if (err == -EOPNOTSUPP && mddev->barrier != POST_REQUEST_BARRIER)
-                set_bit(BIO_EOPNOTSUPP, &mddev->barrier->bi_flags);
 
         rdev_dec_pending(rdev, mddev);
 
         if (atomic_dec_and_test(&mddev->flush_pending)) {
-                if (mddev->barrier == POST_REQUEST_BARRIER) {
-                        /* This was a post-request barrier */
-                        mddev->barrier = NULL;
-                        wake_up(&mddev->sb_wait);
-                } else
-                        /* The pre-request barrier has finished */
-                        schedule_work(&mddev->barrier_work);
+                /* The pre-request flush has finished */
+                schedule_work(&mddev->flush_work);
         }
         bio_put(bio);
 }
 
-static void submit_barriers(mddev_t *mddev)
+static void submit_flushes(mddev_t *mddev)
 {
         mdk_rdev_t *rdev;
 
@@ -332,60 +321,56 @@ static void submit_barriers(mddev_t *mddev)
                 atomic_inc(&rdev->nr_pending);
                 rcu_read_unlock();
                 bi = bio_alloc(GFP_KERNEL, 0);
-                bi->bi_end_io = md_end_barrier;
+                bi->bi_end_io = md_end_flush;
                 bi->bi_private = rdev;
                 bi->bi_bdev = rdev->bdev;
                 atomic_inc(&mddev->flush_pending);
-                submit_bio(WRITE_BARRIER, bi);
+                submit_bio(WRITE_FLUSH, bi);
                 rcu_read_lock();
                 rdev_dec_pending(rdev, mddev);
         }
         rcu_read_unlock();
 }
 
-static void md_submit_barrier(struct work_struct *ws)
+static void md_submit_flush_data(struct work_struct *ws)
 {
-        mddev_t *mddev = container_of(ws, mddev_t, barrier_work);
-        struct bio *bio = mddev->barrier;
+        mddev_t *mddev = container_of(ws, mddev_t, flush_work);
+        struct bio *bio = mddev->flush_bio;
 
         atomic_set(&mddev->flush_pending, 1);
 
-        if (test_bit(BIO_EOPNOTSUPP, &bio->bi_flags))
-                bio_endio(bio, -EOPNOTSUPP);
-        else if (bio->bi_size == 0)
+        if (bio->bi_size == 0)
                 /* an empty barrier - all done */
                 bio_endio(bio, 0);
         else {
-                bio->bi_rw &= ~REQ_HARDBARRIER;
+                bio->bi_rw &= ~REQ_FLUSH;
                 if (mddev->pers->make_request(mddev, bio))
                         generic_make_request(bio);
-                mddev->barrier = POST_REQUEST_BARRIER;
-                submit_barriers(mddev);
         }
         if (atomic_dec_and_test(&mddev->flush_pending)) {
-                mddev->barrier = NULL;
+                mddev->flush_bio = NULL;
                 wake_up(&mddev->sb_wait);
         }
 }
 
-void md_barrier_request(mddev_t *mddev, struct bio *bio)
+void md_flush_request(mddev_t *mddev, struct bio *bio)
 {
         spin_lock_irq(&mddev->write_lock);
         wait_event_lock_irq(mddev->sb_wait,
-                            !mddev->barrier,
+                            !mddev->flush_bio,
                             mddev->write_lock, /*nothing*/);
-        mddev->barrier = bio;
+        mddev->flush_bio = bio;
         spin_unlock_irq(&mddev->write_lock);
 
         atomic_set(&mddev->flush_pending, 1);
-        INIT_WORK(&mddev->barrier_work, md_submit_barrier);
+        INIT_WORK(&mddev->flush_work, md_submit_flush_data);
 
-        submit_barriers(mddev);
+        submit_flushes(mddev);
 
         if (atomic_dec_and_test(&mddev->flush_pending))
-                schedule_work(&mddev->barrier_work);
+                schedule_work(&mddev->flush_work);
 }
-EXPORT_SYMBOL(md_barrier_request);
+EXPORT_SYMBOL(md_flush_request);
 
 /* Support for plugging.
  * This mirrors the plugging support in request_queue, but does not
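The raid1 side of this conversion lives in raid1.c and is not part of this file's diff; as the changelog notes, it reduces to propagating the FLUSH/FUA bits into each cloned write bio. Roughly along these lines (a sketch of the write path, not the exact raid1.c hunk):

    /* raid1 make_request() write path (sketch): preserve FLUSH/FUA on each clone */
    const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
    const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));

    mbio->bi_rw = WRITE | do_flush_fua | do_sync;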
@@ -696,31 +681,6 @@ static void super_written(struct bio *bio, int error)
         bio_put(bio);
 }
 
-static void super_written_barrier(struct bio *bio, int error)
-{
-        struct bio *bio2 = bio->bi_private;
-        mdk_rdev_t *rdev = bio2->bi_private;
-        mddev_t *mddev = rdev->mddev;
-
-        if (!test_bit(BIO_UPTODATE, &bio->bi_flags) &&
-            error == -EOPNOTSUPP) {
-                unsigned long flags;
-                /* barriers don't appear to be supported :-( */
-                set_bit(BarriersNotsupp, &rdev->flags);
-                mddev->barriers_work = 0;
-                spin_lock_irqsave(&mddev->write_lock, flags);
-                bio2->bi_next = mddev->biolist;
-                mddev->biolist = bio2;
-                spin_unlock_irqrestore(&mddev->write_lock, flags);
-                wake_up(&mddev->sb_wait);
-                bio_put(bio);
-        } else {
-                bio_put(bio2);
-                bio->bi_private = rdev;
-                super_written(bio, error);
-        }
-}
-
 void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
                     sector_t sector, int size, struct page *page)
 {
@@ -729,51 +689,28 @@ void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
          * and decrement it on completion, waking up sb_wait
          * if zero is reached.
          * If an error occurred, call md_error
-         *
-         * As we might need to resubmit the request if REQ_HARDBARRIER
-         * causes ENOTSUPP, we allocate a spare bio...
          */
         struct bio *bio = bio_alloc(GFP_NOIO, 1);
-        int rw = REQ_WRITE | REQ_SYNC | REQ_UNPLUG;
 
         bio->bi_bdev = rdev->bdev;
         bio->bi_sector = sector;
         bio_add_page(bio, page, size, 0);
         bio->bi_private = rdev;
         bio->bi_end_io = super_written;
-        bio->bi_rw = rw;
 
         atomic_inc(&mddev->pending_writes);
-        if (!test_bit(BarriersNotsupp, &rdev->flags)) {
-                struct bio *rbio;
-                rw |= REQ_HARDBARRIER;
-                rbio = bio_clone(bio, GFP_NOIO);
-                rbio->bi_private = bio;
-                rbio->bi_end_io = super_written_barrier;
-                submit_bio(rw, rbio);
-        } else
-                submit_bio(rw, bio);
+        submit_bio(REQ_WRITE | REQ_SYNC | REQ_UNPLUG | REQ_FLUSH | REQ_FUA,
+                   bio);
 }
 
 void md_super_wait(mddev_t *mddev)
 {
-        /* wait for all superblock writes that were scheduled to complete.
-         * if any had to be retried (due to BARRIER problems), retry them
-         */
+        /* wait for all superblock writes that were scheduled to complete */
         DEFINE_WAIT(wq);
         for(;;) {
                 prepare_to_wait(&mddev->sb_wait, &wq, TASK_UNINTERRUPTIBLE);
                 if (atomic_read(&mddev->pending_writes)==0)
                         break;
-                while (mddev->biolist) {
-                        struct bio *bio;
-                        spin_lock_irq(&mddev->write_lock);
-                        bio = mddev->biolist;
-                        mddev->biolist = bio->bi_next ;
-                        bio->bi_next = NULL;
-                        spin_unlock_irq(&mddev->write_lock);
-                        submit_bio(bio->bi_rw, bio);
-                }
                 schedule();
         }
         finish_wait(&mddev->sb_wait, &wq);
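With the barrier retry machinery gone, every superblock write is submitted as a flush+FUA write and callers keep the pattern they already had. For reference, the existing caller pattern elsewhere in md.c (e.g. in md_update_sb()) looks roughly like this and is unchanged by this patch:

    /* Queue the superblock write on a member device, then wait for all
     * pending superblock writes; durability now comes from the
     * REQ_FLUSH|REQ_FUA submission in md_super_write() rather than from
     * barrier retries. */
    md_super_write(mddev, rdev, rdev->sb_start, rdev->sb_size, rdev->sb_page);
    md_super_wait(mddev);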
@@ -1070,7 +1007,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
         clear_bit(Faulty, &rdev->flags);
         clear_bit(In_sync, &rdev->flags);
         clear_bit(WriteMostly, &rdev->flags);
-        clear_bit(BarriersNotsupp, &rdev->flags);
 
         if (mddev->raid_disks == 0) {
                 mddev->major_version = 0;
@@ -1485,7 +1421,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
         clear_bit(Faulty, &rdev->flags);
         clear_bit(In_sync, &rdev->flags);
         clear_bit(WriteMostly, &rdev->flags);
-        clear_bit(BarriersNotsupp, &rdev->flags);
 
         if (mddev->raid_disks == 0) {
                 mddev->major_version = 1;
@@ -4506,7 +4441,6 @@ int md_run(mddev_t *mddev)
         /* may be over-ridden by personality */
         mddev->resync_max_sectors = mddev->dev_sectors;
 
-        mddev->barriers_work = 1;
         mddev->ok_start_degraded = start_dirty_degraded;
 
         if (start_readonly && mddev->ro == 0)
@@ -4685,7 +4619,6 @@ static void md_clean(mddev_t *mddev)
         mddev->recovery = 0;
         mddev->in_sync = 0;
         mddev->degraded = 0;
-        mddev->barriers_work = 0;
         mddev->safemode = 0;
         mddev->bitmap_info.offset = 0;
         mddev->bitmap_info.default_offset = 0;