aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2010-10-22 20:07:18 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-10-22 20:07:18 -0400
commit: a2887097f25cd38cadfc11d10769e2b349fb5eca (patch)
tree: cd4adcb305365d6ba9acd2c02d4eb9d0125c6f8d /drivers/md/raid1.c
parent: 8abfc6e7a45eb74e51904bbae676fae008b11366 (diff)
parent: 005a1d15f5a6b2bb4ada80349513effbf22b4588 (diff)
Merge branch 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.37/barrier' of git://git.kernel.dk/linux-2.6-block: (46 commits)
  xen-blkfront: disable barrier/flush write support
  Added blk-lib.c and blk-barrier.c was renamed to blk-flush.c
  block: remove BLKDEV_IFL_WAIT
  aic7xxx_old: removed unused 'req' variable
  block: remove the BH_Eopnotsupp flag
  block: remove the BLKDEV_IFL_BARRIER flag
  block: remove the WRITE_BARRIER flag
  swap: do not send discards as barriers
  fat: do not send discards as barriers
  ext4: do not send discards as barriers
  jbd2: replace barriers with explicit flush / FUA usage
  jbd2: Modify ASYNC_COMMIT code to not rely on queue draining on barrier
  jbd: replace barriers with explicit flush / FUA usage
  nilfs2: replace barriers with explicit flush / FUA usage
  reiserfs: replace barriers with explicit flush / FUA usage
  gfs2: replace barriers with explicit flush / FUA usage
  btrfs: replace barriers with explicit flush / FUA usage
  xfs: replace barriers with explicit flush / FUA usage
  block: pass gfp_mask and flags to sb_issue_discard
  dm: convey that all flushes are processed as empty
  ...
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r-- drivers/md/raid1.c 176
1 files changed, 59 insertions, 117 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 0b830bbe1d8b..378a25894c57 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -319,83 +319,74 @@ static void raid1_end_write_request(struct bio *bio, int error)
319 if (r1_bio->bios[mirror] == bio) 319 if (r1_bio->bios[mirror] == bio)
320 break; 320 break;
321 321
322 if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) { 322 /*
323 set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags); 323 * 'one mirror IO has finished' event handler:
324 set_bit(R1BIO_BarrierRetry, &r1_bio->state); 324 */
325 r1_bio->mddev->barriers_work = 0; 325 r1_bio->bios[mirror] = NULL;
326 /* Don't rdev_dec_pending in this branch - keep it for the retry */ 326 to_put = bio;
327 } else { 327 if (!uptodate) {
328 md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
329 /* an I/O failed, we can't clear the bitmap */
330 set_bit(R1BIO_Degraded, &r1_bio->state);
331 } else
328 /* 332 /*
329 * this branch is our 'one mirror IO has finished' event handler: 333 * Set R1BIO_Uptodate in our master bio, so that we
334 * will return a good error code for to the higher
335 * levels even if IO on some other mirrored buffer
336 * fails.
337 *
338 * The 'master' represents the composite IO operation
339 * to user-side. So if something waits for IO, then it
340 * will wait for the 'master' bio.
330 */ 341 */
331 r1_bio->bios[mirror] = NULL; 342 set_bit(R1BIO_Uptodate, &r1_bio->state);
332 to_put = bio; 343
333 if (!uptodate) { 344 update_head_pos(mirror, r1_bio);
334 md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); 345
335 /* an I/O failed, we can't clear the bitmap */ 346 if (behind) {
336 set_bit(R1BIO_Degraded, &r1_bio->state); 347 if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
337 } else 348 atomic_dec(&r1_bio->behind_remaining);
338 /* 349
339 * Set R1BIO_Uptodate in our master bio, so that 350 /*
340 * we will return a good error code for to the higher 351 * In behind mode, we ACK the master bio once the I/O
341 * levels even if IO on some other mirrored buffer fails. 352 * has safely reached all non-writemostly
342 * 353 * disks. Setting the Returned bit ensures that this
343 * The 'master' represents the composite IO operation to 354 * gets done only once -- we don't ever want to return
344 * user-side. So if something waits for IO, then it will 355 * -EIO here, instead we'll wait
345 * wait for the 'master' bio. 356 */
346 */ 357 if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
347 set_bit(R1BIO_Uptodate, &r1_bio->state); 358 test_bit(R1BIO_Uptodate, &r1_bio->state)) {
348 359 /* Maybe we can return now */
349 update_head_pos(mirror, r1_bio); 360 if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
350 361 struct bio *mbio = r1_bio->master_bio;
351 if (behind) { 362 PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
352 if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags)) 363 (unsigned long long) mbio->bi_sector,
353 atomic_dec(&r1_bio->behind_remaining); 364 (unsigned long long) mbio->bi_sector +
354 365 (mbio->bi_size >> 9) - 1);
355 /* In behind mode, we ACK the master bio once the I/O has safely 366 bio_endio(mbio, 0);
356 * reached all non-writemostly disks. Setting the Returned bit
357 * ensures that this gets done only once -- we don't ever want to
358 * return -EIO here, instead we'll wait */
359
360 if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
361 test_bit(R1BIO_Uptodate, &r1_bio->state)) {
362 /* Maybe we can return now */
363 if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
364 struct bio *mbio = r1_bio->master_bio;
365 PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
366 (unsigned long long) mbio->bi_sector,
367 (unsigned long long) mbio->bi_sector +
368 (mbio->bi_size >> 9) - 1);
369 bio_endio(mbio, 0);
370 }
371 } 367 }
372 } 368 }
373 rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
374 } 369 }
370 rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
371
375 /* 372 /*
376 *
377 * Let's see if all mirrored write operations have finished 373 * Let's see if all mirrored write operations have finished
378 * already. 374 * already.
379 */ 375 */
380 if (atomic_dec_and_test(&r1_bio->remaining)) { 376 if (atomic_dec_and_test(&r1_bio->remaining)) {
381 if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) 377 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
382 reschedule_retry(r1_bio); 378 /* free extra copy of the data pages */
383 else { 379 int i = bio->bi_vcnt;
384 /* it really is the end of this request */ 380 while (i--)
385 if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { 381 safe_put_page(bio->bi_io_vec[i].bv_page);
386 /* free extra copy of the data pages */
387 int i = bio->bi_vcnt;
388 while (i--)
389 safe_put_page(bio->bi_io_vec[i].bv_page);
390 }
391 /* clear the bitmap if all writes complete successfully */
392 bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
393 r1_bio->sectors,
394 !test_bit(R1BIO_Degraded, &r1_bio->state),
395 behind);
396 md_write_end(r1_bio->mddev);
397 raid_end_bio_io(r1_bio);
398 } 382 }
383 /* clear the bitmap if all writes complete successfully */
384 bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
385 r1_bio->sectors,
386 !test_bit(R1BIO_Degraded, &r1_bio->state),
387 behind);
388 md_write_end(r1_bio->mddev);
389 raid_end_bio_io(r1_bio);
399 } 390 }
400 391
401 if (to_put) 392 if (to_put)
@@ -788,16 +779,13 @@ static int make_request(mddev_t *mddev, struct bio * bio)
788 struct page **behind_pages = NULL; 779 struct page **behind_pages = NULL;
789 const int rw = bio_data_dir(bio); 780 const int rw = bio_data_dir(bio);
790 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); 781 const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
791 unsigned long do_barriers; 782 const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
792 mdk_rdev_t *blocked_rdev; 783 mdk_rdev_t *blocked_rdev;
793 784
794 /* 785 /*
795 * Register the new request and wait if the reconstruction 786 * Register the new request and wait if the reconstruction
796 * thread has put up a bar for new requests. 787 * thread has put up a bar for new requests.
797 * Continue immediately if no resync is active currently. 788 * Continue immediately if no resync is active currently.
798 * We test barriers_work *after* md_write_start as md_write_start
799 * may cause the first superblock write, and that will check out
800 * if barriers work.
801 */ 789 */
802 790
803 md_write_start(mddev, bio); /* wait on superblock update early */ 791 md_write_start(mddev, bio); /* wait on superblock update early */
@@ -821,13 +809,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
821 } 809 }
822 finish_wait(&conf->wait_barrier, &w); 810 finish_wait(&conf->wait_barrier, &w);
823 } 811 }
824 if (unlikely(!mddev->barriers_work &&
825 (bio->bi_rw & REQ_HARDBARRIER))) {
826 if (rw == WRITE)
827 md_write_end(mddev);
828 bio_endio(bio, -EOPNOTSUPP);
829 return 0;
830 }
831 812
832 wait_barrier(conf); 813 wait_barrier(conf);
833 814
@@ -959,10 +940,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
959 atomic_set(&r1_bio->remaining, 0); 940 atomic_set(&r1_bio->remaining, 0);
960 atomic_set(&r1_bio->behind_remaining, 0); 941 atomic_set(&r1_bio->behind_remaining, 0);
961 942
962 do_barriers = bio->bi_rw & REQ_HARDBARRIER;
963 if (do_barriers)
964 set_bit(R1BIO_Barrier, &r1_bio->state);
965
966 bio_list_init(&bl); 943 bio_list_init(&bl);
967 for (i = 0; i < disks; i++) { 944 for (i = 0; i < disks; i++) {
968 struct bio *mbio; 945 struct bio *mbio;
@@ -975,7 +952,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
975 mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; 952 mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
976 mbio->bi_bdev = conf->mirrors[i].rdev->bdev; 953 mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
977 mbio->bi_end_io = raid1_end_write_request; 954 mbio->bi_end_io = raid1_end_write_request;
978 mbio->bi_rw = WRITE | do_barriers | do_sync; 955 mbio->bi_rw = WRITE | do_flush_fua | do_sync;
979 mbio->bi_private = r1_bio; 956 mbio->bi_private = r1_bio;
980 957
981 if (behind_pages) { 958 if (behind_pages) {
@@ -1634,41 +1611,6 @@ static void raid1d(mddev_t *mddev)
1634 if (test_bit(R1BIO_IsSync, &r1_bio->state)) { 1611 if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
1635 sync_request_write(mddev, r1_bio); 1612 sync_request_write(mddev, r1_bio);
1636 unplug = 1; 1613 unplug = 1;
1637 } else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
1638 /* some requests in the r1bio were REQ_HARDBARRIER
1639 * requests which failed with -EOPNOTSUPP. Hohumm..
1640 * Better resubmit without the barrier.
1641 * We know which devices to resubmit for, because
1642 * all others have had their bios[] entry cleared.
1643 * We already have a nr_pending reference on these rdevs.
1644 */
1645 int i;
1646 const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC);
1647 clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
1648 clear_bit(R1BIO_Barrier, &r1_bio->state);
1649 for (i=0; i < conf->raid_disks; i++)
1650 if (r1_bio->bios[i])
1651 atomic_inc(&r1_bio->remaining);
1652 for (i=0; i < conf->raid_disks; i++)
1653 if (r1_bio->bios[i]) {
1654 struct bio_vec *bvec;
1655 int j;
1656
1657 bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
1658 /* copy pages from the failed bio, as
1659 * this might be a write-behind device */
1660 __bio_for_each_segment(bvec, bio, j, 0)
1661 bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
1662 bio_put(r1_bio->bios[i]);
1663 bio->bi_sector = r1_bio->sector +
1664 conf->mirrors[i].rdev->data_offset;
1665 bio->bi_bdev = conf->mirrors[i].rdev->bdev;
1666 bio->bi_end_io = raid1_end_write_request;
1667 bio->bi_rw = WRITE | do_sync;
1668 bio->bi_private = r1_bio;
1669 r1_bio->bios[i] = bio;
1670 generic_make_request(bio);
1671 }
1672 } else { 1614 } else {
1673 int disk; 1615 int disk;
1674 1616