Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--   drivers/md/raid1.c   176
1 file changed, 59 insertions, 117 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 0b830bbe1d8b..378a25894c57 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -319,83 +319,74 @@ static void raid1_end_write_request(struct bio *bio, int error)
 		if (r1_bio->bios[mirror] == bio)
 			break;
 
-	if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
-		set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
-		set_bit(R1BIO_BarrierRetry, &r1_bio->state);
-		r1_bio->mddev->barriers_work = 0;
-		/* Don't rdev_dec_pending in this branch - keep it for the retry */
-	} else {
-		/*
-		 * this branch is our 'one mirror IO has finished' event handler:
-		 */
-		r1_bio->bios[mirror] = NULL;
-		to_put = bio;
-		if (!uptodate) {
-			md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-			/* an I/O failed, we can't clear the bitmap */
-			set_bit(R1BIO_Degraded, &r1_bio->state);
-		} else
-			/*
-			 * Set R1BIO_Uptodate in our master bio, so that
-			 * we will return a good error code for to the higher
-			 * levels even if IO on some other mirrored buffer fails.
-			 *
-			 * The 'master' represents the composite IO operation to
-			 * user-side. So if something waits for IO, then it will
-			 * wait for the 'master' bio.
-			 */
-			set_bit(R1BIO_Uptodate, &r1_bio->state);
-
-		update_head_pos(mirror, r1_bio);
-
-		if (behind) {
-			if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
-				atomic_dec(&r1_bio->behind_remaining);
-
-			/* In behind mode, we ACK the master bio once the I/O has safely
-			 * reached all non-writemostly disks. Setting the Returned bit
-			 * ensures that this gets done only once -- we don't ever want to
-			 * return -EIO here, instead we'll wait */
-
-			if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
-			    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
-				/* Maybe we can return now */
-				if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
-					struct bio *mbio = r1_bio->master_bio;
-					PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
-					       (unsigned long long) mbio->bi_sector,
-					       (unsigned long long) mbio->bi_sector +
-					       (mbio->bi_size >> 9) - 1);
-					bio_endio(mbio, 0);
-				}
-			}
-		}
-		rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
-	}
+	/*
+	 * 'one mirror IO has finished' event handler:
+	 */
+	r1_bio->bios[mirror] = NULL;
+	to_put = bio;
+	if (!uptodate) {
+		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
+		/* an I/O failed, we can't clear the bitmap */
+		set_bit(R1BIO_Degraded, &r1_bio->state);
+	} else
+		/*
+		 * Set R1BIO_Uptodate in our master bio, so that we
+		 * will return a good error code for to the higher
+		 * levels even if IO on some other mirrored buffer
+		 * fails.
+		 *
+		 * The 'master' represents the composite IO operation
+		 * to user-side. So if something waits for IO, then it
+		 * will wait for the 'master' bio.
+		 */
+		set_bit(R1BIO_Uptodate, &r1_bio->state);
+
+	update_head_pos(mirror, r1_bio);
+
+	if (behind) {
+		if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
+			atomic_dec(&r1_bio->behind_remaining);
+
+		/*
+		 * In behind mode, we ACK the master bio once the I/O
+		 * has safely reached all non-writemostly
+		 * disks. Setting the Returned bit ensures that this
+		 * gets done only once -- we don't ever want to return
+		 * -EIO here, instead we'll wait
+		 */
+		if (atomic_read(&r1_bio->behind_remaining) >= (atomic_read(&r1_bio->remaining)-1) &&
+		    test_bit(R1BIO_Uptodate, &r1_bio->state)) {
+			/* Maybe we can return now */
+			if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
+				struct bio *mbio = r1_bio->master_bio;
+				PRINTK(KERN_DEBUG "raid1: behind end write sectors %llu-%llu\n",
+				       (unsigned long long) mbio->bi_sector,
+				       (unsigned long long) mbio->bi_sector +
+				       (mbio->bi_size >> 9) - 1);
+				bio_endio(mbio, 0);
+			}
+		}
+	}
+	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
+
 	/*
-	 *
 	 * Let's see if all mirrored write operations have finished
 	 * already.
 	 */
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
-		if (test_bit(R1BIO_BarrierRetry, &r1_bio->state))
-			reschedule_retry(r1_bio);
-		else {
-			/* it really is the end of this request */
-			if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
-				/* free extra copy of the data pages */
-				int i = bio->bi_vcnt;
-				while (i--)
-					safe_put_page(bio->bi_io_vec[i].bv_page);
-			}
-			/* clear the bitmap if all writes complete successfully */
-			bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
-					r1_bio->sectors,
-					!test_bit(R1BIO_Degraded, &r1_bio->state),
-					behind);
-			md_write_end(r1_bio->mddev);
-			raid_end_bio_io(r1_bio);
+		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
+			/* free extra copy of the data pages */
+			int i = bio->bi_vcnt;
+			while (i--)
+				safe_put_page(bio->bi_io_vec[i].bv_page);
 		}
+		/* clear the bitmap if all writes complete successfully */
+		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
+				r1_bio->sectors,
+				!test_bit(R1BIO_Degraded, &r1_bio->state),
+				behind);
+		md_write_end(r1_bio->mddev);
+		raid_end_bio_io(r1_bio);
 	}
 
 	if (to_put)
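
The handler above relies on a straightforward reference count: every mirror write holds one count on r1_bio->remaining, and only the final atomic_dec_and_test() ends the master bio, so completion fires exactly once no matter which mirror finishes last. Below is a minimal userspace sketch of that accounting, with C11 atomics standing in for the kernel's atomic_t and bitops; the struct and function names here are illustrative only, not kernel code.

/* Hypothetical userspace analogue of the r1_bio completion accounting;
 * build with: cc -std=c11 sketch.c */
#include <stdatomic.h>
#include <stdio.h>

struct r1bio_sketch {
	atomic_int remaining;	/* mirror writes still in flight */
	atomic_int degraded;	/* did any mirror write fail? */
};

/* Called once per mirror-write completion
 * (kernel counterpart: raid1_end_write_request). */
static void end_write(struct r1bio_sketch *r1, int uptodate)
{
	if (!uptodate)
		atomic_store(&r1->degraded, 1);	/* can't clear the bitmap */

	/* Mirrors atomic_dec_and_test(): only the final completion sees
	 * the counter drop from 1 to 0, so the master bio ends once. */
	if (atomic_fetch_sub(&r1->remaining, 1) == 1)
		printf("master bio done (%s)\n",
		       atomic_load(&r1->degraded) ? "degraded" : "clean");
}

int main(void)
{
	struct r1bio_sketch r1 = { .remaining = 3 };

	end_write(&r1, 1);
	end_write(&r1, 0);	/* one mirror failed */
	end_write(&r1, 1);	/* last drop ends the master bio */
	return 0;
}
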
@@ -788,16 +779,13 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	struct page **behind_pages = NULL;
 	const int rw = bio_data_dir(bio);
 	const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
-	unsigned long do_barriers;
+	const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
 	mdk_rdev_t *blocked_rdev;
 
 	/*
 	 * Register the new request and wait if the reconstruction
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
-	 * We test barriers_work *after* md_write_start as md_write_start
-	 * may cause the first superblock write, and that will check out
-	 * if barriers work.
 	 */
 
 	md_write_start(mddev, bio); /* wait on superblock update early */
@@ -821,13 +809,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		}
 		finish_wait(&conf->wait_barrier, &w);
 	}
-	if (unlikely(!mddev->barriers_work &&
-		     (bio->bi_rw & REQ_HARDBARRIER))) {
-		if (rw == WRITE)
-			md_write_end(mddev);
-		bio_endio(bio, -EOPNOTSUPP);
-		return 0;
-	}
 
 	wait_barrier(conf);
 
@@ -959,10 +940,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	atomic_set(&r1_bio->remaining, 0);
 	atomic_set(&r1_bio->behind_remaining, 0);
 
-	do_barriers = bio->bi_rw & REQ_HARDBARRIER;
-	if (do_barriers)
-		set_bit(R1BIO_Barrier, &r1_bio->state);
-
 	bio_list_init(&bl);
 	for (i = 0; i < disks; i++) {
 		struct bio *mbio;
@@ -975,7 +952,7 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset;
 		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
 		mbio->bi_end_io = raid1_end_write_request;
-		mbio->bi_rw = WRITE | do_barriers | do_sync;
+		mbio->bi_rw = WRITE | do_flush_fua | do_sync;
 		mbio->bi_private = r1_bio;
 
 		if (behind_pages) {
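
With REQ_HARDBARRIER gone, the write path above reduces to masking the caller's flush/FUA bits once and replaying them on every mirror bio. A self-contained sketch of that passthrough follows; the flag values are mock stand-ins chosen for illustration, not the kernel's REQ_* definitions.

/* Standalone sketch of the flush/FUA flag passthrough.
 * Flag values are mock constants, not the block layer's. */
#include <stdio.h>

#define WRITE		(1u << 0)
#define REQ_SYNC	(1u << 1)	/* mock: synchronous request */
#define REQ_FLUSH	(1u << 2)	/* mock: preflush cache */
#define REQ_FUA		(1u << 3)	/* mock: force unit access */

static unsigned int mirror_rw(unsigned int master_rw)
{
	/* Replaces the old REQ_HARDBARRIER handling: carry the
	 * caller's flush/FUA and sync bits through unchanged. */
	const unsigned int do_sync = master_rw & REQ_SYNC;
	const unsigned int do_flush_fua = master_rw & (REQ_FLUSH | REQ_FUA);

	return WRITE | do_flush_fua | do_sync;
}

int main(void)
{
	printf("plain write -> %#x\n", mirror_rw(WRITE));
	printf("FUA write   -> %#x\n", mirror_rw(WRITE | REQ_FUA));
	printf("flush+sync  -> %#x\n", mirror_rw(WRITE | REQ_FLUSH | REQ_SYNC));
	return 0;
}
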
@@ -1634,41 +1611,6 @@ static void raid1d(mddev_t *mddev)
 			if (test_bit(R1BIO_IsSync, &r1_bio->state)) {
 				sync_request_write(mddev, r1_bio);
 				unplug = 1;
-			} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
-				/* some requests in the r1bio were REQ_HARDBARRIER
-				 * requests which failed with -EOPNOTSUPP.  Hohumm..
-				 * Better resubmit without the barrier.
-				 * We know which devices to resubmit for, because
-				 * all others have had their bios[] entry cleared.
-				 * We already have a nr_pending reference on these rdevs.
-				 */
-				int i;
-				const unsigned long do_sync = (r1_bio->master_bio->bi_rw & REQ_SYNC);
-				clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
-				clear_bit(R1BIO_Barrier, &r1_bio->state);
-				for (i=0; i < conf->raid_disks; i++)
-					if (r1_bio->bios[i])
-						atomic_inc(&r1_bio->remaining);
-				for (i=0; i < conf->raid_disks; i++)
-					if (r1_bio->bios[i]) {
-						struct bio_vec *bvec;
-						int j;
-
-						bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
-						/* copy pages from the failed bio, as
-						 * this might be a write-behind device */
-						__bio_for_each_segment(bvec, bio, j, 0)
-							bvec->bv_page = bio_iovec_idx(r1_bio->bios[i], j)->bv_page;
-						bio_put(r1_bio->bios[i]);
-						bio->bi_sector = r1_bio->sector +
-							conf->mirrors[i].rdev->data_offset;
-						bio->bi_bdev = conf->mirrors[i].rdev->bdev;
-						bio->bi_end_io = raid1_end_write_request;
-						bio->bi_rw = WRITE | do_sync;
-						bio->bi_private = r1_bio;
-						r1_bio->bios[i] = bio;
-						generic_make_request(bio);
-					}
 			} else {
 				int disk;
 
