diff options
author | NeilBrown <neilb@suse.de> | 2011-05-11 00:51:19 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2011-05-11 00:51:19 -0400 |
commit | af6d7b760c7547c1a410a333bdb24daed24e1043 (patch) | |
tree | 5789fa005f94911bc6a92a6cb8700b8025a3adc4 /drivers/md | |
parent | 7ca78d57d11a91bc93b35342fa58647b85bedeb1 (diff) |
md/raid1: improve handling of pages allocated for write-behind.
The current handling and freeing of these pages is a bit fragile.
We only keep the list of allocated pages in each bio, so we still need
a valid bio when freeing the pages, which is a bit clumsy.
Instead, simply store the allocated page list in the r1_bio, where it can
easily be found and freed when we are finished with the r1_bio.
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid1.c | 55 | ||||
-rw-r--r-- | drivers/md/raid1.h | 4 |
2 files changed, 29 insertions, 30 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index b9d6da1272f1..779abbd2bb1d 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -297,23 +297,24 @@ static void raid1_end_read_request(struct bio *bio, int error) | |||
297 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); | 297 | rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); |
298 | } | 298 | } |
299 | 299 | ||
300 | static void r1_bio_write_done(r1bio_t *r1_bio, int vcnt, struct bio_vec *bv, | 300 | static void r1_bio_write_done(r1bio_t *r1_bio) |
301 | int behind) | ||
302 | { | 301 | { |
303 | if (atomic_dec_and_test(&r1_bio->remaining)) | 302 | if (atomic_dec_and_test(&r1_bio->remaining)) |
304 | { | 303 | { |
305 | /* it really is the end of this request */ | 304 | /* it really is the end of this request */ |
306 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { | 305 | if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { |
307 | /* free extra copy of the data pages */ | 306 | /* free extra copy of the data pages */ |
308 | int i = vcnt; | 307 | int i = r1_bio->behind_page_count; |
309 | while (i--) | 308 | while (i--) |
310 | safe_put_page(bv[i].bv_page); | 309 | safe_put_page(r1_bio->behind_pages[i]); |
310 | kfree(r1_bio->behind_pages); | ||
311 | r1_bio->behind_pages = NULL; | ||
311 | } | 312 | } |
312 | /* clear the bitmap if all writes complete successfully */ | 313 | /* clear the bitmap if all writes complete successfully */ |
313 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, | 314 | bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, |
314 | r1_bio->sectors, | 315 | r1_bio->sectors, |
315 | !test_bit(R1BIO_Degraded, &r1_bio->state), | 316 | !test_bit(R1BIO_Degraded, &r1_bio->state), |
316 | behind); | 317 | test_bit(R1BIO_BehindIO, &r1_bio->state)); |
317 | md_write_end(r1_bio->mddev); | 318 | md_write_end(r1_bio->mddev); |
318 | raid_end_bio_io(r1_bio); | 319 | raid_end_bio_io(r1_bio); |
319 | } | 320 | } |
@@ -386,7 +387,7 @@ static void raid1_end_write_request(struct bio *bio, int error) | |||
386 | * Let's see if all mirrored write operations have finished | 387 | * Let's see if all mirrored write operations have finished |
387 | * already. | 388 | * already. |
388 | */ | 389 | */ |
389 | r1_bio_write_done(r1_bio, bio->bi_vcnt, bio->bi_io_vec, behind); | 390 | r1_bio_write_done(r1_bio); |
390 | 391 | ||
391 | if (to_put) | 392 | if (to_put) |
392 | bio_put(to_put); | 393 | bio_put(to_put); |
@@ -660,37 +661,36 @@ static void unfreeze_array(conf_t *conf) | |||
660 | 661 | ||
661 | 662 | ||
662 | /* duplicate the data pages for behind I/O | 663 | /* duplicate the data pages for behind I/O |
663 | * We return a list of bio_vec rather than just page pointers | ||
664 | * as it makes freeing easier | ||
665 | */ | 664 | */ |
666 | static struct bio_vec *alloc_behind_pages(struct bio *bio) | 665 | static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio) |
667 | { | 666 | { |
668 | int i; | 667 | int i; |
669 | struct bio_vec *bvec; | 668 | struct bio_vec *bvec; |
670 | struct bio_vec *pages = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec), | 669 | struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*), |
671 | GFP_NOIO); | 670 | GFP_NOIO); |
672 | if (unlikely(!pages)) | 671 | if (unlikely(!pages)) |
673 | goto do_sync_io; | 672 | return; |
674 | 673 | ||
675 | bio_for_each_segment(bvec, bio, i) { | 674 | bio_for_each_segment(bvec, bio, i) { |
676 | pages[i].bv_page = alloc_page(GFP_NOIO); | 675 | pages[i] = alloc_page(GFP_NOIO); |
677 | if (unlikely(!pages[i].bv_page)) | 676 | if (unlikely(!pages[i])) |
678 | goto do_sync_io; | 677 | goto do_sync_io; |
679 | memcpy(kmap(pages[i].bv_page) + bvec->bv_offset, | 678 | memcpy(kmap(pages[i]) + bvec->bv_offset, |
680 | kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); | 679 | kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); |
681 | kunmap(pages[i].bv_page); | 680 | kunmap(pages[i]); |
682 | kunmap(bvec->bv_page); | 681 | kunmap(bvec->bv_page); |
683 | } | 682 | } |
684 | 683 | r1_bio->behind_pages = pages; | |
685 | return pages; | 684 | r1_bio->behind_page_count = bio->bi_vcnt; |
685 | set_bit(R1BIO_BehindIO, &r1_bio->state); | ||
686 | return; | ||
686 | 687 | ||
687 | do_sync_io: | 688 | do_sync_io: |
688 | if (pages) | 689 | for (i = 0; i < bio->bi_vcnt; i++) |
689 | for (i = 0; i < bio->bi_vcnt && pages[i].bv_page; i++) | 690 | if (pages[i]) |
690 | put_page(pages[i].bv_page); | 691 | put_page(pages[i]); |
691 | kfree(pages); | 692 | kfree(pages); |
692 | PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); | 693 | PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); |
693 | return NULL; | ||
694 | } | 694 | } |
695 | 695 | ||
696 | static int make_request(mddev_t *mddev, struct bio * bio) | 696 | static int make_request(mddev_t *mddev, struct bio * bio) |
@@ -702,7 +702,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
702 | int i, targets = 0, disks; | 702 | int i, targets = 0, disks; |
703 | struct bitmap *bitmap; | 703 | struct bitmap *bitmap; |
704 | unsigned long flags; | 704 | unsigned long flags; |
705 | struct bio_vec *behind_pages = NULL; | ||
706 | const int rw = bio_data_dir(bio); | 705 | const int rw = bio_data_dir(bio); |
707 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 706 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
708 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); | 707 | const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); |
@@ -855,9 +854,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
855 | if (bitmap && | 854 | if (bitmap && |
856 | (atomic_read(&bitmap->behind_writes) | 855 | (atomic_read(&bitmap->behind_writes) |
857 | < mddev->bitmap_info.max_write_behind) && | 856 | < mddev->bitmap_info.max_write_behind) && |
858 | !waitqueue_active(&bitmap->behind_wait) && | 857 | !waitqueue_active(&bitmap->behind_wait)) |
859 | (behind_pages = alloc_behind_pages(bio)) != NULL) | 858 | alloc_behind_pages(bio, r1_bio); |
860 | set_bit(R1BIO_BehindIO, &r1_bio->state); | ||
861 | 859 | ||
862 | atomic_set(&r1_bio->remaining, 1); | 860 | atomic_set(&r1_bio->remaining, 1); |
863 | atomic_set(&r1_bio->behind_remaining, 0); | 861 | atomic_set(&r1_bio->behind_remaining, 0); |
@@ -878,7 +876,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
878 | mbio->bi_rw = WRITE | do_flush_fua | do_sync; | 876 | mbio->bi_rw = WRITE | do_flush_fua | do_sync; |
879 | mbio->bi_private = r1_bio; | 877 | mbio->bi_private = r1_bio; |
880 | 878 | ||
881 | if (behind_pages) { | 879 | if (r1_bio->behind_pages) { |
882 | struct bio_vec *bvec; | 880 | struct bio_vec *bvec; |
883 | int j; | 881 | int j; |
884 | 882 | ||
@@ -890,7 +888,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
890 | * them all | 888 | * them all |
891 | */ | 889 | */ |
892 | __bio_for_each_segment(bvec, mbio, j, 0) | 890 | __bio_for_each_segment(bvec, mbio, j, 0) |
893 | bvec->bv_page = behind_pages[j].bv_page; | 891 | bvec->bv_page = r1_bio->behind_pages[j]; |
894 | if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) | 892 | if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) |
895 | atomic_inc(&r1_bio->behind_remaining); | 893 | atomic_inc(&r1_bio->behind_remaining); |
896 | } | 894 | } |
@@ -900,8 +898,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
900 | bio_list_add(&conf->pending_bio_list, mbio); | 898 | bio_list_add(&conf->pending_bio_list, mbio); |
901 | spin_unlock_irqrestore(&conf->device_lock, flags); | 899 | spin_unlock_irqrestore(&conf->device_lock, flags); |
902 | } | 900 | } |
903 | r1_bio_write_done(r1_bio, bio->bi_vcnt, behind_pages, behind_pages != NULL); | 901 | r1_bio_write_done(r1_bio); |
904 | kfree(behind_pages); /* the behind pages are attached to the bios now */ | ||
905 | 902 | ||
906 | /* In case raid1d snuck in to freeze_array */ | 903 | /* In case raid1d snuck in to freeze_array */ |
907 | wake_up(&conf->wait_barrier); | 904 | wake_up(&conf->wait_barrier); |
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index cbfdf1a6acd9..5fc4ca1af863 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h | |||
@@ -94,7 +94,9 @@ struct r1bio_s { | |||
94 | int read_disk; | 94 | int read_disk; |
95 | 95 | ||
96 | struct list_head retry_list; | 96 | struct list_head retry_list; |
97 | struct bitmap_update *bitmap_update; | 97 | /* Next two are only valid when R1BIO_BehindIO is set */ |
98 | struct page **behind_pages; | ||
99 | int behind_page_count; | ||
98 | /* | 100 | /* |
99 | * if the IO is in WRITE direction, then multiple bios are used. | 101 | * if the IO is in WRITE direction, then multiple bios are used. |
100 | * We choose the number when they are allocated. | 102 | * We choose the number when they are allocated. |