diff options
author | NeilBrown <neilb@suse.de> | 2010-10-18 21:54:01 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2010-10-28 02:34:07 -0400 |
commit | 4e78064f42ad474ce9c31760861f7fb0cfc22532 (patch) | |
tree | 3a1abaa98ebcbd62eacfbe95d72e44195fb3bc1f /drivers/md/raid10.c | |
parent | e804ac780e2f01cb3b914daca2fd4780d1743db1 (diff) |
md: Fix possible deadlock with multiple mempool allocations.
It is not safe to allocate from a mempool while holding an item
previously allocated from that mempool as that can deadlock when the
mempool is close to exhaustion.
So don't use a bio list to collect the bios to write to multiple
devices in raid1 and raid10.
Instead queue each bio as it becomes available so an unplug will
activate all previously allocated bios and so a new bio has a chance
of being allocated.
This means we must set the 'remaining' count to '1' before submitting
any requests, then when all are submitted, decrement 'remaining' and
possibly handle the write completion at that point.
Reported-by: Torsten Kaiser <just.for.lkml@googlemail.com>
Tested-by: Torsten Kaiser <just.for.lkml@googlemail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md/raid10.c')
-rw-r--r-- | drivers/md/raid10.c | 25 |
1 files changed, 12 insertions, 13 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 387fe4b4fab7..8f5543a62416 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -801,7 +801,6 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
801 | const int rw = bio_data_dir(bio); | 801 | const int rw = bio_data_dir(bio); |
802 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); | 802 | const unsigned long do_sync = (bio->bi_rw & REQ_SYNC); |
803 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); | 803 | const unsigned long do_fua = (bio->bi_rw & REQ_FUA); |
804 | struct bio_list bl; | ||
805 | unsigned long flags; | 804 | unsigned long flags; |
806 | mdk_rdev_t *blocked_rdev; | 805 | mdk_rdev_t *blocked_rdev; |
807 | 806 | ||
@@ -950,9 +949,9 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
950 | goto retry_write; | 949 | goto retry_write; |
951 | } | 950 | } |
952 | 951 | ||
953 | atomic_set(&r10_bio->remaining, 0); | 952 | atomic_set(&r10_bio->remaining, 1); |
953 | bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0); | ||
954 | 954 | ||
955 | bio_list_init(&bl); | ||
956 | for (i = 0; i < conf->copies; i++) { | 955 | for (i = 0; i < conf->copies; i++) { |
957 | struct bio *mbio; | 956 | struct bio *mbio; |
958 | int d = r10_bio->devs[i].devnum; | 957 | int d = r10_bio->devs[i].devnum; |
@@ -970,22 +969,22 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
970 | mbio->bi_private = r10_bio; | 969 | mbio->bi_private = r10_bio; |
971 | 970 | ||
972 | atomic_inc(&r10_bio->remaining); | 971 | atomic_inc(&r10_bio->remaining); |
973 | bio_list_add(&bl, mbio); | 972 | spin_lock_irqsave(&conf->device_lock, flags); |
973 | bio_list_add(&conf->pending_bio_list, mbio); | ||
974 | blk_plug_device(mddev->queue); | ||
975 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
974 | } | 976 | } |
975 | 977 | ||
976 | if (unlikely(!atomic_read(&r10_bio->remaining))) { | 978 | if (atomic_dec_and_test(&r10_bio->remaining)) { |
977 | /* the array is dead */ | 979 | /* This matches the end of raid10_end_write_request() */ |
980 | bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, | ||
981 | r10_bio->sectors, | ||
982 | !test_bit(R10BIO_Degraded, &r10_bio->state), | ||
983 | 0); | ||
978 | md_write_end(mddev); | 984 | md_write_end(mddev); |
979 | raid_end_bio_io(r10_bio); | 985 | raid_end_bio_io(r10_bio); |
980 | return 0; | ||
981 | } | 986 | } |
982 | 987 | ||
983 | bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0); | ||
984 | spin_lock_irqsave(&conf->device_lock, flags); | ||
985 | bio_list_merge(&conf->pending_bio_list, &bl); | ||
986 | blk_plug_device(mddev->queue); | ||
987 | spin_unlock_irqrestore(&conf->device_lock, flags); | ||
988 | |||
989 | /* In case raid10d snuck in to freeze_array */ | 988 | /* In case raid10d snuck in to freeze_array */ |
990 | wake_up(&conf->wait_barrier); | 989 | wake_up(&conf->wait_barrier); |
991 | 990 | ||