diff options
author | NeilBrown <neilb@suse.de> | 2010-03-30 20:21:44 -0400 |
---|---|---|
committer | NeilBrown <neilb@suse.de> | 2010-05-18 01:27:57 -0400 |
commit | e555190d82c0f58e825e3cbd9e6ebe2e7ac713bd (patch) | |
tree | ea9c8e548c4d5eb5fc903bab05af3bda4192ed00 | |
parent | d754c5ae1ff76b20d3ecde8ad666d7865eada8ae (diff) |
md/raid1: delay reads that could overtake behind-writes.
When a raid1 array is configured to support write-behind
on some devices, it normally only reads from other devices.
If all remaining devices are write-behind (because the rest have failed)
it is possible for a read request to be serviced before a
behind-write request, which would appear as data corruption.
So when forced to read from a WriteMostly device, wait for any
write-behind to complete, and don't start any more behind-writes.
Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- | drivers/md/bitmap.c | 4 | ||||
-rw-r--r-- | drivers/md/bitmap.h | 1 | ||||
-rw-r--r-- | drivers/md/raid1.c | 25 |
3 files changed, 22 insertions, 8 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 49d6080387c8..c9c6a345e17b 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -1356,7 +1356,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto | |||
1356 | { | 1356 | { |
1357 | if (!bitmap) return; | 1357 | if (!bitmap) return; |
1358 | if (behind) { | 1358 | if (behind) { |
1359 | atomic_dec(&bitmap->behind_writes); | 1359 | if (atomic_dec_and_test(&bitmap->behind_writes)) |
1360 | wake_up(&bitmap->behind_wait); | ||
1360 | PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n", | 1361 | PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n", |
1361 | atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); | 1362 | atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); |
1362 | } | 1363 | } |
@@ -1680,6 +1681,7 @@ int bitmap_create(mddev_t *mddev) | |||
1680 | atomic_set(&bitmap->pending_writes, 0); | 1681 | atomic_set(&bitmap->pending_writes, 0); |
1681 | init_waitqueue_head(&bitmap->write_wait); | 1682 | init_waitqueue_head(&bitmap->write_wait); |
1682 | init_waitqueue_head(&bitmap->overflow_wait); | 1683 | init_waitqueue_head(&bitmap->overflow_wait); |
1684 | init_waitqueue_head(&bitmap->behind_wait); | ||
1683 | 1685 | ||
1684 | bitmap->mddev = mddev; | 1686 | bitmap->mddev = mddev; |
1685 | 1687 | ||
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index aa82b7caa85f..3797dea4723a 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h | |||
@@ -240,6 +240,7 @@ struct bitmap { | |||
240 | atomic_t pending_writes; /* pending writes to the bitmap file */ | 240 | atomic_t pending_writes; /* pending writes to the bitmap file */ |
241 | wait_queue_head_t write_wait; | 241 | wait_queue_head_t write_wait; |
242 | wait_queue_head_t overflow_wait; | 242 | wait_queue_head_t overflow_wait; |
243 | wait_queue_head_t behind_wait; | ||
243 | 244 | ||
244 | struct sysfs_dirent *sysfs_can_clear; | 245 | struct sysfs_dirent *sysfs_can_clear; |
245 | }; | 246 | }; |
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 2e08e48b02d9..cb2da87ad593 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -857,6 +857,15 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
857 | } | 857 | } |
858 | mirror = conf->mirrors + rdisk; | 858 | mirror = conf->mirrors + rdisk; |
859 | 859 | ||
860 | if (test_bit(WriteMostly, &mirror->rdev->flags) && | ||
861 | bitmap) { | ||
862 | /* Reading from a write-mostly device must | ||
863 | * take care not to over-take any writes | ||
864 | * that are 'behind' | ||
865 | */ | ||
866 | wait_event(bitmap->behind_wait, | ||
867 | atomic_read(&bitmap->behind_writes) == 0); | ||
868 | } | ||
860 | r1_bio->read_disk = rdisk; | 869 | r1_bio->read_disk = rdisk; |
861 | 870 | ||
862 | read_bio = bio_clone(bio, GFP_NOIO); | 871 | read_bio = bio_clone(bio, GFP_NOIO); |
@@ -934,10 +943,14 @@ static int make_request(mddev_t *mddev, struct bio * bio) | |||
934 | set_bit(R1BIO_Degraded, &r1_bio->state); | 943 | set_bit(R1BIO_Degraded, &r1_bio->state); |
935 | } | 944 | } |
936 | 945 | ||
937 | /* do behind I/O ? */ | 946 | /* do behind I/O ? |
947 | * Not if there are too many, or cannot allocate memory, | ||
948 | * or a reader on WriteMostly is waiting for behind writes | ||
949 | * to flush */ | ||
938 | if (bitmap && | 950 | if (bitmap && |
939 | (atomic_read(&bitmap->behind_writes) | 951 | (atomic_read(&bitmap->behind_writes) |
940 | < mddev->bitmap_info.max_write_behind) && | 952 | < mddev->bitmap_info.max_write_behind) && |
953 | !waitqueue_active(&bitmap->behind_wait) && | ||
941 | (behind_pages = alloc_behind_pages(bio)) != NULL) | 954 | (behind_pages = alloc_behind_pages(bio)) != NULL) |
942 | set_bit(R1BIO_BehindIO, &r1_bio->state); | 955 | set_bit(R1BIO_BehindIO, &r1_bio->state); |
943 | 956 | ||
@@ -2144,15 +2157,13 @@ static int stop(mddev_t *mddev) | |||
2144 | { | 2157 | { |
2145 | conf_t *conf = mddev->private; | 2158 | conf_t *conf = mddev->private; |
2146 | struct bitmap *bitmap = mddev->bitmap; | 2159 | struct bitmap *bitmap = mddev->bitmap; |
2147 | int behind_wait = 0; | ||
2148 | 2160 | ||
2149 | /* wait for behind writes to complete */ | 2161 | /* wait for behind writes to complete */ |
2150 | while (bitmap && atomic_read(&bitmap->behind_writes) > 0) { | 2162 | if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { |
2151 | behind_wait++; | 2163 | printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop.\n", mdname(mddev)); |
2152 | printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait); | ||
2153 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
2154 | schedule_timeout(HZ); /* wait a second */ | ||
2155 | /* need to kick something here to make sure I/O goes? */ | 2164 | /* need to kick something here to make sure I/O goes? */ |
2165 | wait_event(bitmap->behind_wait, | ||
2166 | atomic_read(&bitmap->behind_writes) == 0); | ||
2156 | } | 2167 | } |
2157 | 2168 | ||
2158 | raise_barrier(conf); | 2169 | raise_barrier(conf); |