aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2010-03-30 20:21:44 -0400
committer NeilBrown <neilb@suse.de> 2010-05-18 01:27:57 -0400
commit e555190d82c0f58e825e3cbd9e6ebe2e7ac713bd (patch)
tree ea9c8e548c4d5eb5fc903bab05af3bda4192ed00
parent d754c5ae1ff76b20d3ecde8ad666d7865eada8ae (diff)
md/raid1: delay reads that could overtake behind-writes.
When a raid1 array is configured to support write-behind on some devices, it normally only reads from other devices. If all devices are write-behind (because the rest have failed) it is possible for a read request to be serviced before a behind-write request, which would appear as data corruption. So when forced to read from a WriteMostly device, wait for any write-behind to complete, and don't start any more behind-writes.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/bitmap.c 4
-rw-r--r-- drivers/md/bitmap.h 1
-rw-r--r-- drivers/md/raid1.c 25
3 files changed, 22 insertions, 8 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 49d6080387c8..c9c6a345e17b 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1356,7 +1356,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
1356{ 1356{
1357 if (!bitmap) return; 1357 if (!bitmap) return;
1358 if (behind) { 1358 if (behind) {
1359 atomic_dec(&bitmap->behind_writes); 1359 if (atomic_dec_and_test(&bitmap->behind_writes))
1360 wake_up(&bitmap->behind_wait);
1360 PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n", 1361 PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
1361 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind); 1362 atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
1362 } 1363 }
@@ -1680,6 +1681,7 @@ int bitmap_create(mddev_t *mddev)
1680 atomic_set(&bitmap->pending_writes, 0); 1681 atomic_set(&bitmap->pending_writes, 0);
1681 init_waitqueue_head(&bitmap->write_wait); 1682 init_waitqueue_head(&bitmap->write_wait);
1682 init_waitqueue_head(&bitmap->overflow_wait); 1683 init_waitqueue_head(&bitmap->overflow_wait);
1684 init_waitqueue_head(&bitmap->behind_wait);
1683 1685
1684 bitmap->mddev = mddev; 1686 bitmap->mddev = mddev;
1685 1687
diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
index aa82b7caa85f..3797dea4723a 100644
--- a/drivers/md/bitmap.h
+++ b/drivers/md/bitmap.h
@@ -240,6 +240,7 @@ struct bitmap {
240 atomic_t pending_writes; /* pending writes to the bitmap file */ 240 atomic_t pending_writes; /* pending writes to the bitmap file */
241 wait_queue_head_t write_wait; 241 wait_queue_head_t write_wait;
242 wait_queue_head_t overflow_wait; 242 wait_queue_head_t overflow_wait;
243 wait_queue_head_t behind_wait;
243 244
244 struct sysfs_dirent *sysfs_can_clear; 245 struct sysfs_dirent *sysfs_can_clear;
245}; 246};
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2e08e48b02d9..cb2da87ad593 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -857,6 +857,15 @@ static int make_request(mddev_t *mddev, struct bio * bio)
857 } 857 }
858 mirror = conf->mirrors + rdisk; 858 mirror = conf->mirrors + rdisk;
859 859
860 if (test_bit(WriteMostly, &mirror->rdev->flags) &&
861 bitmap) {
862 /* Reading from a write-mostly device must
863 * take care not to over-take any writes
864 * that are 'behind'
865 */
866 wait_event(bitmap->behind_wait,
867 atomic_read(&bitmap->behind_writes) == 0);
868 }
860 r1_bio->read_disk = rdisk; 869 r1_bio->read_disk = rdisk;
861 870
862 read_bio = bio_clone(bio, GFP_NOIO); 871 read_bio = bio_clone(bio, GFP_NOIO);
@@ -934,10 +943,14 @@ static int make_request(mddev_t *mddev, struct bio * bio)
934 set_bit(R1BIO_Degraded, &r1_bio->state); 943 set_bit(R1BIO_Degraded, &r1_bio->state);
935 } 944 }
936 945
937 /* do behind I/O ? */ 946 /* do behind I/O ?
947 * Not if there are too many, or cannot allocate memory,
948 * or a reader on WriteMostly is waiting for behind writes
949 * to flush */
938 if (bitmap && 950 if (bitmap &&
939 (atomic_read(&bitmap->behind_writes) 951 (atomic_read(&bitmap->behind_writes)
940 < mddev->bitmap_info.max_write_behind) && 952 < mddev->bitmap_info.max_write_behind) &&
953 !waitqueue_active(&bitmap->behind_wait) &&
941 (behind_pages = alloc_behind_pages(bio)) != NULL) 954 (behind_pages = alloc_behind_pages(bio)) != NULL)
942 set_bit(R1BIO_BehindIO, &r1_bio->state); 955 set_bit(R1BIO_BehindIO, &r1_bio->state);
943 956
@@ -2144,15 +2157,13 @@ static int stop(mddev_t *mddev)
2144{ 2157{
2145 conf_t *conf = mddev->private; 2158 conf_t *conf = mddev->private;
2146 struct bitmap *bitmap = mddev->bitmap; 2159 struct bitmap *bitmap = mddev->bitmap;
2147 int behind_wait = 0;
2148 2160
2149 /* wait for behind writes to complete */ 2161 /* wait for behind writes to complete */
2150 while (bitmap && atomic_read(&bitmap->behind_writes) > 0) { 2162 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
2151 behind_wait++; 2163 printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop.\n", mdname(mddev));
2152 printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait);
2153 set_current_state(TASK_UNINTERRUPTIBLE);
2154 schedule_timeout(HZ); /* wait a second */
2155 /* need to kick something here to make sure I/O goes? */ 2164 /* need to kick something here to make sure I/O goes? */
2165 wait_event(bitmap->behind_wait,
2166 atomic_read(&bitmap->behind_writes) == 0);
2156 } 2167 }
2157 2168
2158 raise_barrier(conf); 2169 raise_barrier(conf);