aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
author: NeilBrown <neilb@suse.de> 2006-07-10 07:44:17 -0400
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-07-10 16:24:17 -0400
commit: ae3c20ccf84c88d45616f12122f781a900118f09 (patch)
tree: 19d2f11d8a4766182402316632d32bd66d518c55 /drivers/md/raid5.c
parent: 7c785b7a18dc30572a49c6b75efd384269735d14 (diff)
[PATCH] md: fix some small races in bitmap plugging in raid5
The comment gives more details, but I didn't quite have the sequencing right, so there was room for races to leave bits unset in the on-disk bitmap for short periods of time.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c30
1 files changed, 27 insertions, 3 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6ba394082129..56303ff31730 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -18,6 +18,30 @@
18 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 18 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 */ 19 */
20 20
21/*
22 * BITMAP UNPLUGGING:
23 *
24 * The sequencing for updating the bitmap reliably is a little
25 * subtle (and I got it wrong the first time) so it deserves some
26 * explanation.
27 *
28 * We group bitmap updates into batches. Each batch has a number.
29 * We may write out several batches at once, but that isn't very important.
30 * conf->seq_write is the number of the last batch successfully written.
31 * conf->seq_flush is the number of the last batch that was closed to
32 * new additions.
33 * When we discover that we will need to write to any block in a stripe
34 * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq
35 * the number of the batch it will be in. This is seq_flush+1.
36 * When we are ready to do a write, if that batch hasn't been written yet,
37 * we plug the array and queue the stripe for later.
38 * When an unplug happens, we increment seq_flush, thus closing the current
39 * batch.
40 * When we notice that seq_flush > seq_write, we write out all pending updates
41 * to the bitmap, and advance seq_write to where seq_flush was.
42 * This may occasionally write a bit out twice, but is sure never to
43 * miss any bits.
44 */
21 45
22#include <linux/module.h> 46#include <linux/module.h>
23#include <linux/slab.h> 47#include <linux/slab.h>
@@ -92,7 +116,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
92 list_add_tail(&sh->lru, &conf->delayed_list); 116 list_add_tail(&sh->lru, &conf->delayed_list);
93 blk_plug_device(conf->mddev->queue); 117 blk_plug_device(conf->mddev->queue);
94 } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && 118 } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
95 conf->seq_write == sh->bm_seq) { 119 sh->bm_seq - conf->seq_write > 0) {
96 list_add_tail(&sh->lru, &conf->bitmap_list); 120 list_add_tail(&sh->lru, &conf->bitmap_list);
97 blk_plug_device(conf->mddev->queue); 121 blk_plug_device(conf->mddev->queue);
98 } else { 122 } else {
@@ -1273,9 +1297,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
1273 (unsigned long long)sh->sector, dd_idx); 1297 (unsigned long long)sh->sector, dd_idx);
1274 1298
1275 if (conf->mddev->bitmap && firstwrite) { 1299 if (conf->mddev->bitmap && firstwrite) {
1276 sh->bm_seq = conf->seq_write;
1277 bitmap_startwrite(conf->mddev->bitmap, sh->sector, 1300 bitmap_startwrite(conf->mddev->bitmap, sh->sector,
1278 STRIPE_SECTORS, 0); 1301 STRIPE_SECTORS, 0);
1302 sh->bm_seq = conf->seq_flush+1;
1279 set_bit(STRIPE_BIT_DELAY, &sh->state); 1303 set_bit(STRIPE_BIT_DELAY, &sh->state);
1280 } 1304 }
1281 1305
@@ -2918,7 +2942,7 @@ static void raid5d (mddev_t *mddev)
2918 while (1) { 2942 while (1) {
2919 struct list_head *first; 2943 struct list_head *first;
2920 2944
2921 if (conf->seq_flush - conf->seq_write > 0) { 2945 if (conf->seq_flush != conf->seq_write) {
2922 int seq = conf->seq_flush; 2946 int seq = conf->seq_flush;
2923 spin_unlock_irq(&conf->device_lock); 2947 spin_unlock_irq(&conf->device_lock);
2924 bitmap_unplug(mddev->bitmap); 2948 bitmap_unplug(mddev->bitmap);