diff options
author | NeilBrown <neilb@suse.de> | 2006-07-10 07:44:17 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-07-10 16:24:17 -0400 |
commit | ae3c20ccf84c88d45616f12122f781a900118f09 (patch) | |
tree | 19d2f11d8a4766182402316632d32bd66d518c55 /drivers/md | |
parent | 7c785b7a18dc30572a49c6b75efd384269735d14 (diff) |
[PATCH] md: fix some small races in bitmap plugging in raid5
The comment gives more details, but I didn't quite have the sequencing right,
so there was room for races to leave bits unset in the on-disk bitmap for
short periods of time.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/raid5.c | 30 |
1 files changed, 27 insertions, 3 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6ba394082129..56303ff31730 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -18,6 +18,30 @@ | |||
18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | /* | ||
22 | * BITMAP UNPLUGGING: | ||
23 | * | ||
24 | * The sequencing for updating the bitmap reliably is a little | ||
25 | * subtle (and I got it wrong the first time) so it deserves some | ||
26 | * explanation. | ||
27 | * | ||
28 | * We group bitmap updates into batches. Each batch has a number. | ||
29 | * We may write out several batches at once, but that isn't very important. | ||
30 | * conf->seq_write is the number of the last batch successfully written. | ||
31 | * conf->seq_flush is the number of the last batch that was closed to | ||
32 | * new additions. | ||
33 | * When we discover that we will need to write to any block in a stripe | ||
34 | * (in add_stripe_bio) we update the in-memory bitmap and record in sh->bm_seq | ||
35 | * the number of the batch it will be in. This is seq_flush+1. | ||
36 | * When we are ready to do a write, if that batch hasn't been written yet, | ||
37 | * we plug the array and queue the stripe for later. | ||
38 | * When an unplug happens, we increment seq_flush, thus closing the current | ||
39 | * batch. | ||
40 | * When we notice that seq_flush > seq_write, we write out all pending updates | ||
41 | * to the bitmap, and advance seq_write to where seq_flush was. | ||
42 | * This may occasionally write a bit out twice, but is sure never to | ||
43 | * miss any bits. | ||
44 | */ | ||
21 | 45 | ||
22 | #include <linux/module.h> | 46 | #include <linux/module.h> |
23 | #include <linux/slab.h> | 47 | #include <linux/slab.h> |
@@ -92,7 +116,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
92 | list_add_tail(&sh->lru, &conf->delayed_list); | 116 | list_add_tail(&sh->lru, &conf->delayed_list); |
93 | blk_plug_device(conf->mddev->queue); | 117 | blk_plug_device(conf->mddev->queue); |
94 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && | 118 | } else if (test_bit(STRIPE_BIT_DELAY, &sh->state) && |
95 | conf->seq_write == sh->bm_seq) { | 119 | sh->bm_seq - conf->seq_write > 0) { |
96 | list_add_tail(&sh->lru, &conf->bitmap_list); | 120 | list_add_tail(&sh->lru, &conf->bitmap_list); |
97 | blk_plug_device(conf->mddev->queue); | 121 | blk_plug_device(conf->mddev->queue); |
98 | } else { | 122 | } else { |
@@ -1273,9 +1297,9 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in | |||
1273 | (unsigned long long)sh->sector, dd_idx); | 1297 | (unsigned long long)sh->sector, dd_idx); |
1274 | 1298 | ||
1275 | if (conf->mddev->bitmap && firstwrite) { | 1299 | if (conf->mddev->bitmap && firstwrite) { |
1276 | sh->bm_seq = conf->seq_write; | ||
1277 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, | 1300 | bitmap_startwrite(conf->mddev->bitmap, sh->sector, |
1278 | STRIPE_SECTORS, 0); | 1301 | STRIPE_SECTORS, 0); |
1302 | sh->bm_seq = conf->seq_flush+1; | ||
1279 | set_bit(STRIPE_BIT_DELAY, &sh->state); | 1303 | set_bit(STRIPE_BIT_DELAY, &sh->state); |
1280 | } | 1304 | } |
1281 | 1305 | ||
@@ -2918,7 +2942,7 @@ static void raid5d (mddev_t *mddev) | |||
2918 | while (1) { | 2942 | while (1) { |
2919 | struct list_head *first; | 2943 | struct list_head *first; |
2920 | 2944 | ||
2921 | if (conf->seq_flush - conf->seq_write > 0) { | 2945 | if (conf->seq_flush != conf->seq_write) { |
2922 | int seq = conf->seq_flush; | 2946 | int seq = conf->seq_flush; |
2923 | spin_unlock_irq(&conf->device_lock); | 2947 | spin_unlock_irq(&conf->device_lock); |
2924 | bitmap_unplug(mddev->bitmap); | 2948 | bitmap_unplug(mddev->bitmap); |