From b1581566183f310abbd2d384a9079d4039faca05 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sun, 31 Jul 2005 22:34:50 -0700 Subject: [PATCH] md: make sure raid5/raid6 resync uses correct 'max_sectors' The default resync_max_sector is set to "mddev->size << 1". If the raid-personality-module updates mddev->size, it must update resync_max_sectors too. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid5.c | 1 + drivers/md/raid6main.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 4698d5f79575..43f231a467d5 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1653,6 +1653,7 @@ static int run (mddev_t *mddev) /* device size must be a multiple of chunk size */ mddev->size &= ~(mddev->chunk_size/1024 -1); + mddev->resync_max_sectors = mddev->size << 1; if (!conf->chunk_size || conf->chunk_size % 4) { printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index f5ee16805111..495dee1d1e83 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -1813,6 +1813,7 @@ static int run (mddev_t *mddev) /* device size must be a multiple of chunk size */ mddev->size &= ~(mddev->chunk_size/1024 -1); + mddev->resync_max_sectors = mddev->size << 1; if (conf->raid_disks < 4) { printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", -- cgit v1.2.2 From efd8be2a4280f334be9309fa4ca1fb8f4e29475d Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Aug 2005 12:53:32 -0700 Subject: [PATCH] md: remove a stray debugging printk. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 6580e0fa4a47..08f003aa6cd7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3484,7 +3484,6 @@ static void md_do_sync(mddev_t *mddev) goto skip; } ITERATE_MDDEV(mddev2,tmp) { - printk("."); if (mddev2 == mddev) continue; if (mddev2->curr_resync && -- cgit v1.2.2 From aa1595e9f3d0d731bcfc6c2680d5483b78f663dc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Aug 2005 12:53:32 -0700 Subject: [PATCH] md: make 'md' and alias for 'md-mod' Until the bitmap code was added, modprobe md would load the md module. But now the md module is called 'md-mod', so we really need an alias for backwards comparability. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 08f003aa6cd7..9fd4dbea0d0d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -4006,3 +4006,4 @@ EXPORT_SYMBOL(md_wakeup_thread); EXPORT_SYMBOL(md_print_devices); EXPORT_SYMBOL(md_check_recovery); MODULE_LICENSE("GPL"); +MODULE_ALIAS("md"); -- cgit v1.2.2 From 193f1c931517592ec4188d15bf261e4bff368207 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Aug 2005 12:53:33 -0700 Subject: [PATCH] md: always honour md bitmap being read from disk The code currently will ignore the bitmap if the array seem to be in-sync. This is wrong if the array is degraded, and probably wrong anyway. If the bitmap says some chunks are not in in-sync, and the superblock says everything IS in sync, then something is clearly wrong, and it is safer to trust the bitmap. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/bitmap.c | 52 +++++++++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 29 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 70bca955e0de..09d32db06d20 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -818,8 +818,7 @@ int bitmap_unplug(struct bitmap *bitmap) return 0; } -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int in_sync); +static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset); /* * bitmap_init_from_disk -- called at bitmap_create time to initialize * the in-memory bitmap from the on-disk bitmap -- also, sets up the * memory mapping of the bitmap file @@ -828,7 +827,7 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, * previously kicked from the array, we mark all the bits as * 1's in order to cause a full resync. */ -static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) +static int bitmap_init_from_disk(struct bitmap *bitmap) { unsigned long i, chunks, index, oldindex, bit; struct page *page = NULL, *oldpage = NULL; @@ -929,8 +928,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) } if (test_bit(bit, page_address(page))) { /* if the disk bit is set, set the memory bit */ - bitmap_set_memory_bits(bitmap, - i << CHUNK_BLOCK_SHIFT(bitmap), 1, in_sync); + bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap)); bit_cnt++; } } @@ -1426,35 +1424,30 @@ void bitmap_close_sync(struct bitmap *bitmap) } } -static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, - unsigned long sectors, int in_sync) +static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset) { /* For each chunk covered by any of these sectors, set the - * counter to 1 and set resync_needed unless in_sync. They should all + * counter to 1 and set resync_needed. They should all * be 0 at this point */ - while (sectors) { - int secs; - bitmap_counter_t *bmc; - spin_lock_irq(&bitmap->lock); - bmc = bitmap_get_counter(bitmap, offset, &secs, 1); - if (!bmc) { - spin_unlock_irq(&bitmap->lock); - return; - } - if (! *bmc) { - struct page *page; - *bmc = 1 | (in_sync? 0 : NEEDED_MASK); - bitmap_count_page(bitmap, offset, 1); - page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); - set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); - } + + int secs; + bitmap_counter_t *bmc; + spin_lock_irq(&bitmap->lock); + bmc = bitmap_get_counter(bitmap, offset, &secs, 1); + if (!bmc) { spin_unlock_irq(&bitmap->lock); - if (sectors > secs) - sectors -= secs; - else - sectors = 0; + return; } + if (! *bmc) { + struct page *page; + *bmc = 1 | NEEDED_MASK; + bitmap_count_page(bitmap, offset, 1); + page = filemap_get_page(bitmap, offset >> CHUNK_BLOCK_SHIFT(bitmap)); + set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); + } + spin_unlock_irq(&bitmap->lock); + } /* @@ -1565,7 +1558,8 @@ int bitmap_create(mddev_t *mddev) /* now that we have some pages available, initialize the in-memory * bitmap from the on-disk bitmap */ - err = bitmap_init_from_disk(bitmap, mddev->recovery_cp == MaxSector); + err = bitmap_init_from_disk(bitmap); + if (err) return err; -- cgit v1.2.2 From e3b9703e27aab3839dcdb76b00d98428b67d25b0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Aug 2005 12:53:34 -0700 Subject: [PATCH] md: yet another attempt to get bitmap-based resync to do the right thing in all cases... Firstly, R1BIO_Degraded was being set in a number of places in the resync code, but is never used there, so get rid of those settings. Then: When doing a resync, we want to clear the bit in the bitmap iff the array will be non-degraded when the sync has completed. However the current code would clear the bitmap if the array was non-degraded when the resync *started*, which obviously isn't right (it is for 'resync' but not for 'recovery' - i.e. rebuilding a failed drive). This patch calculated 'still_degraded' and uses the to tell bitmap_start_sync whether this sync should clear the corresponding bit. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/raid1.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index d3a64a04a6d8..51d9645ed09c 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -893,7 +893,6 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error) if (!uptodate) { md_error(r1_bio->mddev, conf->mirrors[r1_bio->read_disk].rdev); - set_bit(R1BIO_Degraded, &r1_bio->state); } else set_bit(R1BIO_Uptodate, &r1_bio->state); rdev_dec_pending(conf->mirrors[r1_bio->read_disk].rdev, conf->mddev); @@ -918,10 +917,9 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error) mirror = i; break; } - if (!uptodate) { + if (!uptodate) md_error(mddev, conf->mirrors[mirror].rdev); - set_bit(R1BIO_Degraded, &r1_bio->state); - } + update_head_pos(mirror, r1_bio); if (atomic_dec_and_test(&r1_bio->remaining)) { @@ -1109,6 +1107,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i int i; int write_targets = 0; int sync_blocks; + int still_degraded = 0; if (!conf->r1buf_pool) { @@ -1137,7 +1136,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i return 0; } - if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, mddev->degraded) && + /* before building a request, check if we can skip these blocks.. + * This call the bitmap_start_sync doesn't actually record anything + */ + if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) && !conf->fullsync) { /* We can skip this block, and probably several more */ *skipped = 1; @@ -1203,24 +1205,23 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (i == disk) { bio->bi_rw = READ; bio->bi_end_io = end_sync_read; - } else if (conf->mirrors[i].rdev && - !conf->mirrors[i].rdev->faulty && - (!conf->mirrors[i].rdev->in_sync || - sector_nr + RESYNC_SECTORS > mddev->recovery_cp)) { + } else if (conf->mirrors[i].rdev == NULL || + conf->mirrors[i].rdev->faulty) { + still_degraded = 1; + continue; + } else if (!conf->mirrors[i].rdev->in_sync || + sector_nr + RESYNC_SECTORS > mddev->recovery_cp) { bio->bi_rw = WRITE; bio->bi_end_io = end_sync_write; write_targets ++; } else + /* no need to read or write here */ continue; bio->bi_sector = sector_nr + conf->mirrors[i].rdev->data_offset; bio->bi_bdev = conf->mirrors[i].rdev->bdev; bio->bi_private = r1_bio; } - if (write_targets + 1 < conf->raid_disks) - /* array degraded, can't clear bitmap */ - set_bit(R1BIO_Degraded, &r1_bio->state); - if (write_targets == 0) { /* There is nowhere to write, so all non-sync * drives must be failed - so we are finished @@ -1243,7 +1244,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i break; if (sync_blocks == 0) { if (!bitmap_start_sync(mddev->bitmap, sector_nr, - &sync_blocks, mddev->degraded) && + &sync_blocks, still_degraded) && !conf->fullsync) break; if (sync_blocks < (PAGE_SIZE>>9)) -- cgit v1.2.2 From 6b8b3e8a8b3e62b4209eaa36697e3c9df457e196 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 4 Aug 2005 12:53:35 -0700 Subject: [PATCH] md: make sure md bitmap updates are flushed when array is stopped. The recent change to never ignore the bitmap, revealed that the bitmap isn't begin flushed properly when an array is stopped. We call bitmap_daemon_work three times as there is a three-stage pipeline for flushing updates to the bitmap file. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/bitmap.c | 23 +++++++++++++++++++++++ drivers/md/md.c | 2 ++ 2 files changed, 25 insertions(+) (limited to 'drivers/md') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 09d32db06d20..41df4cda66e2 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1450,6 +1450,29 @@ static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset) } +/* + * flush out any pending updates + */ +void bitmap_flush(mddev_t *mddev) +{ + struct bitmap *bitmap = mddev->bitmap; + int sleep; + + if (!bitmap) /* there was no bitmap */ + return; + + /* run the daemon_work three time to ensure everything is flushed + * that can be + */ + sleep = bitmap->daemon_sleep; + bitmap->daemon_sleep = 0; + bitmap_daemon_work(bitmap); + bitmap_daemon_work(bitmap); + bitmap_daemon_work(bitmap); + bitmap->daemon_sleep = sleep; + bitmap_update_sb(bitmap); +} + /* * free memory that was allocated */ diff --git a/drivers/md/md.c b/drivers/md/md.c index 9fd4dbea0d0d..480f658db6f2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1798,6 +1798,8 @@ static int do_md_stop(mddev_t * mddev, int ro) goto out; mddev->ro = 1; } else { + bitmap_flush(mddev); + wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); if (mddev->ro) set_disk_ro(disk, 0); blk_queue_make_request(mddev->queue, md_fail_request); -- cgit v1.2.2 From 48f1f5328267f52a34e61b8b0e6fc55a23c1348a Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 4 Aug 2005 12:53:37 -0700 Subject: [PATCH] dm-raid locking fix This code was never designed to handle more than one instance of do_work() running at once. Signed-Off-By: Alasdair G Kergon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/dm-raid1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 12031c9d3f1e..b08df8b9b2ca 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1230,7 +1230,7 @@ static int __init dm_mirror_init(void) if (r) return r; - _kmirrord_wq = create_workqueue("kmirrord"); + _kmirrord_wq = create_singlethread_workqueue("kmirrord"); if (!_kmirrord_wq) { DMERR("couldn't start kmirrord"); dm_dirty_log_exit(); -- cgit v1.2.2