diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-05-07 15:01:41 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-05-07 15:01:41 -0400 |
commit | ee7fee0b91ceb1c057c67fcc573b2d8dfe6d92c5 (patch) | |
tree | e209cce2b19f99e722bcf806bf7f7fa9511f8dd8 | |
parent | 8a0a9bd4db63bc45e3017bedeafbd88d0eb84d02 (diff) | |
parent | c4647292fda0833bebe45be27f04453b736981fa (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
md: remove rd%d links immediately after stopping an array.
md: remove ability to explicit set an inactive array to 'clean'.
md: constify VFTs
md: tidy up status_resync to handle large arrays.
md: fix some (more) errors with bitmaps on devices larger than 2TB.
md/raid10: don't clear bitmap during recovery if array will still be degraded.
md: fix loading of out-of-date bitmap.
-rw-r--r-- | drivers/md/bitmap.c | 29 | ||||
-rw-r--r-- | drivers/md/md.c | 70 | ||||
-rw-r--r-- | drivers/md/raid10.c | 12 |
3 files changed, 60 insertions, 51 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 1fb91edc7de2..47c68bc75a17 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c | |||
@@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
986 | oldindex = index; | 986 | oldindex = index; |
987 | oldpage = page; | 987 | oldpage = page; |
988 | 988 | ||
989 | bitmap->filemap[bitmap->file_pages++] = page; | ||
990 | bitmap->last_page_size = count; | ||
991 | |||
989 | if (outofdate) { | 992 | if (outofdate) { |
990 | /* | 993 | /* |
991 | * if bitmap is out of date, dirty the | 994 | * if bitmap is out of date, dirty the |
@@ -998,15 +1001,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
998 | write_page(bitmap, page, 1); | 1001 | write_page(bitmap, page, 1); |
999 | 1002 | ||
1000 | ret = -EIO; | 1003 | ret = -EIO; |
1001 | if (bitmap->flags & BITMAP_WRITE_ERROR) { | 1004 | if (bitmap->flags & BITMAP_WRITE_ERROR) |
1002 | /* release, page not in filemap yet */ | ||
1003 | put_page(page); | ||
1004 | goto err; | 1005 | goto err; |
1005 | } | ||
1006 | } | 1006 | } |
1007 | |||
1008 | bitmap->filemap[bitmap->file_pages++] = page; | ||
1009 | bitmap->last_page_size = count; | ||
1010 | } | 1007 | } |
1011 | paddr = kmap_atomic(page, KM_USER0); | 1008 | paddr = kmap_atomic(page, KM_USER0); |
1012 | if (bitmap->flags & BITMAP_HOSTENDIAN) | 1009 | if (bitmap->flags & BITMAP_HOSTENDIAN) |
@@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) | |||
1016 | kunmap_atomic(paddr, KM_USER0); | 1013 | kunmap_atomic(paddr, KM_USER0); |
1017 | if (b) { | 1014 | if (b) { |
1018 | /* if the disk bit is set, set the memory bit */ | 1015 | /* if the disk bit is set, set the memory bit */ |
1019 | bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap), | 1016 | int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) |
1020 | ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start) | 1017 | >= start); |
1021 | ); | 1018 | bitmap_set_memory_bits(bitmap, |
1019 | (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), | ||
1020 | needed); | ||
1022 | bit_cnt++; | 1021 | bit_cnt++; |
1023 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1022 | set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); |
1024 | } | 1023 | } |
@@ -1154,8 +1153,9 @@ void bitmap_daemon_work(struct bitmap *bitmap) | |||
1154 | spin_lock_irqsave(&bitmap->lock, flags); | 1153 | spin_lock_irqsave(&bitmap->lock, flags); |
1155 | clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); | 1154 | clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); |
1156 | } | 1155 | } |
1157 | bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), | 1156 | bmc = bitmap_get_counter(bitmap, |
1158 | &blocks, 0); | 1157 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), |
1158 | &blocks, 0); | ||
1159 | if (bmc) { | 1159 | if (bmc) { |
1160 | /* | 1160 | /* |
1161 | if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); | 1161 | if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); |
@@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap) | |||
1169 | } else if (*bmc == 1) { | 1169 | } else if (*bmc == 1) { |
1170 | /* we can clear the bit */ | 1170 | /* we can clear the bit */ |
1171 | *bmc = 0; | 1171 | *bmc = 0; |
1172 | bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), | 1172 | bitmap_count_page(bitmap, |
1173 | (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap), | ||
1173 | -1); | 1174 | -1); |
1174 | 1175 | ||
1175 | /* clear the bit */ | 1176 | /* clear the bit */ |
@@ -1514,7 +1515,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e) | |||
1514 | unsigned long chunk; | 1515 | unsigned long chunk; |
1515 | 1516 | ||
1516 | for (chunk = s; chunk <= e; chunk++) { | 1517 | for (chunk = s; chunk <= e; chunk++) { |
1517 | sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap); | 1518 | sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap); |
1518 | bitmap_set_memory_bits(bitmap, sec, 1); | 1519 | bitmap_set_memory_bits(bitmap, sec, 1); |
1519 | bitmap_file_set_bit(bitmap, sec); | 1520 | bitmap_file_set_bit(bitmap, sec); |
1520 | } | 1521 | } |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 612343fdde94..fccc8343a250 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -3066,11 +3066,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
3066 | } else | 3066 | } else |
3067 | err = -EBUSY; | 3067 | err = -EBUSY; |
3068 | spin_unlock_irq(&mddev->write_lock); | 3068 | spin_unlock_irq(&mddev->write_lock); |
3069 | } else { | 3069 | } else |
3070 | mddev->ro = 0; | 3070 | err = -EINVAL; |
3071 | mddev->recovery_cp = MaxSector; | ||
3072 | err = do_md_run(mddev); | ||
3073 | } | ||
3074 | break; | 3071 | break; |
3075 | case active: | 3072 | case active: |
3076 | if (mddev->pers) { | 3073 | if (mddev->pers) { |
@@ -4297,6 +4294,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4297 | { | 4294 | { |
4298 | int err = 0; | 4295 | int err = 0; |
4299 | struct gendisk *disk = mddev->gendisk; | 4296 | struct gendisk *disk = mddev->gendisk; |
4297 | mdk_rdev_t *rdev; | ||
4300 | 4298 | ||
4301 | if (atomic_read(&mddev->openers) > is_open) { | 4299 | if (atomic_read(&mddev->openers) > is_open) { |
4302 | printk("md: %s still in use.\n",mdname(mddev)); | 4300 | printk("md: %s still in use.\n",mdname(mddev)); |
@@ -4339,6 +4337,13 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4339 | /* tell userspace to handle 'inactive' */ | 4337 | /* tell userspace to handle 'inactive' */ |
4340 | sysfs_notify_dirent(mddev->sysfs_state); | 4338 | sysfs_notify_dirent(mddev->sysfs_state); |
4341 | 4339 | ||
4340 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
4341 | if (rdev->raid_disk >= 0) { | ||
4342 | char nm[20]; | ||
4343 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
4344 | sysfs_remove_link(&mddev->kobj, nm); | ||
4345 | } | ||
4346 | |||
4342 | set_capacity(disk, 0); | 4347 | set_capacity(disk, 0); |
4343 | mddev->changed = 1; | 4348 | mddev->changed = 1; |
4344 | 4349 | ||
@@ -4359,7 +4364,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4359 | * Free resources if final stop | 4364 | * Free resources if final stop |
4360 | */ | 4365 | */ |
4361 | if (mode == 0) { | 4366 | if (mode == 0) { |
4362 | mdk_rdev_t *rdev; | ||
4363 | 4367 | ||
4364 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); | 4368 | printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); |
4365 | 4369 | ||
@@ -4371,13 +4375,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4371 | } | 4375 | } |
4372 | mddev->bitmap_offset = 0; | 4376 | mddev->bitmap_offset = 0; |
4373 | 4377 | ||
4374 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
4375 | if (rdev->raid_disk >= 0) { | ||
4376 | char nm[20]; | ||
4377 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
4378 | sysfs_remove_link(&mddev->kobj, nm); | ||
4379 | } | ||
4380 | |||
4381 | /* make sure all md_delayed_delete calls have finished */ | 4378 | /* make sure all md_delayed_delete calls have finished */ |
4382 | flush_scheduled_work(); | 4379 | flush_scheduled_work(); |
4383 | 4380 | ||
@@ -5705,37 +5702,38 @@ static void status_unused(struct seq_file *seq) | |||
5705 | 5702 | ||
5706 | static void status_resync(struct seq_file *seq, mddev_t * mddev) | 5703 | static void status_resync(struct seq_file *seq, mddev_t * mddev) |
5707 | { | 5704 | { |
5708 | sector_t max_blocks, resync, res; | 5705 | sector_t max_sectors, resync, res; |
5709 | unsigned long dt, db, rt; | 5706 | unsigned long dt, db; |
5707 | sector_t rt; | ||
5710 | int scale; | 5708 | int scale; |
5711 | unsigned int per_milli; | 5709 | unsigned int per_milli; |
5712 | 5710 | ||
5713 | resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; | 5711 | resync = mddev->curr_resync - atomic_read(&mddev->recovery_active); |
5714 | 5712 | ||
5715 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) | 5713 | if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) |
5716 | max_blocks = mddev->resync_max_sectors >> 1; | 5714 | max_sectors = mddev->resync_max_sectors; |
5717 | else | 5715 | else |
5718 | max_blocks = mddev->dev_sectors / 2; | 5716 | max_sectors = mddev->dev_sectors; |
5719 | 5717 | ||
5720 | /* | 5718 | /* |
5721 | * Should not happen. | 5719 | * Should not happen. |
5722 | */ | 5720 | */ |
5723 | if (!max_blocks) { | 5721 | if (!max_sectors) { |
5724 | MD_BUG(); | 5722 | MD_BUG(); |
5725 | return; | 5723 | return; |
5726 | } | 5724 | } |
5727 | /* Pick 'scale' such that (resync>>scale)*1000 will fit | 5725 | /* Pick 'scale' such that (resync>>scale)*1000 will fit |
5728 | * in a sector_t, and (max_blocks>>scale) will fit in a | 5726 | * in a sector_t, and (max_sectors>>scale) will fit in a |
5729 | * u32, as those are the requirements for sector_div. | 5727 | * u32, as those are the requirements for sector_div. |
5730 | * Thus 'scale' must be at least 10 | 5728 | * Thus 'scale' must be at least 10 |
5731 | */ | 5729 | */ |
5732 | scale = 10; | 5730 | scale = 10; |
5733 | if (sizeof(sector_t) > sizeof(unsigned long)) { | 5731 | if (sizeof(sector_t) > sizeof(unsigned long)) { |
5734 | while ( max_blocks/2 > (1ULL<<(scale+32))) | 5732 | while ( max_sectors/2 > (1ULL<<(scale+32))) |
5735 | scale++; | 5733 | scale++; |
5736 | } | 5734 | } |
5737 | res = (resync>>scale)*1000; | 5735 | res = (resync>>scale)*1000; |
5738 | sector_div(res, (u32)((max_blocks>>scale)+1)); | 5736 | sector_div(res, (u32)((max_sectors>>scale)+1)); |
5739 | 5737 | ||
5740 | per_milli = res; | 5738 | per_milli = res; |
5741 | { | 5739 | { |
@@ -5756,25 +5754,35 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev) | |||
5756 | (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? | 5754 | (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? |
5757 | "resync" : "recovery"))), | 5755 | "resync" : "recovery"))), |
5758 | per_milli/10, per_milli % 10, | 5756 | per_milli/10, per_milli % 10, |
5759 | (unsigned long long) resync, | 5757 | (unsigned long long) resync/2, |
5760 | (unsigned long long) max_blocks); | 5758 | (unsigned long long) max_sectors/2); |
5761 | 5759 | ||
5762 | /* | 5760 | /* |
5763 | * We do not want to overflow, so the order of operands and | ||
5764 | * the * 100 / 100 trick are important. We do a +1 to be | ||
5765 | * safe against division by zero. We only estimate anyway. | ||
5766 | * | ||
5767 | * dt: time from mark until now | 5761 | * dt: time from mark until now |
5768 | * db: blocks written from mark until now | 5762 | * db: blocks written from mark until now |
5769 | * rt: remaining time | 5763 | * rt: remaining time |
5764 | * | ||
5765 | * rt is a sector_t, so could be 32bit or 64bit. | ||
5766 | * So we divide before multiply in case it is 32bit and close | ||
5767 | * to the limit. | ||
5768 | * We scale the divisor (db) by 32 to avoid losing precision | ||
5769 | * near the end of resync when the number of remaining sectors | ||
5770 | * is close to 'db'. | ||
5771 | * We then divide rt by 32 after multiplying by db to compensate. | ||
5772 | * The '+1' avoids division by zero if db is very small. | ||
5770 | */ | 5773 | */ |
5771 | dt = ((jiffies - mddev->resync_mark) / HZ); | 5774 | dt = ((jiffies - mddev->resync_mark) / HZ); |
5772 | if (!dt) dt++; | 5775 | if (!dt) dt++; |
5773 | db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)) | 5776 | db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)) |
5774 | - mddev->resync_mark_cnt; | 5777 | - mddev->resync_mark_cnt; |
5775 | rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100; | ||
5776 | 5778 | ||
5777 | seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); | 5779 | rt = max_sectors - resync; /* number of remaining sectors */ |
5780 | sector_div(rt, db/32+1); | ||
5781 | rt *= dt; | ||
5782 | rt >>= 5; | ||
5783 | |||
5784 | seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60, | ||
5785 | ((unsigned long)rt % 60)/6); | ||
5778 | 5786 | ||
5779 | seq_printf(seq, " speed=%ldK/sec", db/2/dt); | 5787 | seq_printf(seq, " speed=%ldK/sec", db/2/dt); |
5780 | } | 5788 | } |
@@ -5965,7 +5973,7 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
5965 | return 0; | 5973 | return 0; |
5966 | } | 5974 | } |
5967 | 5975 | ||
5968 | static struct seq_operations md_seq_ops = { | 5976 | static const struct seq_operations md_seq_ops = { |
5969 | .start = md_seq_start, | 5977 | .start = md_seq_start, |
5970 | .next = md_seq_next, | 5978 | .next = md_seq_next, |
5971 | .stop = md_seq_stop, | 5979 | .stop = md_seq_stop, |
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 81a54f17417e..499620afb44b 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c | |||
@@ -1809,17 +1809,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1809 | r10_bio->sector = sect; | 1809 | r10_bio->sector = sect; |
1810 | 1810 | ||
1811 | raid10_find_phys(conf, r10_bio); | 1811 | raid10_find_phys(conf, r10_bio); |
1812 | /* Need to check if this section will still be | 1812 | |
1813 | /* Need to check if the array will still be | ||
1813 | * degraded | 1814 | * degraded |
1814 | */ | 1815 | */ |
1815 | for (j=0; j<conf->copies;j++) { | 1816 | for (j=0; j<conf->raid_disks; j++) |
1816 | int d = r10_bio->devs[j].devnum; | 1817 | if (conf->mirrors[j].rdev == NULL || |
1817 | if (conf->mirrors[d].rdev == NULL || | 1818 | test_bit(Faulty, &conf->mirrors[j].rdev->flags)) { |
1818 | test_bit(Faulty, &conf->mirrors[d].rdev->flags)) { | ||
1819 | still_degraded = 1; | 1819 | still_degraded = 1; |
1820 | break; | 1820 | break; |
1821 | } | 1821 | } |
1822 | } | 1822 | |
1823 | must_sync = bitmap_start_sync(mddev->bitmap, sect, | 1823 | must_sync = bitmap_start_sync(mddev->bitmap, sect, |
1824 | &sync_blocks, still_degraded); | 1824 | &sync_blocks, still_degraded); |
1825 | 1825 | ||