aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-05-07 15:01:41 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-05-07 15:01:41 -0400
commitee7fee0b91ceb1c057c67fcc573b2d8dfe6d92c5 (patch)
treee209cce2b19f99e722bcf806bf7f7fa9511f8dd8
parent8a0a9bd4db63bc45e3017bedeafbd88d0eb84d02 (diff)
parentc4647292fda0833bebe45be27f04453b736981fa (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: md: remove rd%d links immediately after stopping an array. md: remove ability to explicit set an inactive array to 'clean'. md: constify VFTs md: tidy up status_resync to handle large arrays. md: fix some (more) errors with bitmaps on devices larger than 2TB. md/raid10: don't clear bitmap during recovery if array will still be degraded. md: fix loading of out-of-date bitmap.
-rw-r--r--drivers/md/bitmap.c29
-rw-r--r--drivers/md/md.c70
-rw-r--r--drivers/md/raid10.c12
3 files changed, 60 insertions, 51 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 1fb91edc7de2..47c68bc75a17 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -986,6 +986,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
986 oldindex = index; 986 oldindex = index;
987 oldpage = page; 987 oldpage = page;
988 988
989 bitmap->filemap[bitmap->file_pages++] = page;
990 bitmap->last_page_size = count;
991
989 if (outofdate) { 992 if (outofdate) {
990 /* 993 /*
991 * if bitmap is out of date, dirty the 994 * if bitmap is out of date, dirty the
@@ -998,15 +1001,9 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
998 write_page(bitmap, page, 1); 1001 write_page(bitmap, page, 1);
999 1002
1000 ret = -EIO; 1003 ret = -EIO;
1001 if (bitmap->flags & BITMAP_WRITE_ERROR) { 1004 if (bitmap->flags & BITMAP_WRITE_ERROR)
1002 /* release, page not in filemap yet */
1003 put_page(page);
1004 goto err; 1005 goto err;
1005 }
1006 } 1006 }
1007
1008 bitmap->filemap[bitmap->file_pages++] = page;
1009 bitmap->last_page_size = count;
1010 } 1007 }
1011 paddr = kmap_atomic(page, KM_USER0); 1008 paddr = kmap_atomic(page, KM_USER0);
1012 if (bitmap->flags & BITMAP_HOSTENDIAN) 1009 if (bitmap->flags & BITMAP_HOSTENDIAN)
@@ -1016,9 +1013,11 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
1016 kunmap_atomic(paddr, KM_USER0); 1013 kunmap_atomic(paddr, KM_USER0);
1017 if (b) { 1014 if (b) {
1018 /* if the disk bit is set, set the memory bit */ 1015 /* if the disk bit is set, set the memory bit */
1019 bitmap_set_memory_bits(bitmap, i << CHUNK_BLOCK_SHIFT(bitmap), 1016 int needed = ((sector_t)(i+1) << (CHUNK_BLOCK_SHIFT(bitmap))
1020 ((i+1) << (CHUNK_BLOCK_SHIFT(bitmap)) >= start) 1017 >= start);
1021 ); 1018 bitmap_set_memory_bits(bitmap,
1019 (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
1020 needed);
1022 bit_cnt++; 1021 bit_cnt++;
1023 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1022 set_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1024 } 1023 }
@@ -1154,8 +1153,9 @@ void bitmap_daemon_work(struct bitmap *bitmap)
1154 spin_lock_irqsave(&bitmap->lock, flags); 1153 spin_lock_irqsave(&bitmap->lock, flags);
1155 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); 1154 clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
1156 } 1155 }
1157 bmc = bitmap_get_counter(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), 1156 bmc = bitmap_get_counter(bitmap,
1158 &blocks, 0); 1157 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
1158 &blocks, 0);
1159 if (bmc) { 1159 if (bmc) {
1160/* 1160/*
1161 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc); 1161 if (j < 100) printk("bitmap: j=%lu, *bmc = 0x%x\n", j, *bmc);
@@ -1169,7 +1169,8 @@ void bitmap_daemon_work(struct bitmap *bitmap)
1169 } else if (*bmc == 1) { 1169 } else if (*bmc == 1) {
1170 /* we can clear the bit */ 1170 /* we can clear the bit */
1171 *bmc = 0; 1171 *bmc = 0;
1172 bitmap_count_page(bitmap, j << CHUNK_BLOCK_SHIFT(bitmap), 1172 bitmap_count_page(bitmap,
1173 (sector_t)j << CHUNK_BLOCK_SHIFT(bitmap),
1173 -1); 1174 -1);
1174 1175
1175 /* clear the bit */ 1176 /* clear the bit */
@@ -1514,7 +1515,7 @@ void bitmap_dirty_bits(struct bitmap *bitmap, unsigned long s, unsigned long e)
1514 unsigned long chunk; 1515 unsigned long chunk;
1515 1516
1516 for (chunk = s; chunk <= e; chunk++) { 1517 for (chunk = s; chunk <= e; chunk++) {
1517 sector_t sec = chunk << CHUNK_BLOCK_SHIFT(bitmap); 1518 sector_t sec = (sector_t)chunk << CHUNK_BLOCK_SHIFT(bitmap);
1518 bitmap_set_memory_bits(bitmap, sec, 1); 1519 bitmap_set_memory_bits(bitmap, sec, 1);
1519 bitmap_file_set_bit(bitmap, sec); 1520 bitmap_file_set_bit(bitmap, sec);
1520 } 1521 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 612343fdde94..fccc8343a250 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3066,11 +3066,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
3066 } else 3066 } else
3067 err = -EBUSY; 3067 err = -EBUSY;
3068 spin_unlock_irq(&mddev->write_lock); 3068 spin_unlock_irq(&mddev->write_lock);
3069 } else { 3069 } else
3070 mddev->ro = 0; 3070 err = -EINVAL;
3071 mddev->recovery_cp = MaxSector;
3072 err = do_md_run(mddev);
3073 }
3074 break; 3071 break;
3075 case active: 3072 case active:
3076 if (mddev->pers) { 3073 if (mddev->pers) {
@@ -4297,6 +4294,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4297{ 4294{
4298 int err = 0; 4295 int err = 0;
4299 struct gendisk *disk = mddev->gendisk; 4296 struct gendisk *disk = mddev->gendisk;
4297 mdk_rdev_t *rdev;
4300 4298
4301 if (atomic_read(&mddev->openers) > is_open) { 4299 if (atomic_read(&mddev->openers) > is_open) {
4302 printk("md: %s still in use.\n",mdname(mddev)); 4300 printk("md: %s still in use.\n",mdname(mddev));
@@ -4339,6 +4337,13 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4339 /* tell userspace to handle 'inactive' */ 4337 /* tell userspace to handle 'inactive' */
4340 sysfs_notify_dirent(mddev->sysfs_state); 4338 sysfs_notify_dirent(mddev->sysfs_state);
4341 4339
4340 list_for_each_entry(rdev, &mddev->disks, same_set)
4341 if (rdev->raid_disk >= 0) {
4342 char nm[20];
4343 sprintf(nm, "rd%d", rdev->raid_disk);
4344 sysfs_remove_link(&mddev->kobj, nm);
4345 }
4346
4342 set_capacity(disk, 0); 4347 set_capacity(disk, 0);
4343 mddev->changed = 1; 4348 mddev->changed = 1;
4344 4349
@@ -4359,7 +4364,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4359 * Free resources if final stop 4364 * Free resources if final stop
4360 */ 4365 */
4361 if (mode == 0) { 4366 if (mode == 0) {
4362 mdk_rdev_t *rdev;
4363 4367
4364 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev)); 4368 printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
4365 4369
@@ -4371,13 +4375,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4371 } 4375 }
4372 mddev->bitmap_offset = 0; 4376 mddev->bitmap_offset = 0;
4373 4377
4374 list_for_each_entry(rdev, &mddev->disks, same_set)
4375 if (rdev->raid_disk >= 0) {
4376 char nm[20];
4377 sprintf(nm, "rd%d", rdev->raid_disk);
4378 sysfs_remove_link(&mddev->kobj, nm);
4379 }
4380
4381 /* make sure all md_delayed_delete calls have finished */ 4378 /* make sure all md_delayed_delete calls have finished */
4382 flush_scheduled_work(); 4379 flush_scheduled_work();
4383 4380
@@ -5705,37 +5702,38 @@ static void status_unused(struct seq_file *seq)
5705 5702
5706static void status_resync(struct seq_file *seq, mddev_t * mddev) 5703static void status_resync(struct seq_file *seq, mddev_t * mddev)
5707{ 5704{
5708 sector_t max_blocks, resync, res; 5705 sector_t max_sectors, resync, res;
5709 unsigned long dt, db, rt; 5706 unsigned long dt, db;
5707 sector_t rt;
5710 int scale; 5708 int scale;
5711 unsigned int per_milli; 5709 unsigned int per_milli;
5712 5710
5713 resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active))/2; 5711 resync = mddev->curr_resync - atomic_read(&mddev->recovery_active);
5714 5712
5715 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) 5713 if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
5716 max_blocks = mddev->resync_max_sectors >> 1; 5714 max_sectors = mddev->resync_max_sectors;
5717 else 5715 else
5718 max_blocks = mddev->dev_sectors / 2; 5716 max_sectors = mddev->dev_sectors;
5719 5717
5720 /* 5718 /*
5721 * Should not happen. 5719 * Should not happen.
5722 */ 5720 */
5723 if (!max_blocks) { 5721 if (!max_sectors) {
5724 MD_BUG(); 5722 MD_BUG();
5725 return; 5723 return;
5726 } 5724 }
5727 /* Pick 'scale' such that (resync>>scale)*1000 will fit 5725 /* Pick 'scale' such that (resync>>scale)*1000 will fit
5728 * in a sector_t, and (max_blocks>>scale) will fit in a 5726 * in a sector_t, and (max_sectors>>scale) will fit in a
5729 * u32, as those are the requirements for sector_div. 5727 * u32, as those are the requirements for sector_div.
5730 * Thus 'scale' must be at least 10 5728 * Thus 'scale' must be at least 10
5731 */ 5729 */
5732 scale = 10; 5730 scale = 10;
5733 if (sizeof(sector_t) > sizeof(unsigned long)) { 5731 if (sizeof(sector_t) > sizeof(unsigned long)) {
5734 while ( max_blocks/2 > (1ULL<<(scale+32))) 5732 while ( max_sectors/2 > (1ULL<<(scale+32)))
5735 scale++; 5733 scale++;
5736 } 5734 }
5737 res = (resync>>scale)*1000; 5735 res = (resync>>scale)*1000;
5738 sector_div(res, (u32)((max_blocks>>scale)+1)); 5736 sector_div(res, (u32)((max_sectors>>scale)+1));
5739 5737
5740 per_milli = res; 5738 per_milli = res;
5741 { 5739 {
@@ -5756,25 +5754,35 @@ static void status_resync(struct seq_file *seq, mddev_t * mddev)
5756 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ? 5754 (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ?
5757 "resync" : "recovery"))), 5755 "resync" : "recovery"))),
5758 per_milli/10, per_milli % 10, 5756 per_milli/10, per_milli % 10,
5759 (unsigned long long) resync, 5757 (unsigned long long) resync/2,
5760 (unsigned long long) max_blocks); 5758 (unsigned long long) max_sectors/2);
5761 5759
5762 /* 5760 /*
5763 * We do not want to overflow, so the order of operands and
5764 * the * 100 / 100 trick are important. We do a +1 to be
5765 * safe against division by zero. We only estimate anyway.
5766 *
5767 * dt: time from mark until now 5761 * dt: time from mark until now
5768 * db: blocks written from mark until now 5762 * db: blocks written from mark until now
5769 * rt: remaining time 5763 * rt: remaining time
5764 *
5765 * rt is a sector_t, so could be 32bit or 64bit.
5766 * So we divide before multiply in case it is 32bit and close
5767 * to the limit.
5768 * We scale the divisor (db) by 32 to avoid loosing precision
5769 * near the end of resync when the number of remaining sectors
5770 * is close to 'db'.
5771 * We then divide rt by 32 after multiplying by db to compensate.
5772 * The '+1' avoids division by zero if db is very small.
5770 */ 5773 */
5771 dt = ((jiffies - mddev->resync_mark) / HZ); 5774 dt = ((jiffies - mddev->resync_mark) / HZ);
5772 if (!dt) dt++; 5775 if (!dt) dt++;
5773 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)) 5776 db = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active))
5774 - mddev->resync_mark_cnt; 5777 - mddev->resync_mark_cnt;
5775 rt = (dt * ((unsigned long)(max_blocks-resync) / (db/2/100+1)))/100;
5776 5778
5777 seq_printf(seq, " finish=%lu.%lumin", rt / 60, (rt % 60)/6); 5779 rt = max_sectors - resync; /* number of remaining sectors */
5780 sector_div(rt, db/32+1);
5781 rt *= dt;
5782 rt >>= 5;
5783
5784 seq_printf(seq, " finish=%lu.%lumin", (unsigned long)rt / 60,
5785 ((unsigned long)rt % 60)/6);
5778 5786
5779 seq_printf(seq, " speed=%ldK/sec", db/2/dt); 5787 seq_printf(seq, " speed=%ldK/sec", db/2/dt);
5780} 5788}
@@ -5965,7 +5973,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
5965 return 0; 5973 return 0;
5966} 5974}
5967 5975
5968static struct seq_operations md_seq_ops = { 5976static const struct seq_operations md_seq_ops = {
5969 .start = md_seq_start, 5977 .start = md_seq_start,
5970 .next = md_seq_next, 5978 .next = md_seq_next,
5971 .stop = md_seq_stop, 5979 .stop = md_seq_stop,
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 81a54f17417e..499620afb44b 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1809,17 +1809,17 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
1809 r10_bio->sector = sect; 1809 r10_bio->sector = sect;
1810 1810
1811 raid10_find_phys(conf, r10_bio); 1811 raid10_find_phys(conf, r10_bio);
1812 /* Need to check if this section will still be 1812
1813 /* Need to check if the array will still be
1813 * degraded 1814 * degraded
1814 */ 1815 */
1815 for (j=0; j<conf->copies;j++) { 1816 for (j=0; j<conf->raid_disks; j++)
1816 int d = r10_bio->devs[j].devnum; 1817 if (conf->mirrors[j].rdev == NULL ||
1817 if (conf->mirrors[d].rdev == NULL || 1818 test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
1818 test_bit(Faulty, &conf->mirrors[d].rdev->flags)) {
1819 still_degraded = 1; 1819 still_degraded = 1;
1820 break; 1820 break;
1821 } 1821 }
1822 } 1822
1823 must_sync = bitmap_start_sync(mddev->bitmap, sect, 1823 must_sync = bitmap_start_sync(mddev->bitmap, sect,
1824 &sync_blocks, still_degraded); 1824 &sync_blocks, still_degraded);
1825 1825