From 87162a28acceda05dadf10a8333183f6d2b55265 Mon Sep 17 00:00:00 2001 From: "viro@ZenIV.linux.org.uk" Date: Fri, 9 Sep 2005 20:36:43 +0100 Subject: [PATCH] trivial __user annotations (md) Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 20ca80b7dc20..373ab92e367b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2087,7 +2087,7 @@ static int get_array_info(mddev_t * mddev, void __user * arg) return 0; } -static int get_bitmap_file(mddev_t * mddev, void * arg) +static int get_bitmap_file(mddev_t * mddev, void __user * arg) { mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ char *ptr, *buf = NULL; @@ -2781,7 +2781,7 @@ static int md_ioctl(struct inode *inode, struct file *file, goto done_unlock; case GET_BITMAP_FILE: - err = get_bitmap_file(mddev, (void *)arg); + err = get_bitmap_file(mddev, argp); goto done_unlock; case GET_DISK_INFO: -- cgit v1.2.2 From 1923b99a0f4748aa6be0b9b9523ce224a3449b17 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:43 -0700 Subject: [PATCH] md: don't allow new md/bitmap file to be set if one already exists ... otherwise we loose a reference and can never free the file. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 373ab92e367b..63c566165189 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2430,7 +2430,7 @@ static int set_bitmap_file(mddev_t *mddev, int fd) { int err; - if (mddev->pers) + if (mddev->pers || mddev->bitmap_file) return -EBUSY; mddev->bitmap_file = fget(fd); -- cgit v1.2.2 From 36fa30636fb84b209210299684e1be66d9e58217 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:45 -0700 Subject: [PATCH] md: all hot-add and hot-remove of md intent logging bitmaps Both file-bitmaps and superblock bitmaps are supported. If you add a bitmap file on the array device, you lose. This introduces a 'default_bitmap_offset' field in mddev, as the ioctl used for adding a superblock bitmap doesn't have room for giving an offset. Later, this value will be setable via sysfs. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 17 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 63c566165189..ae654466dc23 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -623,6 +623,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->size = sb->size; mddev->events = md_event(sb); mddev->bitmap_offset = 0; + mddev->default_bitmap_offset = MD_SB_BYTES >> 9; if (sb->state & (1<recovery_cp = MaxSector; @@ -648,7 +649,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) printk(KERN_WARNING "md: bitmaps only support for raid1\n"); return -EINVAL; } - mddev->bitmap_offset = (MD_SB_BYTES >> 9); + mddev->bitmap_offset = mddev->default_bitmap_offset; } } else if (mddev->pers == NULL) { @@ -939,6 +940,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->size = le64_to_cpu(sb->size)/2; mddev->events = le64_to_cpu(sb->events); mddev->bitmap_offset = 0; + mddev->default_bitmap_offset = 0; + if (mddev->minor_version == 0) + mddev->default_bitmap_offset = -(64*1024)/512; mddev->recovery_cp = le64_to_cpu(sb->resync_offset); memcpy(mddev->uuid, sb->set_uuid, 16); @@ -2073,6 +2077,8 @@ static int get_array_info(mddev_t * mddev, void __user * arg) info.state = 0; if (mddev->in_sync) info.state = (1<bitmap && mddev->bitmap_offset) + info.state = (1<pers || mddev->bitmap_file) - return -EBUSY; + if (mddev->pers) { + if (!mddev->pers->quiesce) + return -EBUSY; + if (mddev->recovery || mddev->sync_thread) + return -EBUSY; + /* we should be able to change the bitmap.. */ + } - mddev->bitmap_file = fget(fd); - if (mddev->bitmap_file == NULL) { - printk(KERN_ERR "%s: error: failed to get bitmap file\n", - mdname(mddev)); - return -EBADF; - } + if (fd >= 0) { + if (mddev->bitmap) + return -EEXIST; /* cannot add when bitmap is present */ + mddev->bitmap_file = fget(fd); - err = deny_bitmap_write_access(mddev->bitmap_file); - if (err) { - printk(KERN_ERR "%s: error: bitmap file is already in use\n", - mdname(mddev)); - fput(mddev->bitmap_file); - mddev->bitmap_file = NULL; - } else + if (mddev->bitmap_file == NULL) { + printk(KERN_ERR "%s: error: failed to get bitmap file\n", + mdname(mddev)); + return -EBADF; + } + + err = deny_bitmap_write_access(mddev->bitmap_file); + if (err) { + printk(KERN_ERR "%s: error: bitmap file is already in use\n", + mdname(mddev)); + fput(mddev->bitmap_file); + mddev->bitmap_file = NULL; + return err; + } mddev->bitmap_offset = 0; /* file overrides offset */ + } else if (mddev->bitmap == NULL) + return -ENOENT; /* cannot remove what isn't there */ + err = 0; + if (mddev->pers) { + mddev->pers->quiesce(mddev, 1); + if (fd >= 0) + err = bitmap_create(mddev); + if (fd < 0 || err) + bitmap_destroy(mddev); + mddev->pers->quiesce(mddev, 0); + } else if (fd < 0) { + if (mddev->bitmap_file) + fput(mddev->bitmap_file); + mddev->bitmap_file = NULL; + } + return err; } @@ -2528,6 +2560,11 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) { int rv = 0; int cnt = 0; + int state = 0; + + /* calculate expected state,ignoring low bits */ + if (mddev->bitmap && mddev->bitmap_offset) + state |= (1 << MD_SB_BITMAP_PRESENT); if (mddev->major_version != info->major_version || mddev->minor_version != info->minor_version || @@ -2536,12 +2573,16 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) mddev->level != info->level || /* mddev->layout != info->layout || */ !mddev->persistent != info->not_persistent|| - mddev->chunk_size != info->chunk_size ) + mddev->chunk_size != info->chunk_size || + /* ignore bottom 8 bits of state, and allow SB_BITMAP_PRESENT to change */ + ((state^info->state) & 0xfffffe00) + ) return -EINVAL; /* Check there is only one change */ if (mddev->size != info->size) cnt++; if (mddev->raid_disks != info->raid_disks) cnt++; if (mddev->layout != info->layout) cnt++; + if ((state ^ info->state) & (1< 1) return -EINVAL; @@ -2620,6 +2661,35 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info) } } } + if ((state ^ info->state) & (1<pers->quiesce == NULL) + return -EINVAL; + if (mddev->recovery || mddev->sync_thread) + return -EBUSY; + if (info->state & (1<bitmap) + return -EEXIST; + if (mddev->default_bitmap_offset == 0) + return -EINVAL; + mddev->bitmap_offset = mddev->default_bitmap_offset; + mddev->pers->quiesce(mddev, 1); + rv = bitmap_create(mddev); + if (rv) + bitmap_destroy(mddev); + mddev->pers->quiesce(mddev, 0); + } else { + /* remove the bitmap */ + if (!mddev->bitmap) + return -ENOENT; + if (mddev->bitmap->file) + return -EINVAL; + mddev->pers->quiesce(mddev, 1); + bitmap_destroy(mddev); + mddev->pers->quiesce(mddev, 0); + mddev->bitmap_offset = 0; + } + } md_update_sb(mddev); return rv; } -- cgit v1.2.2 From 8ddf9efe6708f3674f0ddfeb6425fd27bea109a2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:45 -0700 Subject: [PATCH] md: support write-mostly device in raid1 This allows a device in a raid1 to be marked as "write mostly". Read requests will only be sent if there is no other option. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index ae654466dc23..f1ac356e656d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -670,6 +670,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) if (mddev->level != LEVEL_MULTIPATH) { rdev->faulty = 0; + rdev->flags = 0; desc = sb->disks + rdev->desc_nr; if (desc->state & (1<in_sync = 1; rdev->raid_disk = desc->raid_disk; } + if (desc->state & (1<flags); } else /* MULTIPATH are always insync */ rdev->in_sync = 1; return 0; @@ -777,6 +780,8 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) spare++; working++; } + if (test_bit(WriteMostly, &rdev2->flags)) + d->state |= (1<raid_disk = role; break; } + rdev->flags = 0; + if (sb->devflags & WriteMostly1) + set_bit(WriteMostly, &rdev->flags); } else /* MULTIPATH are always insync */ rdev->in_sync = 1; @@ -2152,6 +2160,8 @@ static int get_disk_info(mddev_t * mddev, void __user * arg) info.state |= (1<flags)) + info.state |= (1<saved_raid_disk = rdev->raid_disk; rdev->in_sync = 0; /* just to be sure */ + if (info->state & (1<flags); + rdev->raid_disk = -1; err = bind_rdev_to_array(rdev, mddev); if (err) @@ -2277,6 +2290,9 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) else rdev->in_sync = 0; + if (info->state & (1<flags); + err = bind_rdev_to_array(rdev, mddev); if (err) { export_rdev(rdev); @@ -3329,6 +3345,8 @@ static int md_seq_show(struct seq_file *seq, void *v) char b[BDEVNAME_SIZE]; seq_printf(seq, " %s[%d]", bdevname(rdev->bdev,b), rdev->desc_nr); + if (test_bit(WriteMostly, &rdev->flags)) + seq_printf(seq, "(W)"); if (rdev->faulty) { seq_printf(seq, "(F)"); continue; -- cgit v1.2.2 From 7b1e35f6d666693e8f376ce02242efca3ec09aaf Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:50 -0700 Subject: [PATCH] md: allow hot-adding devices to arrays with non-persistant superblocks. It is possibly (and occasionally useful) to have a raid1 without persistent superblocks. The code in add_new_disk for adding a device to such an array always tries to read a superblock. This will obviously fail. So do the appropriate test and call md_import_device with appropriate args. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index f1ac356e656d..866c704e008a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2226,8 +2226,11 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) mdname(mddev)); return -EINVAL; } - rdev = md_import_device(dev, mddev->major_version, - mddev->minor_version); + if (mddev->persistent) + rdev = md_import_device(dev, mddev->major_version, + mddev->minor_version); + else + rdev = md_import_device(dev, -1, -1); if (IS_ERR(rdev)) { printk(KERN_WARNING "md: md_import_device returned %ld\n", -- cgit v1.2.2 From 71c0805cb48462c99fbe0e5fcc6c12d7b9929c09 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:51 -0700 Subject: [PATCH] md: allow md to load a superblock with feature-bit '1' set As this is used to flag an internal bitmap. Also, introduce symbolic names for feature bits. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 866c704e008a..1be3f2de396b 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -875,7 +875,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) sb->major_version != cpu_to_le32(1) || le32_to_cpu(sb->max_dev) > (4096-256)/2 || le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) || - sb->feature_map != 0) + (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0) return -EINVAL; if (calc_sb_1_csum(sb) != sb->sb_csum) { @@ -954,7 +954,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->max_disks = (4096-256)/2; - if ((le32_to_cpu(sb->feature_map) & 1) && + if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && mddev->bitmap_file == NULL ) { if (mddev->level != 1) { printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); @@ -1029,7 +1029,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) if (mddev->bitmap && mddev->bitmap_file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); - sb->feature_map = cpu_to_le32(1); + sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); } max_dev = 0; -- cgit v1.2.2 From 0002b2718dd04da67c21f8a7830de8d95a9b0345 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:53 -0700 Subject: [PATCH] md: limit size of sb read/written to appropriate amount version-1 superblocks are not (normally) 4K long, and can be of variable size. Writing the full 4K can cause corruption (but only in non-default configurations). With this patch the super-block-flavour can choose a size to read, and set a size to write based on what it finds. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 1be3f2de396b..be7873c61b3c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -393,7 +393,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size, return ret; } -static int read_disk_sb(mdk_rdev_t * rdev) +static int read_disk_sb(mdk_rdev_t * rdev, int size) { char b[BDEVNAME_SIZE]; if (!rdev->sb_page) { @@ -404,7 +404,7 @@ static int read_disk_sb(mdk_rdev_t * rdev) return 0; - if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ)) + if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ)) goto fail; rdev->sb_loaded = 1; return 0; @@ -531,7 +531,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version sb_offset = calc_dev_sboffset(rdev->bdev); rdev->sb_offset = sb_offset; - ret = read_disk_sb(rdev); + ret = read_disk_sb(rdev, MD_SB_BYTES); if (ret) return ret; ret = -EINVAL; @@ -564,6 +564,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version rdev->preferred_minor = sb->md_minor; rdev->data_offset = 0; + rdev->sb_size = MD_SB_BYTES; if (sb->level == LEVEL_MULTIPATH) rdev->desc_nr = -1; @@ -837,6 +838,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) int ret; sector_t sb_offset; char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; + int bmask; /* * Calculate the position of the superblock. @@ -865,7 +867,10 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) } rdev->sb_offset = sb_offset; - ret = read_disk_sb(rdev); + /* superblock is rarely larger than 1K, but it can be larger, + * and it is safe to read 4k, so we do that + */ + ret = read_disk_sb(rdev, 4096); if (ret) return ret; @@ -891,6 +896,11 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) rdev->preferred_minor = 0xffff; rdev->data_offset = le64_to_cpu(sb->data_offset); + rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; + bmask = block_size(rdev->bdev)-1; + if (rdev->sb_size & bmask) + rdev-> sb_size = (rdev->sb_size | bmask)+1; + if (refdev == 0) return 1; else { @@ -1375,7 +1385,7 @@ repeat: dprintk("%s ", bdevname(rdev->bdev,b)); if (!rdev->faulty) { md_super_write(mddev,rdev, - rdev->sb_offset<<1, MD_SB_BYTES, + rdev->sb_offset<<1, rdev->sb_size, rdev->sb_page); dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", bdevname(rdev->bdev,b), -- cgit v1.2.2 From 72626685dc66d455742a7f215a0535c551628b9e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:54 -0700 Subject: [PATCH] md: add write-intent-bitmap support to raid5 Most awkward part of this is delaying write requests until bitmap updates have been flushed. To achieve this, we have a sequence number (seq_flush) which is incremented each time the raid5 is unplugged. If the raid thread notices that this has changed, it flushes bitmap changes, and assigned the value of seq_flush to seq_write. When a write request arrives, it is given the number from seq_write, and that write request may not complete until seq_flush is larger than the saved seq number. We have a new queue for storing stripes which are waiting for a bitmap flush and an extra flag for stripes to record if the write was 'degraded' and so should not clear the a bit in the bitmap. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index be7873c61b3c..dbf540a7fccc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -645,7 +645,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) if (sb->state & (1<bitmap_file == NULL) { - if (mddev->level != 1) { + if (mddev->level != 1 && mddev->level != 5) { /* FIXME use a better test */ printk(KERN_WARNING "md: bitmaps only support for raid1\n"); return -EINVAL; @@ -3517,7 +3517,6 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) */ void md_write_start(mddev_t *mddev, struct bio *bi) { - DEFINE_WAIT(w); if (bio_data_dir(bi) != WRITE) return; -- cgit v1.2.2 From 934ce7c840992a771ffc478b132092db9c935c42 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:55 -0700 Subject: [PATCH] md: write-intent bitmap support for raid6 This is a direct port of the raid5 patch. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index dbf540a7fccc..008149e2bc4a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -645,7 +645,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) if (sb->state & (1<bitmap_file == NULL) { - if (mddev->level != 1 && mddev->level != 5) { + if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6) { /* FIXME use a better test */ printk(KERN_WARNING "md: bitmaps only support for raid1\n"); return -EINVAL; -- cgit v1.2.2 From a6fb0934f923f889055152cb0b033674f627460b Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:56 -0700 Subject: [PATCH] md: use kthread infrastructure in md Switch MD to use the kthread infrastructure, to simplify the code and get rid of tasklist_lock abuse in md_unregister_thread. Also don't flush signals in md_thread, as the called thread will always do that. Signed-off-by: Christoph Hellwig Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 48 ++++++++++-------------------------------------- 1 file changed, 10 insertions(+), 38 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 008149e2bc4a..30e3624f3d90 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -3049,18 +3050,6 @@ static int md_thread(void * arg) { mdk_thread_t *thread = arg; - lock_kernel(); - - /* - * Detach thread - */ - - daemonize(thread->name, mdname(thread->mddev)); - - current->exit_signal = SIGCHLD; - allow_signal(SIGKILL); - thread->tsk = current; - /* * md_thread is a 'system-thread', it's priority should be very * high. We avoid resource deadlocks individually in each @@ -3072,14 +3061,14 @@ static int md_thread(void * arg) * bdflush, otherwise bdflush will deadlock if there are too * many dirty RAID5 blocks. */ - unlock_kernel(); complete(thread->event); - while (thread->run) { + while (!kthread_should_stop()) { void (*run)(mddev_t *); wait_event_interruptible_timeout(thread->wqueue, - test_bit(THREAD_WAKEUP, &thread->flags), + test_bit(THREAD_WAKEUP, &thread->flags) + || kthread_should_stop(), thread->timeout); try_to_freeze(); @@ -3088,11 +3077,8 @@ static int md_thread(void * arg) run = thread->run; if (run) run(thread->mddev); - - if (signal_pending(current)) - flush_signals(current); } - complete(thread->event); + return 0; } @@ -3109,11 +3095,9 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, const char *name) { mdk_thread_t *thread; - int ret; struct completion event; - thread = (mdk_thread_t *) kmalloc - (sizeof(mdk_thread_t), GFP_KERNEL); + thread = kmalloc(sizeof(mdk_thread_t), GFP_KERNEL); if (!thread) return NULL; @@ -3126,8 +3110,8 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, thread->mddev = mddev; thread->name = name; thread->timeout = MAX_SCHEDULE_TIMEOUT; - ret = kernel_thread(md_thread, thread, 0); - if (ret < 0) { + thread->tsk = kthread_run(md_thread, thread, mdname(thread->mddev)); + if (IS_ERR(thread->tsk)) { kfree(thread); return NULL; } @@ -3137,21 +3121,9 @@ mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev, void md_unregister_thread(mdk_thread_t *thread) { - struct completion event; - - init_completion(&event); - - thread->event = &event; - - /* As soon as ->run is set to NULL, the task could disappear, - * so we need to hold tasklist_lock until we have sent the signal - */ dprintk("interrupting MD-thread pid %d\n", thread->tsk->pid); - read_lock(&tasklist_lock); - thread->run = NULL; - send_sig(SIGKILL, thread->tsk, 1); - read_unlock(&tasklist_lock); - wait_for_completion(&event); + + kthread_stop(thread->tsk); kfree(thread); } -- cgit v1.2.2 From 53e87fbb5dc887766229eef3ba8bd8ab8853b066 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:58 -0700 Subject: [PATCH] md: choose better default offset for bitmap. On reflection, a better default location for hot-adding bitmaps with version-1 superblocks is immediately after the superblock. There might not be much room there, but there is usually atleast 3k, and that is a good start. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 30e3624f3d90..fd66c3958942 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -957,8 +957,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) mddev->events = le64_to_cpu(sb->events); mddev->bitmap_offset = 0; mddev->default_bitmap_offset = 0; - if (mddev->minor_version == 0) - mddev->default_bitmap_offset = -(64*1024)/512; + mddev->default_bitmap_offset = 1024; mddev->recovery_cp = le64_to_cpu(sb->resync_offset); memcpy(mddev->uuid, sb->set_uuid, 16); -- cgit v1.2.2 From 720a3dc39b030e273bc955641f2517874fd38fc5 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:23:59 -0700 Subject: [PATCH] md: use queue_hardsect_size instead of block_size for md superblock size calc. Doh. I want the physical hard-sector-size, not the current block size... Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index fd66c3958942..39b917bc0fc0 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -898,7 +898,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) rdev->data_offset = le64_to_cpu(sb->data_offset); rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; - bmask = block_size(rdev->bdev)-1; + bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1; if (rdev->sb_size & bmask) rdev-> sb_size = (rdev->sb_size | bmask)+1; -- cgit v1.2.2 From 1cd6bf19bbe8098153d7a478d8fc551edbea9305 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:24:00 -0700 Subject: [PATCH] md: add information about superblock version to /proc/mdstat Leave it unchanged if the original (0.90) is used, incase it might be a compatability problem. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 39b917bc0fc0..0a13016829da 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3346,6 +3346,15 @@ static int md_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\n %llu blocks", (unsigned long long)size); } + if (mddev->persistent) { + if (mddev->major_version != 0 || + mddev->minor_version != 90) { + seq_printf(seq," super %d.%d", + mddev->major_version, + mddev->minor_version); + } + } else + seq_printf(seq, " super non-persistent"); if (mddev->pers) { mddev->pers->status (seq, mddev); -- cgit v1.2.2 From b325a32e5732d7aef70ca3c58acb3953ed20f66c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:24:00 -0700 Subject: [PATCH] md: report spare drives in /proc/mdstat Just like failed drives have (F), so spare drives now have (S). Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 0a13016829da..f27e8f644dfe 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3334,7 +3334,8 @@ static int md_seq_show(struct seq_file *seq, void *v) if (rdev->faulty) { seq_printf(seq, "(F)"); continue; - } + } else if (rdev->raid_disk < 0) + seq_printf(seq, "(S)"); /* spare */ size += rdev->size; } -- cgit v1.2.2 From 188c18fd7982d220f4dea234cbb5cc6c75d0f512 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:24:01 -0700 Subject: [PATCH] md: make sure the new 'sb_size' is set properly device added without pre-existing superblock. There are two ways to add devices to an md/raid array. It can have superblock written to it, and then given to the md driver, which will read the superblock (the new way) or md can be told (through SET_ARRAY_INFO) the shape of the array, and the told about individual drives, and md will create the required superblock (the old way). The newly introduced sb_size was only set for drives being added the new way, not the old ways. Oops :-( Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index f27e8f644dfe..8c1f7954882a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2303,6 +2303,8 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) else rdev->in_sync = 0; + rdev->sb_size = MD_SB_BYTES; + if (info->state & (1<flags); -- cgit v1.2.2 From 611815651b2640449bda928079160b9263efac1c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 9 Sep 2005 16:24:02 -0700 Subject: [PATCH] md: really get sb_size setting right in all cases There was another case where sb_size wasn't being set, so instead do the sensible thing and set if when filling in the content of a superblock. That ensures that whenever we write a superblock, the sb_size MUST be set. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 8c1f7954882a..1554b924fbb9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -712,6 +712,8 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) int i; int active=0, working=0,failed=0,spare=0,nr_disks=0; + rdev->sb_size = MD_SB_BYTES; + sb = (mdp_super_t*)page_address(rdev->sb_page); memset(sb, 0, sizeof(*sb)); @@ -2303,8 +2305,6 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) else rdev->in_sync = 0; - rdev->sb_size = MD_SB_BYTES; - if (info->state & (1<flags); -- cgit v1.2.2 From 338cec3253a6d43d02e5e96abc327197565efcc8 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 10 Sep 2005 00:26:54 -0700 Subject: [PATCH] merge some from Rusty's trivial patches This patch contains the most trivial from Rusty's trivial patches: - spelling fixes - remove duplicate includes Signed-off-by: Adrian Bunk Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 1554b924fbb9..2897df90df44 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -74,7 +74,7 @@ static DEFINE_SPINLOCK(pers_lock); * Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit' * is 1000 KB/sec, so the extra system load does not show up that much. * Increase it if you want to have more _guaranteed_ speed. Note that - * the RAID driver will use the maximum available bandwith if the IO + * the RAID driver will use the maximum available bandwidth if the IO * subsystem is idle. There is also an 'absolute maximum' reconstruction * speed limit - in case reconstruction slows down your system despite * idle IO detection. @@ -3616,7 +3616,7 @@ static void md_do_sync(mddev_t *mddev) printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev)); printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:" " %d KB/sec/disc.\n", sysctl_speed_limit_min); - printk(KERN_INFO "md: using maximum available idle IO bandwith " + printk(KERN_INFO "md: using maximum available idle IO bandwidth " "(but not more than %d KB/sec) for reconstruction.\n", sysctl_speed_limit_max); -- cgit v1.2.2