diff options
author | NeilBrown <neilb@suse.de> | 2008-02-06 04:39:51 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2008-02-06 13:41:18 -0500 |
commit | e691063a61f7f72a7d2882eb744b07a520cde23b (patch) | |
tree | 4f5ceb7ed310a7d1bb076271926260723ac5ee6d | |
parent | b47490c9bc73d0b34e4c194db40de183e592e446 (diff) |
md: support 'external' metadata for md arrays
- Add a state flag 'external' to indicate that the metadata is managed
externally (by user-space) so important changes need to be
left to user-space to handle.
Alternatives are non-persistent ('none'), where there is no stable metadata -
after the array is stopped there is no record of its status - and
'internal', which can be version 0.90 or version 1.x.
These are selected by writing to the 'metadata' attribute.
- move the updating of superblocks (sync_sbs) to after we have checked if
there are any superblocks or not.
- New array state 'write_pending'. This means that the metadata records
the array as 'clean', but a write has been requested, so the metadata has
to be updated to record a 'dirty' array before the write can continue.
This change is reported to md by writing 'active' to the array_state
attribute.
- tidy up marking of sb_dirty:
- don't set sb_dirty when resync finishes as md_check_recovery
calls md_update_sb when the sync thread finishes anyway.
- Don't set sb_dirty in multipath_run as the array might not be dirty.
- don't mark superblock dirty when switching to 'clean' if there
is no internal superblock (if external, userspace can choose to
update the superblock whenever it chooses to).
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/md/md.c | 77 | ||||
-rw-r--r-- | include/linux/raid/md_k.h | 3 |
2 files changed, 61 insertions, 19 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index c28a120b4161..e2782a04012d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -778,7 +778,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
778 | mddev->major_version = 0; | 778 | mddev->major_version = 0; |
779 | mddev->minor_version = sb->minor_version; | 779 | mddev->minor_version = sb->minor_version; |
780 | mddev->patch_version = sb->patch_version; | 780 | mddev->patch_version = sb->patch_version; |
781 | mddev->persistent = ! sb->not_persistent; | 781 | mddev->persistent = 1; |
782 | mddev->external = 0; | ||
782 | mddev->chunk_size = sb->chunk_size; | 783 | mddev->chunk_size = sb->chunk_size; |
783 | mddev->ctime = sb->ctime; | 784 | mddev->ctime = sb->ctime; |
784 | mddev->utime = sb->utime; | 785 | mddev->utime = sb->utime; |
@@ -904,7 +905,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
904 | sb->size = mddev->size; | 905 | sb->size = mddev->size; |
905 | sb->raid_disks = mddev->raid_disks; | 906 | sb->raid_disks = mddev->raid_disks; |
906 | sb->md_minor = mddev->md_minor; | 907 | sb->md_minor = mddev->md_minor; |
907 | sb->not_persistent = !mddev->persistent; | 908 | sb->not_persistent = 0; |
908 | sb->utime = mddev->utime; | 909 | sb->utime = mddev->utime; |
909 | sb->state = 0; | 910 | sb->state = 0; |
910 | sb->events_hi = (mddev->events>>32); | 911 | sb->events_hi = (mddev->events>>32); |
@@ -1158,6 +1159,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1158 | mddev->major_version = 1; | 1159 | mddev->major_version = 1; |
1159 | mddev->patch_version = 0; | 1160 | mddev->patch_version = 0; |
1160 | mddev->persistent = 1; | 1161 | mddev->persistent = 1; |
1162 | mddev->external = 0; | ||
1161 | mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; | 1163 | mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; |
1162 | mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); | 1164 | mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); |
1163 | mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); | 1165 | mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); |
@@ -1696,18 +1698,20 @@ repeat: | |||
1696 | MD_BUG(); | 1698 | MD_BUG(); |
1697 | mddev->events --; | 1699 | mddev->events --; |
1698 | } | 1700 | } |
1699 | sync_sbs(mddev, nospares); | ||
1700 | 1701 | ||
1701 | /* | 1702 | /* |
1702 | * do not write anything to disk if using | 1703 | * do not write anything to disk if using |
1703 | * nonpersistent superblocks | 1704 | * nonpersistent superblocks |
1704 | */ | 1705 | */ |
1705 | if (!mddev->persistent) { | 1706 | if (!mddev->persistent) { |
1706 | clear_bit(MD_CHANGE_PENDING, &mddev->flags); | 1707 | if (!mddev->external) |
1708 | clear_bit(MD_CHANGE_PENDING, &mddev->flags); | ||
1709 | |||
1707 | spin_unlock_irq(&mddev->write_lock); | 1710 | spin_unlock_irq(&mddev->write_lock); |
1708 | wake_up(&mddev->sb_wait); | 1711 | wake_up(&mddev->sb_wait); |
1709 | return; | 1712 | return; |
1710 | } | 1713 | } |
1714 | sync_sbs(mddev, nospares); | ||
1711 | spin_unlock_irq(&mddev->write_lock); | 1715 | spin_unlock_irq(&mddev->write_lock); |
1712 | 1716 | ||
1713 | dprintk(KERN_INFO | 1717 | dprintk(KERN_INFO |
@@ -2425,6 +2429,8 @@ array_state_show(mddev_t *mddev, char *page) | |||
2425 | case 0: | 2429 | case 0: |
2426 | if (mddev->in_sync) | 2430 | if (mddev->in_sync) |
2427 | st = clean; | 2431 | st = clean; |
2432 | else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags)) | ||
2433 | st = write_pending; | ||
2428 | else if (mddev->safemode) | 2434 | else if (mddev->safemode) |
2429 | st = active_idle; | 2435 | st = active_idle; |
2430 | else | 2436 | else |
@@ -2455,11 +2461,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2455 | break; | 2461 | break; |
2456 | case clear: | 2462 | case clear: |
2457 | /* stopping an active array */ | 2463 | /* stopping an active array */ |
2458 | if (mddev->pers) { | 2464 | if (atomic_read(&mddev->active) > 1) |
2459 | if (atomic_read(&mddev->active) > 1) | 2465 | return -EBUSY; |
2460 | return -EBUSY; | 2466 | err = do_md_stop(mddev, 0); |
2461 | err = do_md_stop(mddev, 0); | ||
2462 | } | ||
2463 | break; | 2467 | break; |
2464 | case inactive: | 2468 | case inactive: |
2465 | /* stopping an active array */ | 2469 | /* stopping an active array */ |
@@ -2467,7 +2471,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2467 | if (atomic_read(&mddev->active) > 1) | 2471 | if (atomic_read(&mddev->active) > 1) |
2468 | return -EBUSY; | 2472 | return -EBUSY; |
2469 | err = do_md_stop(mddev, 2); | 2473 | err = do_md_stop(mddev, 2); |
2470 | } | 2474 | } else |
2475 | err = 0; /* already inactive */ | ||
2471 | break; | 2476 | break; |
2472 | case suspended: | 2477 | case suspended: |
2473 | break; /* not supported yet */ | 2478 | break; /* not supported yet */ |
@@ -2495,9 +2500,15 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2495 | restart_array(mddev); | 2500 | restart_array(mddev); |
2496 | spin_lock_irq(&mddev->write_lock); | 2501 | spin_lock_irq(&mddev->write_lock); |
2497 | if (atomic_read(&mddev->writes_pending) == 0) { | 2502 | if (atomic_read(&mddev->writes_pending) == 0) { |
2498 | mddev->in_sync = 1; | 2503 | if (mddev->in_sync == 0) { |
2499 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 2504 | mddev->in_sync = 1; |
2500 | } | 2505 | if (mddev->persistent) |
2506 | set_bit(MD_CHANGE_CLEAN, | ||
2507 | &mddev->flags); | ||
2508 | } | ||
2509 | err = 0; | ||
2510 | } else | ||
2511 | err = -EBUSY; | ||
2501 | spin_unlock_irq(&mddev->write_lock); | 2512 | spin_unlock_irq(&mddev->write_lock); |
2502 | } else { | 2513 | } else { |
2503 | mddev->ro = 0; | 2514 | mddev->ro = 0; |
@@ -2508,7 +2519,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) | |||
2508 | case active: | 2519 | case active: |
2509 | if (mddev->pers) { | 2520 | if (mddev->pers) { |
2510 | restart_array(mddev); | 2521 | restart_array(mddev); |
2511 | clear_bit(MD_CHANGE_CLEAN, &mddev->flags); | 2522 | if (mddev->external) |
2523 | clear_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
2512 | wake_up(&mddev->sb_wait); | 2524 | wake_up(&mddev->sb_wait); |
2513 | err = 0; | 2525 | err = 0; |
2514 | } else { | 2526 | } else { |
@@ -2659,7 +2671,9 @@ __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store); | |||
2659 | 2671 | ||
2660 | 2672 | ||
2661 | /* Metdata version. | 2673 | /* Metdata version. |
2662 | * This is either 'none' for arrays with externally managed metadata, | 2674 | * This is one of |
2675 | * 'none' for arrays with no metadata (good luck...) | ||
2676 | * 'external' for arrays with externally managed metadata, | ||
2663 | * or N.M for internally known formats | 2677 | * or N.M for internally known formats |
2664 | */ | 2678 | */ |
2665 | static ssize_t | 2679 | static ssize_t |
@@ -2668,6 +2682,8 @@ metadata_show(mddev_t *mddev, char *page) | |||
2668 | if (mddev->persistent) | 2682 | if (mddev->persistent) |
2669 | return sprintf(page, "%d.%d\n", | 2683 | return sprintf(page, "%d.%d\n", |
2670 | mddev->major_version, mddev->minor_version); | 2684 | mddev->major_version, mddev->minor_version); |
2685 | else if (mddev->external) | ||
2686 | return sprintf(page, "external:%s\n", mddev->metadata_type); | ||
2671 | else | 2687 | else |
2672 | return sprintf(page, "none\n"); | 2688 | return sprintf(page, "none\n"); |
2673 | } | 2689 | } |
@@ -2682,6 +2698,21 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2682 | 2698 | ||
2683 | if (cmd_match(buf, "none")) { | 2699 | if (cmd_match(buf, "none")) { |
2684 | mddev->persistent = 0; | 2700 | mddev->persistent = 0; |
2701 | mddev->external = 0; | ||
2702 | mddev->major_version = 0; | ||
2703 | mddev->minor_version = 90; | ||
2704 | return len; | ||
2705 | } | ||
2706 | if (strncmp(buf, "external:", 9) == 0) { | ||
2707 | int namelen = len-9; | ||
2708 | if (namelen >= sizeof(mddev->metadata_type)) | ||
2709 | namelen = sizeof(mddev->metadata_type)-1; | ||
2710 | strncpy(mddev->metadata_type, buf+9, namelen); | ||
2711 | mddev->metadata_type[namelen] = 0; | ||
2712 | if (namelen && mddev->metadata_type[namelen-1] == '\n') | ||
2713 | mddev->metadata_type[--namelen] = 0; | ||
2714 | mddev->persistent = 0; | ||
2715 | mddev->external = 1; | ||
2685 | mddev->major_version = 0; | 2716 | mddev->major_version = 0; |
2686 | mddev->minor_version = 90; | 2717 | mddev->minor_version = 90; |
2687 | return len; | 2718 | return len; |
@@ -2698,6 +2729,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len) | |||
2698 | mddev->major_version = major; | 2729 | mddev->major_version = major; |
2699 | mddev->minor_version = minor; | 2730 | mddev->minor_version = minor; |
2700 | mddev->persistent = 1; | 2731 | mddev->persistent = 1; |
2732 | mddev->external = 0; | ||
2701 | return len; | 2733 | return len; |
2702 | } | 2734 | } |
2703 | 2735 | ||
@@ -3524,6 +3556,7 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3524 | mddev->raid_disks = 0; | 3556 | mddev->raid_disks = 0; |
3525 | mddev->recovery_cp = 0; | 3557 | mddev->recovery_cp = 0; |
3526 | mddev->reshape_position = MaxSector; | 3558 | mddev->reshape_position = MaxSector; |
3559 | mddev->external = 0; | ||
3527 | 3560 | ||
3528 | } else if (mddev->pers) | 3561 | } else if (mddev->pers) |
3529 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 3562 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |
@@ -4165,13 +4198,15 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
4165 | else | 4198 | else |
4166 | mddev->recovery_cp = 0; | 4199 | mddev->recovery_cp = 0; |
4167 | mddev->persistent = ! info->not_persistent; | 4200 | mddev->persistent = ! info->not_persistent; |
4201 | mddev->external = 0; | ||
4168 | 4202 | ||
4169 | mddev->layout = info->layout; | 4203 | mddev->layout = info->layout; |
4170 | mddev->chunk_size = info->chunk_size; | 4204 | mddev->chunk_size = info->chunk_size; |
4171 | 4205 | ||
4172 | mddev->max_disks = MD_SB_DISKS; | 4206 | mddev->max_disks = MD_SB_DISKS; |
4173 | 4207 | ||
4174 | mddev->flags = 0; | 4208 | if (mddev->persistent) |
4209 | mddev->flags = 0; | ||
4175 | set_bit(MD_CHANGE_DEVS, &mddev->flags); | 4210 | set_bit(MD_CHANGE_DEVS, &mddev->flags); |
4176 | 4211 | ||
4177 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 4212 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
@@ -4982,7 +5017,10 @@ static int md_seq_show(struct seq_file *seq, void *v) | |||
4982 | mddev->major_version, | 5017 | mddev->major_version, |
4983 | mddev->minor_version); | 5018 | mddev->minor_version); |
4984 | } | 5019 | } |
4985 | } else | 5020 | } else if (mddev->external) |
5021 | seq_printf(seq, " super external:%s", | ||
5022 | mddev->metadata_type); | ||
5023 | else | ||
4986 | seq_printf(seq, " super non-persistent"); | 5024 | seq_printf(seq, " super non-persistent"); |
4987 | 5025 | ||
4988 | if (mddev->pers) { | 5026 | if (mddev->pers) { |
@@ -5589,7 +5627,7 @@ void md_check_recovery(mddev_t *mddev) | |||
5589 | } | 5627 | } |
5590 | 5628 | ||
5591 | if ( ! ( | 5629 | if ( ! ( |
5592 | mddev->flags || | 5630 | (mddev->flags && !mddev->external) || |
5593 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || | 5631 | test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || |
5594 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || | 5632 | test_bit(MD_RECOVERY_DONE, &mddev->recovery) || |
5595 | (mddev->safemode == 1) || | 5633 | (mddev->safemode == 1) || |
@@ -5605,7 +5643,8 @@ void md_check_recovery(mddev_t *mddev) | |||
5605 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && | 5643 | if (mddev->safemode && !atomic_read(&mddev->writes_pending) && |
5606 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { | 5644 | !mddev->in_sync && mddev->recovery_cp == MaxSector) { |
5607 | mddev->in_sync = 1; | 5645 | mddev->in_sync = 1; |
5608 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | 5646 | if (mddev->persistent) |
5647 | set_bit(MD_CHANGE_CLEAN, &mddev->flags); | ||
5609 | } | 5648 | } |
5610 | if (mddev->safemode == 1) | 5649 | if (mddev->safemode == 1) |
5611 | mddev->safemode = 0; | 5650 | mddev->safemode = 0; |
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index dcb729244f47..b579cc628303 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h | |||
@@ -130,6 +130,9 @@ struct mddev_s | |||
130 | minor_version, | 130 | minor_version, |
131 | patch_version; | 131 | patch_version; |
132 | int persistent; | 132 | int persistent; |
133 | int external; /* metadata is | ||
134 | * managed externally */ | ||
135 | char metadata_type[17]; /* externally set*/ | ||
133 | int chunk_size; | 136 | int chunk_size; |
134 | time_t ctime, utime; | 137 | time_t ctime, utime; |
135 | int level, layout; | 138 | int level, layout; |