aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2008-02-06 04:39:51 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2008-02-06 13:41:18 -0500
commite691063a61f7f72a7d2882eb744b07a520cde23b (patch)
tree4f5ceb7ed310a7d1bb076271926260723ac5ee6d
parentb47490c9bc73d0b34e4c194db40de183e592e446 (diff)
md: support 'external' metadata for md arrays
- Add a state flag 'external' to indicate that the metadata is managed externally (by user-space) so important changes need to be left to user-space to handle. Alternates are non-persistent ('none') where there is no stable metadata - after the array is stopped there is no record of its status - and internal which can be version 0.90 or version 1.x. These are selected by writing to the 'metadata' attribute. - move the updating of superblocks (sync_sbs) to after we have checked if there are any superblocks or not. - New array state 'write_pending'. This means that the metadata records the array as 'clean', but a write has been requested, so the metadata has to be updated to record a 'dirty' array before the write can continue. This change is reported to md by writing 'active' to the array_state attribute. - tidy up marking of sb_dirty: - don't set sb_dirty when resync finishes as md_check_recovery calls md_update_sb when the sync thread finishes anyway. - Don't set sb_dirty in multipath_run as the array might not be dirty. - don't mark superblock dirty when switching to 'clean' if there is no internal superblock (if external, userspace can choose to update the superblock whenever it chooses to). Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/md/md.c77
-rw-r--r--include/linux/raid/md_k.h3
2 files changed, 61 insertions, 19 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c28a120b4161..e2782a04012d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -778,7 +778,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
778 mddev->major_version = 0; 778 mddev->major_version = 0;
779 mddev->minor_version = sb->minor_version; 779 mddev->minor_version = sb->minor_version;
780 mddev->patch_version = sb->patch_version; 780 mddev->patch_version = sb->patch_version;
781 mddev->persistent = ! sb->not_persistent; 781 mddev->persistent = 1;
782 mddev->external = 0;
782 mddev->chunk_size = sb->chunk_size; 783 mddev->chunk_size = sb->chunk_size;
783 mddev->ctime = sb->ctime; 784 mddev->ctime = sb->ctime;
784 mddev->utime = sb->utime; 785 mddev->utime = sb->utime;
@@ -904,7 +905,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
904 sb->size = mddev->size; 905 sb->size = mddev->size;
905 sb->raid_disks = mddev->raid_disks; 906 sb->raid_disks = mddev->raid_disks;
906 sb->md_minor = mddev->md_minor; 907 sb->md_minor = mddev->md_minor;
907 sb->not_persistent = !mddev->persistent; 908 sb->not_persistent = 0;
908 sb->utime = mddev->utime; 909 sb->utime = mddev->utime;
909 sb->state = 0; 910 sb->state = 0;
910 sb->events_hi = (mddev->events>>32); 911 sb->events_hi = (mddev->events>>32);
@@ -1158,6 +1159,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
1158 mddev->major_version = 1; 1159 mddev->major_version = 1;
1159 mddev->patch_version = 0; 1160 mddev->patch_version = 0;
1160 mddev->persistent = 1; 1161 mddev->persistent = 1;
1162 mddev->external = 0;
1161 mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9; 1163 mddev->chunk_size = le32_to_cpu(sb->chunksize) << 9;
1162 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1); 1164 mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
1163 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1); 1165 mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
@@ -1696,18 +1698,20 @@ repeat:
1696 MD_BUG(); 1698 MD_BUG();
1697 mddev->events --; 1699 mddev->events --;
1698 } 1700 }
1699 sync_sbs(mddev, nospares);
1700 1701
1701 /* 1702 /*
1702 * do not write anything to disk if using 1703 * do not write anything to disk if using
1703 * nonpersistent superblocks 1704 * nonpersistent superblocks
1704 */ 1705 */
1705 if (!mddev->persistent) { 1706 if (!mddev->persistent) {
1706 clear_bit(MD_CHANGE_PENDING, &mddev->flags); 1707 if (!mddev->external)
1708 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
1709
1707 spin_unlock_irq(&mddev->write_lock); 1710 spin_unlock_irq(&mddev->write_lock);
1708 wake_up(&mddev->sb_wait); 1711 wake_up(&mddev->sb_wait);
1709 return; 1712 return;
1710 } 1713 }
1714 sync_sbs(mddev, nospares);
1711 spin_unlock_irq(&mddev->write_lock); 1715 spin_unlock_irq(&mddev->write_lock);
1712 1716
1713 dprintk(KERN_INFO 1717 dprintk(KERN_INFO
@@ -2425,6 +2429,8 @@ array_state_show(mddev_t *mddev, char *page)
2425 case 0: 2429 case 0:
2426 if (mddev->in_sync) 2430 if (mddev->in_sync)
2427 st = clean; 2431 st = clean;
2432 else if (test_bit(MD_CHANGE_CLEAN, &mddev->flags))
2433 st = write_pending;
2428 else if (mddev->safemode) 2434 else if (mddev->safemode)
2429 st = active_idle; 2435 st = active_idle;
2430 else 2436 else
@@ -2455,11 +2461,9 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
2455 break; 2461 break;
2456 case clear: 2462 case clear:
2457 /* stopping an active array */ 2463 /* stopping an active array */
2458 if (mddev->pers) { 2464 if (atomic_read(&mddev->active) > 1)
2459 if (atomic_read(&mddev->active) > 1) 2465 return -EBUSY;
2460 return -EBUSY; 2466 err = do_md_stop(mddev, 0);
2461 err = do_md_stop(mddev, 0);
2462 }
2463 break; 2467 break;
2464 case inactive: 2468 case inactive:
2465 /* stopping an active array */ 2469 /* stopping an active array */
@@ -2467,7 +2471,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
2467 if (atomic_read(&mddev->active) > 1) 2471 if (atomic_read(&mddev->active) > 1)
2468 return -EBUSY; 2472 return -EBUSY;
2469 err = do_md_stop(mddev, 2); 2473 err = do_md_stop(mddev, 2);
2470 } 2474 } else
2475 err = 0; /* already inactive */
2471 break; 2476 break;
2472 case suspended: 2477 case suspended:
2473 break; /* not supported yet */ 2478 break; /* not supported yet */
@@ -2495,9 +2500,15 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
2495 restart_array(mddev); 2500 restart_array(mddev);
2496 spin_lock_irq(&mddev->write_lock); 2501 spin_lock_irq(&mddev->write_lock);
2497 if (atomic_read(&mddev->writes_pending) == 0) { 2502 if (atomic_read(&mddev->writes_pending) == 0) {
2498 mddev->in_sync = 1; 2503 if (mddev->in_sync == 0) {
2499 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 2504 mddev->in_sync = 1;
2500 } 2505 if (mddev->persistent)
2506 set_bit(MD_CHANGE_CLEAN,
2507 &mddev->flags);
2508 }
2509 err = 0;
2510 } else
2511 err = -EBUSY;
2501 spin_unlock_irq(&mddev->write_lock); 2512 spin_unlock_irq(&mddev->write_lock);
2502 } else { 2513 } else {
2503 mddev->ro = 0; 2514 mddev->ro = 0;
@@ -2508,7 +2519,8 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
2508 case active: 2519 case active:
2509 if (mddev->pers) { 2520 if (mddev->pers) {
2510 restart_array(mddev); 2521 restart_array(mddev);
2511 clear_bit(MD_CHANGE_CLEAN, &mddev->flags); 2522 if (mddev->external)
2523 clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
2512 wake_up(&mddev->sb_wait); 2524 wake_up(&mddev->sb_wait);
2513 err = 0; 2525 err = 0;
2514 } else { 2526 } else {
@@ -2659,7 +2671,9 @@ __ATTR(component_size, S_IRUGO|S_IWUSR, size_show, size_store);
2659 2671
2660 2672
2661/* Metadata version. 2673/* Metadata version.
2662 * This is either 'none' for arrays with externally managed metadata, 2674 * This is one of
2675 * 'none' for arrays with no metadata (good luck...)
2676 * 'external' for arrays with externally managed metadata,
2663 * or N.M for internally known formats 2677 * or N.M for internally known formats
2664 */ 2678 */
2665static ssize_t 2679static ssize_t
@@ -2668,6 +2682,8 @@ metadata_show(mddev_t *mddev, char *page)
2668 if (mddev->persistent) 2682 if (mddev->persistent)
2669 return sprintf(page, "%d.%d\n", 2683 return sprintf(page, "%d.%d\n",
2670 mddev->major_version, mddev->minor_version); 2684 mddev->major_version, mddev->minor_version);
2685 else if (mddev->external)
2686 return sprintf(page, "external:%s\n", mddev->metadata_type);
2671 else 2687 else
2672 return sprintf(page, "none\n"); 2688 return sprintf(page, "none\n");
2673} 2689}
@@ -2682,6 +2698,21 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
2682 2698
2683 if (cmd_match(buf, "none")) { 2699 if (cmd_match(buf, "none")) {
2684 mddev->persistent = 0; 2700 mddev->persistent = 0;
2701 mddev->external = 0;
2702 mddev->major_version = 0;
2703 mddev->minor_version = 90;
2704 return len;
2705 }
2706 if (strncmp(buf, "external:", 9) == 0) {
2707 int namelen = len-9;
2708 if (namelen >= sizeof(mddev->metadata_type))
2709 namelen = sizeof(mddev->metadata_type)-1;
2710 strncpy(mddev->metadata_type, buf+9, namelen);
2711 mddev->metadata_type[namelen] = 0;
2712 if (namelen && mddev->metadata_type[namelen-1] == '\n')
2713 mddev->metadata_type[--namelen] = 0;
2714 mddev->persistent = 0;
2715 mddev->external = 1;
2685 mddev->major_version = 0; 2716 mddev->major_version = 0;
2686 mddev->minor_version = 90; 2717 mddev->minor_version = 90;
2687 return len; 2718 return len;
@@ -2698,6 +2729,7 @@ metadata_store(mddev_t *mddev, const char *buf, size_t len)
2698 mddev->major_version = major; 2729 mddev->major_version = major;
2699 mddev->minor_version = minor; 2730 mddev->minor_version = minor;
2700 mddev->persistent = 1; 2731 mddev->persistent = 1;
2732 mddev->external = 0;
2701 return len; 2733 return len;
2702} 2734}
2703 2735
@@ -3524,6 +3556,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
3524 mddev->raid_disks = 0; 3556 mddev->raid_disks = 0;
3525 mddev->recovery_cp = 0; 3557 mddev->recovery_cp = 0;
3526 mddev->reshape_position = MaxSector; 3558 mddev->reshape_position = MaxSector;
3559 mddev->external = 0;
3527 3560
3528 } else if (mddev->pers) 3561 } else if (mddev->pers)
3529 printk(KERN_INFO "md: %s switched to read-only mode.\n", 3562 printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -4165,13 +4198,15 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
4165 else 4198 else
4166 mddev->recovery_cp = 0; 4199 mddev->recovery_cp = 0;
4167 mddev->persistent = ! info->not_persistent; 4200 mddev->persistent = ! info->not_persistent;
4201 mddev->external = 0;
4168 4202
4169 mddev->layout = info->layout; 4203 mddev->layout = info->layout;
4170 mddev->chunk_size = info->chunk_size; 4204 mddev->chunk_size = info->chunk_size;
4171 4205
4172 mddev->max_disks = MD_SB_DISKS; 4206 mddev->max_disks = MD_SB_DISKS;
4173 4207
4174 mddev->flags = 0; 4208 if (mddev->persistent)
4209 mddev->flags = 0;
4175 set_bit(MD_CHANGE_DEVS, &mddev->flags); 4210 set_bit(MD_CHANGE_DEVS, &mddev->flags);
4176 4211
4177 mddev->default_bitmap_offset = MD_SB_BYTES >> 9; 4212 mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
@@ -4982,7 +5017,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
4982 mddev->major_version, 5017 mddev->major_version,
4983 mddev->minor_version); 5018 mddev->minor_version);
4984 } 5019 }
4985 } else 5020 } else if (mddev->external)
5021 seq_printf(seq, " super external:%s",
5022 mddev->metadata_type);
5023 else
4986 seq_printf(seq, " super non-persistent"); 5024 seq_printf(seq, " super non-persistent");
4987 5025
4988 if (mddev->pers) { 5026 if (mddev->pers) {
@@ -5589,7 +5627,7 @@ void md_check_recovery(mddev_t *mddev)
5589 } 5627 }
5590 5628
5591 if ( ! ( 5629 if ( ! (
5592 mddev->flags || 5630 (mddev->flags && !mddev->external) ||
5593 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || 5631 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
5594 test_bit(MD_RECOVERY_DONE, &mddev->recovery) || 5632 test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
5595 (mddev->safemode == 1) || 5633 (mddev->safemode == 1) ||
@@ -5605,7 +5643,8 @@ void md_check_recovery(mddev_t *mddev)
5605 if (mddev->safemode && !atomic_read(&mddev->writes_pending) && 5643 if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
5606 !mddev->in_sync && mddev->recovery_cp == MaxSector) { 5644 !mddev->in_sync && mddev->recovery_cp == MaxSector) {
5607 mddev->in_sync = 1; 5645 mddev->in_sync = 1;
5608 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 5646 if (mddev->persistent)
5647 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
5609 } 5648 }
5610 if (mddev->safemode == 1) 5649 if (mddev->safemode == 1)
5611 mddev->safemode = 0; 5650 mddev->safemode = 0;
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index dcb729244f47..b579cc628303 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -130,6 +130,9 @@ struct mddev_s
130 minor_version, 130 minor_version,
131 patch_version; 131 patch_version;
132 int persistent; 132 int persistent;
133 int external; /* metadata is
134 * managed externally */
135 char metadata_type[17]; /* externally set*/
133 int chunk_size; 136 int chunk_size;
134 time_t ctime, utime; 137 time_t ctime, utime;
135 int level, layout; 138 int level, layout;