diff options
-rw-r--r-- | drivers/md/md.c | 69 | ||||
-rw-r--r-- | drivers/md/raid1.c | 5 | ||||
-rw-r--r-- | drivers/md/raid5.c | 140 | ||||
-rw-r--r-- | include/linux/raid/md.h | 2 | ||||
-rw-r--r-- | include/linux/raid/md_k.h | 8 | ||||
-rw-r--r-- | include/linux/raid/md_p.h | 32 | ||||
-rw-r--r-- | include/linux/raid/raid5.h | 1 |
7 files changed, 231 insertions, 26 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index d169bc964676..b9dfdfccdb78 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -662,7 +662,8 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version | |||
662 | } | 662 | } |
663 | 663 | ||
664 | if (sb->major_version != 0 || | 664 | if (sb->major_version != 0 || |
665 | sb->minor_version != 90) { | 665 | sb->minor_version < 90 || |
666 | sb->minor_version > 91) { | ||
666 | printk(KERN_WARNING "Bad version number %d.%d on %s\n", | 667 | printk(KERN_WARNING "Bad version number %d.%d on %s\n", |
667 | sb->major_version, sb->minor_version, | 668 | sb->major_version, sb->minor_version, |
668 | b); | 669 | b); |
@@ -747,6 +748,20 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
747 | mddev->bitmap_offset = 0; | 748 | mddev->bitmap_offset = 0; |
748 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 749 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
749 | 750 | ||
751 | if (mddev->minor_version >= 91) { | ||
752 | mddev->reshape_position = sb->reshape_position; | ||
753 | mddev->delta_disks = sb->delta_disks; | ||
754 | mddev->new_level = sb->new_level; | ||
755 | mddev->new_layout = sb->new_layout; | ||
756 | mddev->new_chunk = sb->new_chunk; | ||
757 | } else { | ||
758 | mddev->reshape_position = MaxSector; | ||
759 | mddev->delta_disks = 0; | ||
760 | mddev->new_level = mddev->level; | ||
761 | mddev->new_layout = mddev->layout; | ||
762 | mddev->new_chunk = mddev->chunk_size; | ||
763 | } | ||
764 | |||
750 | if (sb->state & (1<<MD_SB_CLEAN)) | 765 | if (sb->state & (1<<MD_SB_CLEAN)) |
751 | mddev->recovery_cp = MaxSector; | 766 | mddev->recovery_cp = MaxSector; |
752 | else { | 767 | else { |
@@ -841,7 +856,6 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
841 | 856 | ||
842 | sb->md_magic = MD_SB_MAGIC; | 857 | sb->md_magic = MD_SB_MAGIC; |
843 | sb->major_version = mddev->major_version; | 858 | sb->major_version = mddev->major_version; |
844 | sb->minor_version = mddev->minor_version; | ||
845 | sb->patch_version = mddev->patch_version; | 859 | sb->patch_version = mddev->patch_version; |
846 | sb->gvalid_words = 0; /* ignored */ | 860 | sb->gvalid_words = 0; /* ignored */ |
847 | memcpy(&sb->set_uuid0, mddev->uuid+0, 4); | 861 | memcpy(&sb->set_uuid0, mddev->uuid+0, 4); |
@@ -860,6 +874,17 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
860 | sb->events_hi = (mddev->events>>32); | 874 | sb->events_hi = (mddev->events>>32); |
861 | sb->events_lo = (u32)mddev->events; | 875 | sb->events_lo = (u32)mddev->events; |
862 | 876 | ||
877 | if (mddev->reshape_position == MaxSector) | ||
878 | sb->minor_version = 90; | ||
879 | else { | ||
880 | sb->minor_version = 91; | ||
881 | sb->reshape_position = mddev->reshape_position; | ||
882 | sb->new_level = mddev->new_level; | ||
883 | sb->delta_disks = mddev->delta_disks; | ||
884 | sb->new_layout = mddev->new_layout; | ||
885 | sb->new_chunk = mddev->new_chunk; | ||
886 | } | ||
887 | mddev->minor_version = sb->minor_version; | ||
863 | if (mddev->in_sync) | 888 | if (mddev->in_sync) |
864 | { | 889 | { |
865 | sb->recovery_cp = mddev->recovery_cp; | 890 | sb->recovery_cp = mddev->recovery_cp; |
@@ -1104,6 +1129,20 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1104 | } | 1129 | } |
1105 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); | 1130 | mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); |
1106 | } | 1131 | } |
1132 | if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { | ||
1133 | mddev->reshape_position = le64_to_cpu(sb->reshape_position); | ||
1134 | mddev->delta_disks = le32_to_cpu(sb->delta_disks); | ||
1135 | mddev->new_level = le32_to_cpu(sb->new_level); | ||
1136 | mddev->new_layout = le32_to_cpu(sb->new_layout); | ||
1137 | mddev->new_chunk = le32_to_cpu(sb->new_chunk)<<9; | ||
1138 | } else { | ||
1139 | mddev->reshape_position = MaxSector; | ||
1140 | mddev->delta_disks = 0; | ||
1141 | mddev->new_level = mddev->level; | ||
1142 | mddev->new_layout = mddev->layout; | ||
1143 | mddev->new_chunk = mddev->chunk_size; | ||
1144 | } | ||
1145 | |||
1107 | } else if (mddev->pers == NULL) { | 1146 | } else if (mddev->pers == NULL) { |
1108 | /* Insist of good event counter while assembling */ | 1147 | /* Insist of good event counter while assembling */ |
1109 | __u64 ev1 = le64_to_cpu(sb->events); | 1148 | __u64 ev1 = le64_to_cpu(sb->events); |
@@ -1175,6 +1214,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) | |||
1175 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); | 1214 | sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); |
1176 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); | 1215 | sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET); |
1177 | } | 1216 | } |
1217 | if (mddev->reshape_position != MaxSector) { | ||
1218 | sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); | ||
1219 | sb->reshape_position = cpu_to_le64(mddev->reshape_position); | ||
1220 | sb->new_layout = cpu_to_le32(mddev->new_layout); | ||
1221 | sb->delta_disks = cpu_to_le32(mddev->delta_disks); | ||
1222 | sb->new_level = cpu_to_le32(mddev->new_level); | ||
1223 | sb->new_chunk = cpu_to_le32(mddev->new_chunk>>9); | ||
1224 | } | ||
1178 | 1225 | ||
1179 | max_dev = 0; | 1226 | max_dev = 0; |
1180 | ITERATE_RDEV(mddev,rdev2,tmp) | 1227 | ITERATE_RDEV(mddev,rdev2,tmp) |
@@ -1497,7 +1544,7 @@ static void sync_sbs(mddev_t * mddev) | |||
1497 | } | 1544 | } |
1498 | } | 1545 | } |
1499 | 1546 | ||
1500 | static void md_update_sb(mddev_t * mddev) | 1547 | void md_update_sb(mddev_t * mddev) |
1501 | { | 1548 | { |
1502 | int err; | 1549 | int err; |
1503 | struct list_head *tmp; | 1550 | struct list_head *tmp; |
@@ -1574,6 +1621,7 @@ repeat: | |||
1574 | wake_up(&mddev->sb_wait); | 1621 | wake_up(&mddev->sb_wait); |
1575 | 1622 | ||
1576 | } | 1623 | } |
1624 | EXPORT_SYMBOL_GPL(md_update_sb); | ||
1577 | 1625 | ||
1578 | /* words written to sysfs files may, or my not, be \n terminated. | 1626 | /* words written to sysfs files may, or my not, be \n terminated. |
1579 | * We want to accept with case. For this we use cmd_match. | 1627 | * We want to accept with case. For this we use cmd_match. |
@@ -2545,6 +2593,14 @@ static int do_md_run(mddev_t * mddev) | |||
2545 | mddev->level = pers->level; | 2593 | mddev->level = pers->level; |
2546 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); | 2594 | strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel)); |
2547 | 2595 | ||
2596 | if (mddev->reshape_position != MaxSector && | ||
2597 | pers->reshape == NULL) { | ||
2598 | /* This personality cannot handle reshaping... */ | ||
2599 | mddev->pers = NULL; | ||
2600 | module_put(pers->owner); | ||
2601 | return -EINVAL; | ||
2602 | } | ||
2603 | |||
2548 | mddev->recovery = 0; | 2604 | mddev->recovery = 0; |
2549 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ | 2605 | mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */ |
2550 | mddev->barriers_work = 1; | 2606 | mddev->barriers_work = 1; |
@@ -3433,11 +3489,18 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info) | |||
3433 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; | 3489 | mddev->default_bitmap_offset = MD_SB_BYTES >> 9; |
3434 | mddev->bitmap_offset = 0; | 3490 | mddev->bitmap_offset = 0; |
3435 | 3491 | ||
3492 | mddev->reshape_position = MaxSector; | ||
3493 | |||
3436 | /* | 3494 | /* |
3437 | * Generate a 128 bit UUID | 3495 | * Generate a 128 bit UUID |
3438 | */ | 3496 | */ |
3439 | get_random_bytes(mddev->uuid, 16); | 3497 | get_random_bytes(mddev->uuid, 16); |
3440 | 3498 | ||
3499 | mddev->new_level = mddev->level; | ||
3500 | mddev->new_chunk = mddev->chunk_size; | ||
3501 | mddev->new_layout = mddev->layout; | ||
3502 | mddev->delta_disks = 0; | ||
3503 | |||
3441 | return 0; | 3504 | return 0; |
3442 | } | 3505 | } |
3443 | 3506 | ||
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 5d88329e3c7a..b65b8cfbdf30 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c | |||
@@ -1789,6 +1789,11 @@ static int run(mddev_t *mddev) | |||
1789 | mdname(mddev), mddev->level); | 1789 | mdname(mddev), mddev->level); |
1790 | goto out; | 1790 | goto out; |
1791 | } | 1791 | } |
1792 | if (mddev->reshape_position != MaxSector) { | ||
1793 | printk("raid1: %s: reshape_position set but not supported\n", | ||
1794 | mdname(mddev)); | ||
1795 | goto out; | ||
1796 | } | ||
1792 | /* | 1797 | /* |
1793 | * copy the already verified devices into our private RAID1 | 1798 | * copy the already verified devices into our private RAID1 |
1794 | * bookkeeping area. [whatever we allocate in run(), | 1799 | * bookkeeping area. [whatever we allocate in run(), |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b29135acb1d9..20ae32d67e21 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/raid/raid5.h> | 22 | #include <linux/raid/raid5.h> |
23 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
24 | #include <linux/bitops.h> | 24 | #include <linux/bitops.h> |
25 | #include <linux/kthread.h> | ||
25 | #include <asm/atomic.h> | 26 | #include <asm/atomic.h> |
26 | 27 | ||
27 | #include <linux/raid/bitmap.h> | 28 | #include <linux/raid/bitmap.h> |
@@ -1504,6 +1505,7 @@ static void handle_stripe(struct stripe_head *sh) | |||
1504 | clear_bit(STRIPE_EXPANDING, &sh->state); | 1505 | clear_bit(STRIPE_EXPANDING, &sh->state); |
1505 | } else if (expanded) { | 1506 | } else if (expanded) { |
1506 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 1507 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
1508 | atomic_dec(&conf->reshape_stripes); | ||
1507 | wake_up(&conf->wait_for_overlap); | 1509 | wake_up(&conf->wait_for_overlap); |
1508 | md_done_sync(conf->mddev, STRIPE_SECTORS, 1); | 1510 | md_done_sync(conf->mddev, STRIPE_SECTORS, 1); |
1509 | } | 1511 | } |
@@ -1875,6 +1877,26 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1875 | */ | 1877 | */ |
1876 | int i; | 1878 | int i; |
1877 | int dd_idx; | 1879 | int dd_idx; |
1880 | |||
1881 | if (sector_nr == 0 && | ||
1882 | conf->expand_progress != 0) { | ||
1883 | /* restarting in the middle, skip the initial sectors */ | ||
1884 | sector_nr = conf->expand_progress; | ||
1885 | sector_div(sector_nr, conf->raid_disks-1); | ||
1886 | *skipped = 1; | ||
1887 | return sector_nr; | ||
1888 | } | ||
1889 | |||
1890 | /* Cannot proceed until we've updated the superblock... */ | ||
1891 | wait_event(conf->wait_for_overlap, | ||
1892 | atomic_read(&conf->reshape_stripes)==0); | ||
1893 | mddev->reshape_position = conf->expand_progress; | ||
1894 | |||
1895 | mddev->sb_dirty = 1; | ||
1896 | md_wakeup_thread(mddev->thread); | ||
1897 | wait_event(mddev->sb_wait, mddev->sb_dirty == 0 || | ||
1898 | kthread_should_stop()); | ||
1899 | |||
1878 | for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) { | 1900 | for (i=0; i < conf->chunk_size/512; i+= STRIPE_SECTORS) { |
1879 | int j; | 1901 | int j; |
1880 | int skipped = 0; | 1902 | int skipped = 0; |
@@ -1882,6 +1904,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i | |||
1882 | sh = get_active_stripe(conf, sector_nr+i, | 1904 | sh = get_active_stripe(conf, sector_nr+i, |
1883 | conf->raid_disks, pd_idx, 0); | 1905 | conf->raid_disks, pd_idx, 0); |
1884 | set_bit(STRIPE_EXPANDING, &sh->state); | 1906 | set_bit(STRIPE_EXPANDING, &sh->state); |
1907 | atomic_inc(&conf->reshape_stripes); | ||
1885 | /* If any of this stripe is beyond the end of the old | 1908 | /* If any of this stripe is beyond the end of the old |
1886 | * array, then we need to zero those blocks | 1909 | * array, then we need to zero those blocks |
1887 | */ | 1910 | */ |
@@ -2121,10 +2144,61 @@ static int run(mddev_t *mddev) | |||
2121 | return -EIO; | 2144 | return -EIO; |
2122 | } | 2145 | } |
2123 | 2146 | ||
2147 | if (mddev->reshape_position != MaxSector) { | ||
2148 | /* Check that we can continue the reshape. | ||
2149 | * Currently only disks can change, it must | ||
2150 | * increase, and we must be past the point where | ||
2151 | * a stripe over-writes itself | ||
2152 | */ | ||
2153 | sector_t here_new, here_old; | ||
2154 | int old_disks; | ||
2155 | |||
2156 | if (mddev->new_level != mddev->level || | ||
2157 | mddev->new_layout != mddev->layout || | ||
2158 | mddev->new_chunk != mddev->chunk_size) { | ||
2159 | printk(KERN_ERR "raid5: %s: unsupported reshape required - aborting.\n", | ||
2160 | mdname(mddev)); | ||
2161 | return -EINVAL; | ||
2162 | } | ||
2163 | if (mddev->delta_disks <= 0) { | ||
2164 | printk(KERN_ERR "raid5: %s: unsupported reshape (reduce disks) required - aborting.\n", | ||
2165 | mdname(mddev)); | ||
2166 | return -EINVAL; | ||
2167 | } | ||
2168 | old_disks = mddev->raid_disks - mddev->delta_disks; | ||
2169 | /* reshape_position must be on a new-stripe boundary, and one | ||
2170 | * further up in new geometry must map after here in old geometry. | ||
2171 | */ | ||
2172 | here_new = mddev->reshape_position; | ||
2173 | if (sector_div(here_new, (mddev->chunk_size>>9)*(mddev->raid_disks-1))) { | ||
2174 | printk(KERN_ERR "raid5: reshape_position not on a stripe boundary\n"); | ||
2175 | return -EINVAL; | ||
2176 | } | ||
2177 | /* here_new is the stripe we will write to */ | ||
2178 | here_old = mddev->reshape_position; | ||
2179 | sector_div(here_old, (mddev->chunk_size>>9)*(old_disks-1)); | ||
2180 | /* here_old is the first stripe that we might need to read from */ | ||
2181 | if (here_new >= here_old) { | ||
2182 | /* Reading from the same stripe as writing to - bad */ | ||
2183 | printk(KERN_ERR "raid5: reshape_position too early for auto-recovery - aborting.\n"); | ||
2184 | return -EINVAL; | ||
2185 | } | ||
2186 | printk(KERN_INFO "raid5: reshape will continue\n"); | ||
2187 | /* OK, we should be able to continue; */ | ||
2188 | } | ||
2189 | |||
2190 | |||
2124 | mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); | 2191 | mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); |
2125 | if ((conf = mddev->private) == NULL) | 2192 | if ((conf = mddev->private) == NULL) |
2126 | goto abort; | 2193 | goto abort; |
2127 | conf->disks = kzalloc(mddev->raid_disks * sizeof(struct disk_info), | 2194 | if (mddev->reshape_position == MaxSector) { |
2195 | conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks; | ||
2196 | } else { | ||
2197 | conf->raid_disks = mddev->raid_disks; | ||
2198 | conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; | ||
2199 | } | ||
2200 | |||
2201 | conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), | ||
2128 | GFP_KERNEL); | 2202 | GFP_KERNEL); |
2129 | if (!conf->disks) | 2203 | if (!conf->disks) |
2130 | goto abort; | 2204 | goto abort; |
@@ -2148,7 +2222,7 @@ static int run(mddev_t *mddev) | |||
2148 | 2222 | ||
2149 | ITERATE_RDEV(mddev,rdev,tmp) { | 2223 | ITERATE_RDEV(mddev,rdev,tmp) { |
2150 | raid_disk = rdev->raid_disk; | 2224 | raid_disk = rdev->raid_disk; |
2151 | if (raid_disk >= mddev->raid_disks | 2225 | if (raid_disk >= conf->raid_disks |
2152 | || raid_disk < 0) | 2226 | || raid_disk < 0) |
2153 | continue; | 2227 | continue; |
2154 | disk = conf->disks + raid_disk; | 2228 | disk = conf->disks + raid_disk; |
@@ -2164,7 +2238,6 @@ static int run(mddev_t *mddev) | |||
2164 | } | 2238 | } |
2165 | } | 2239 | } |
2166 | 2240 | ||
2167 | conf->raid_disks = mddev->raid_disks; | ||
2168 | /* | 2241 | /* |
2169 | * 0 for a fully functional array, 1 for a degraded array. | 2242 | * 0 for a fully functional array, 1 for a degraded array. |
2170 | */ | 2243 | */ |
@@ -2174,7 +2247,7 @@ static int run(mddev_t *mddev) | |||
2174 | conf->level = mddev->level; | 2247 | conf->level = mddev->level; |
2175 | conf->algorithm = mddev->layout; | 2248 | conf->algorithm = mddev->layout; |
2176 | conf->max_nr_stripes = NR_STRIPES; | 2249 | conf->max_nr_stripes = NR_STRIPES; |
2177 | conf->expand_progress = MaxSector; | 2250 | conf->expand_progress = mddev->reshape_position; |
2178 | 2251 | ||
2179 | /* device size must be a multiple of chunk size */ | 2252 | /* device size must be a multiple of chunk size */ |
2180 | mddev->size &= ~(mddev->chunk_size/1024 -1); | 2253 | mddev->size &= ~(mddev->chunk_size/1024 -1); |
@@ -2247,6 +2320,20 @@ static int run(mddev_t *mddev) | |||
2247 | 2320 | ||
2248 | print_raid5_conf(conf); | 2321 | print_raid5_conf(conf); |
2249 | 2322 | ||
2323 | if (conf->expand_progress != MaxSector) { | ||
2324 | printk("...ok start reshape thread\n"); | ||
2325 | atomic_set(&conf->reshape_stripes, 0); | ||
2326 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | ||
2327 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | ||
2328 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | ||
2329 | set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); | ||
2330 | mddev->sync_thread = md_register_thread(md_do_sync, mddev, | ||
2331 | "%s_reshape"); | ||
2332 | /* FIXME if md_register_thread fails?? */ | ||
2333 | md_wakeup_thread(mddev->sync_thread); | ||
2334 | |||
2335 | } | ||
2336 | |||
2250 | /* read-ahead size must cover two whole stripes, which is | 2337 | /* read-ahead size must cover two whole stripes, which is |
2251 | * 2 * (n-1) * chunksize where 'n' is the number of raid devices | 2338 | * 2 * (n-1) * chunksize where 'n' is the number of raid devices |
2252 | */ | 2339 | */ |
@@ -2262,8 +2349,8 @@ static int run(mddev_t *mddev) | |||
2262 | 2349 | ||
2263 | mddev->queue->unplug_fn = raid5_unplug_device; | 2350 | mddev->queue->unplug_fn = raid5_unplug_device; |
2264 | mddev->queue->issue_flush_fn = raid5_issue_flush; | 2351 | mddev->queue->issue_flush_fn = raid5_issue_flush; |
2352 | mddev->array_size = mddev->size * (conf->previous_raid_disks - 1); | ||
2265 | 2353 | ||
2266 | mddev->array_size = mddev->size * (mddev->raid_disks - 1); | ||
2267 | return 0; | 2354 | return 0; |
2268 | abort: | 2355 | abort: |
2269 | if (conf) { | 2356 | if (conf) { |
@@ -2436,7 +2523,7 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | |||
2436 | /* | 2523 | /* |
2437 | * find the disk ... | 2524 | * find the disk ... |
2438 | */ | 2525 | */ |
2439 | for (disk=0; disk < mddev->raid_disks; disk++) | 2526 | for (disk=0; disk < conf->raid_disks; disk++) |
2440 | if ((p=conf->disks + disk)->rdev == NULL) { | 2527 | if ((p=conf->disks + disk)->rdev == NULL) { |
2441 | clear_bit(In_sync, &rdev->flags); | 2528 | clear_bit(In_sync, &rdev->flags); |
2442 | rdev->raid_disk = disk; | 2529 | rdev->raid_disk = disk; |
@@ -2518,9 +2605,10 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks) | |||
2518 | if (err) | 2605 | if (err) |
2519 | return err; | 2606 | return err; |
2520 | 2607 | ||
2608 | atomic_set(&conf->reshape_stripes, 0); | ||
2521 | spin_lock_irq(&conf->device_lock); | 2609 | spin_lock_irq(&conf->device_lock); |
2522 | conf->previous_raid_disks = conf->raid_disks; | 2610 | conf->previous_raid_disks = conf->raid_disks; |
2523 | mddev->raid_disks = conf->raid_disks = raid_disks; | 2611 | conf->raid_disks = raid_disks; |
2524 | conf->expand_progress = 0; | 2612 | conf->expand_progress = 0; |
2525 | spin_unlock_irq(&conf->device_lock); | 2613 | spin_unlock_irq(&conf->device_lock); |
2526 | 2614 | ||
@@ -2542,6 +2630,14 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks) | |||
2542 | } | 2630 | } |
2543 | 2631 | ||
2544 | mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices; | 2632 | mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices; |
2633 | mddev->new_chunk = mddev->chunk_size; | ||
2634 | mddev->new_layout = mddev->layout; | ||
2635 | mddev->new_level = mddev->level; | ||
2636 | mddev->raid_disks = raid_disks; | ||
2637 | mddev->delta_disks = raid_disks - conf->previous_raid_disks; | ||
2638 | mddev->reshape_position = 0; | ||
2639 | mddev->sb_dirty = 1; | ||
2640 | |||
2545 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); | 2641 | clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); |
2546 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); | 2642 | clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); |
2547 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); | 2643 | set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); |
@@ -2552,6 +2648,7 @@ static int raid5_reshape(mddev_t *mddev, int raid_disks) | |||
2552 | mddev->recovery = 0; | 2648 | mddev->recovery = 0; |
2553 | spin_lock_irq(&conf->device_lock); | 2649 | spin_lock_irq(&conf->device_lock); |
2554 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; | 2650 | mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks; |
2651 | mddev->delta_disks = 0; | ||
2555 | conf->expand_progress = MaxSector; | 2652 | conf->expand_progress = MaxSector; |
2556 | spin_unlock_irq(&conf->device_lock); | 2653 | spin_unlock_irq(&conf->device_lock); |
2557 | return -EAGAIN; | 2654 | return -EAGAIN; |
@@ -2566,20 +2663,23 @@ static void end_reshape(raid5_conf_t *conf) | |||
2566 | { | 2663 | { |
2567 | struct block_device *bdev; | 2664 | struct block_device *bdev; |
2568 | 2665 | ||
2569 | conf->mddev->array_size = conf->mddev->size * (conf->mddev->raid_disks-1); | 2666 | if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { |
2570 | set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1); | 2667 | conf->mddev->array_size = conf->mddev->size * (conf->raid_disks-1); |
2571 | conf->mddev->changed = 1; | 2668 | set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1); |
2572 | 2669 | conf->mddev->changed = 1; | |
2573 | bdev = bdget_disk(conf->mddev->gendisk, 0); | 2670 | |
2574 | if (bdev) { | 2671 | bdev = bdget_disk(conf->mddev->gendisk, 0); |
2575 | mutex_lock(&bdev->bd_inode->i_mutex); | 2672 | if (bdev) { |
2576 | i_size_write(bdev->bd_inode, conf->mddev->array_size << 10); | 2673 | mutex_lock(&bdev->bd_inode->i_mutex); |
2577 | mutex_unlock(&bdev->bd_inode->i_mutex); | 2674 | i_size_write(bdev->bd_inode, conf->mddev->array_size << 10); |
2578 | bdput(bdev); | 2675 | mutex_unlock(&bdev->bd_inode->i_mutex); |
2676 | bdput(bdev); | ||
2677 | } | ||
2678 | spin_lock_irq(&conf->device_lock); | ||
2679 | conf->expand_progress = MaxSector; | ||
2680 | spin_unlock_irq(&conf->device_lock); | ||
2681 | conf->mddev->reshape_position = MaxSector; | ||
2579 | } | 2682 | } |
2580 | spin_lock_irq(&conf->device_lock); | ||
2581 | conf->expand_progress = MaxSector; | ||
2582 | spin_unlock_irq(&conf->device_lock); | ||
2583 | } | 2683 | } |
2584 | 2684 | ||
2585 | static void raid5_quiesce(mddev_t *mddev, int state) | 2685 | static void raid5_quiesce(mddev_t *mddev, int state) |
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index 9c77cde5a795..66b44e5e0d6e 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h | |||
@@ -95,6 +95,8 @@ extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, | |||
95 | extern void md_do_sync(mddev_t *mddev); | 95 | extern void md_do_sync(mddev_t *mddev); |
96 | extern void md_new_event(mddev_t *mddev); | 96 | extern void md_new_event(mddev_t *mddev); |
97 | 97 | ||
98 | extern void md_update_sb(mddev_t * mddev); | ||
99 | |||
98 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } | 100 | #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } |
99 | 101 | ||
100 | #endif | 102 | #endif |
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 4e26ef2cacca..1a6f9f2f6282 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h | |||
@@ -132,6 +132,14 @@ struct mddev_s | |||
132 | 132 | ||
133 | char uuid[16]; | 133 | char uuid[16]; |
134 | 134 | ||
135 | /* If the array is being reshaped, we need to record the | ||
136 | * new shape and an indication of where we are up to. | ||
137 | * This is written to the superblock. | ||
138 | * If reshape_position is MaxSector, then no reshape is happening (yet). | ||
139 | */ | ||
140 | sector_t reshape_position; | ||
141 | int delta_disks, new_level, new_layout, new_chunk; | ||
142 | |||
135 | struct mdk_thread_s *thread; /* management thread */ | 143 | struct mdk_thread_s *thread; /* management thread */ |
136 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ | 144 | struct mdk_thread_s *sync_thread; /* doing resync or reconstruct */ |
137 | sector_t curr_resync; /* blocks scheduled */ | 145 | sector_t curr_resync; /* blocks scheduled */ |
diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index c100fa5d4bfa..774e1acfb8c4 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h | |||
@@ -102,6 +102,18 @@ typedef struct mdp_device_descriptor_s { | |||
102 | #define MD_SB_ERRORS 1 | 102 | #define MD_SB_ERRORS 1 |
103 | 103 | ||
104 | #define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ | 104 | #define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ |
105 | |||
106 | /* | ||
107 | * Notes: | ||
108 | * - if an array is being reshaped (restriped) in order to change the | ||
109 | * number of active devices in the array, 'raid_disks' will be | ||
110 | * the larger of the old and new numbers. 'delta_disks' will | ||
111 | * be the "new - old". So if +ve, raid_disks is the new value, and | ||
112 | * "raid_disks-delta_disks" is the old. If -ve, raid_disks is the | ||
113 | * old value and "raid_disks+delta_disks" is the new (smaller) value. | ||
114 | */ | ||
115 | |||
116 | |||
105 | typedef struct mdp_superblock_s { | 117 | typedef struct mdp_superblock_s { |
106 | /* | 118 | /* |
107 | * Constant generic information | 119 | * Constant generic information |
@@ -146,7 +158,13 @@ typedef struct mdp_superblock_s { | |||
146 | __u32 cp_events_hi; /* 10 high-order of checkpoint update count */ | 158 | __u32 cp_events_hi; /* 10 high-order of checkpoint update count */ |
147 | #endif | 159 | #endif |
148 | __u32 recovery_cp; /* 11 recovery checkpoint sector count */ | 160 | __u32 recovery_cp; /* 11 recovery checkpoint sector count */ |
149 | __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 12]; | 161 | /* These are only valid for minor_version > 90 */ |
162 | __u64 reshape_position; /* 12,13 next address in array-space for reshape */ | ||
163 | __u32 new_level; /* 14 new level we are reshaping to */ | ||
164 | __u32 delta_disks; /* 15 change in number of raid_disks */ | ||
165 | __u32 new_layout; /* 16 new layout */ | ||
166 | __u32 new_chunk; /* 17 new chunk size (bytes) */ | ||
167 | __u32 gstate_sreserved[MD_SB_GENERIC_STATE_WORDS - 18]; | ||
150 | 168 | ||
151 | /* | 169 | /* |
152 | * Personality information | 170 | * Personality information |
@@ -207,7 +225,14 @@ struct mdp_superblock_1 { | |||
207 | * NOTE: signed, so bitmap can be before superblock | 225 | * NOTE: signed, so bitmap can be before superblock |
208 | * only meaningful of feature_map[0] is set. | 226 | * only meaningful of feature_map[0] is set. |
209 | */ | 227 | */ |
210 | __u8 pad1[128-100]; /* set to 0 when written */ | 228 | |
229 | /* These are only valid with feature bit '4' */ | ||
230 | __u64 reshape_position; /* next address in array-space for reshape */ | ||
231 | __u32 new_level; /* new level we are reshaping to */ | ||
232 | __u32 delta_disks; /* change in number of raid_disks */ | ||
233 | __u32 new_layout; /* new layout */ | ||
234 | __u32 new_chunk; /* new chunk size (512-byte sectors) */ | ||
235 | __u8 pad1[128-124]; /* set to 0 when written */ | ||
211 | 236 | ||
212 | /* constant this-device information - 64 bytes */ | 237 | /* constant this-device information - 64 bytes */ |
213 | __u64 data_offset; /* sector start of data, often 0 */ | 238 | __u64 data_offset; /* sector start of data, often 0 */ |
@@ -240,8 +265,9 @@ struct mdp_superblock_1 { | |||
240 | 265 | ||
241 | /* feature_map bits */ | 266 | /* feature_map bits */ |
242 | #define MD_FEATURE_BITMAP_OFFSET 1 | 267 | #define MD_FEATURE_BITMAP_OFFSET 1 |
268 | #define MD_FEATURE_RESHAPE_ACTIVE 4 | ||
243 | 269 | ||
244 | #define MD_FEATURE_ALL 1 | 270 | #define MD_FEATURE_ALL 5 |
245 | 271 | ||
246 | #endif | 272 | #endif |
247 | 273 | ||
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 55c738d50508..abcdf0d0658a 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h | |||
@@ -224,6 +224,7 @@ struct raid5_private_data { | |||
224 | struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ | 224 | struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ |
225 | atomic_t preread_active_stripes; /* stripes with scheduled io */ | 225 | atomic_t preread_active_stripes; /* stripes with scheduled io */ |
226 | 226 | ||
227 | atomic_t reshape_stripes; /* stripes with pending writes for reshape */ | ||
227 | /* unfortunately we need two cache names as we temporarily have | 228 | /* unfortunately we need two cache names as we temporarily have |
228 | * two caches. | 229 | * two caches. |
229 | */ | 230 | */ |