diff options
author | NeilBrown <neilb@suse.de> | 2007-05-09 05:35:38 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-09 15:30:57 -0400 |
commit | 08a02ecd28bad35a47357e611044dcbeab06e3d7 (patch) | |
tree | 31050d686e58d55411f95c89204321aa2ea2b63a | |
parent | 42b9bebe3fea3d3ce381bc6735a3fb50e6613f06 (diff) |
md: allow reshape_position for md arrays to be set via sysfs
"reshape_position" records how much progress has been made on a "reshape"
(adding drives, changing layout or chunksize).
When it is set, the number of drives, layout and chunksize can have
two possible values, an old and a new.
So allow these different values to be visible, and allow both old and new to
be set: Set the old ones first, then the reshape_position, then the new
values.
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | Documentation/md.txt | 72 | ||||
-rw-r--r-- | drivers/md/md.c | 70 |
2 files changed, 107 insertions, 35 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt index 2202f5dc8ac2..5818628207b5 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -178,6 +178,21 @@ All md devices contain: | |||
178 | The size should be at least PAGE_SIZE (4k) and should be a power | 178 | The size should be at least PAGE_SIZE (4k) and should be a power |
179 | of 2. This can only be set while assembling an array | 179 | of 2. This can only be set while assembling an array |
180 | 180 | ||
181 | layout | ||
182 | The "layout" for the array for the particular level. This is | ||
183 | simply a number that is interpreted differently by different | ||
184 | levels. It can be written while assembling an array. | ||
185 | |||
186 | reshape_position | ||
187 | This is either "none" or a sector number within the devices of | ||
188 | the array where "reshape" is up to. If this is set, the three | ||
189 | attributes mentioned above (raid_disks, chunk_size, layout) can | ||
190 | potentially have 2 values, an old and a new value. If these | ||
191 | values differ, reading the attribute returns | ||
192 | new (old) | ||
193 | and writing will affect the 'new' value, leaving the 'old' | ||
194 | unchanged. | ||
195 | |||
181 | component_size | 196 | component_size |
182 | For arrays with data redundancy (i.e. not raid0, linear, faulty, | 197 | For arrays with data redundancy (i.e. not raid0, linear, faulty, |
183 | multipath), all components must be the same size - or at least | 198 | multipath), all components must be the same size - or at least |
@@ -193,11 +208,6 @@ All md devices contain: | |||
193 | 1.2 (newer format in varying locations) or "none" indicating that | 208 | 1.2 (newer format in varying locations) or "none" indicating that |
194 | the kernel isn't managing metadata at all. | 209 | the kernel isn't managing metadata at all. |
195 | 210 | ||
196 | layout | ||
197 | The "layout" for the array for the particular level. This is | ||
198 | simply a number that is interpretted differently by different | ||
199 | levels. It can be written while assembling an array. | ||
200 | |||
201 | resync_start | 211 | resync_start |
202 | The point at which resync should start. If no resync is needed, | 212 | The point at which resync should start. If no resync is needed, |
203 | this will be a very large number. At array creation it will | 213 | this will be a very large number. At array creation it will |
@@ -259,29 +269,6 @@ All md devices contain: | |||
259 | like active, but no writes have been seen for a while (safe_mode_delay). | 269 | like active, but no writes have been seen for a while (safe_mode_delay). |
260 | 270 | ||
261 | 271 | ||
262 | sync_speed_min | ||
263 | sync_speed_max | ||
264 | This are similar to /proc/sys/dev/raid/speed_limit_{min,max} | ||
265 | however they only apply to the particular array. | ||
266 | If no value has been written to these, of if the word 'system' | ||
267 | is written, then the system-wide value is used. If a value, | ||
268 | in kibibytes-per-second is written, then it is used. | ||
269 | When the files are read, they show the currently active value | ||
270 | followed by "(local)" or "(system)" depending on whether it is | ||
271 | a locally set or system-wide value. | ||
272 | |||
273 | sync_completed | ||
274 | This shows the number of sectors that have been completed of | ||
275 | whatever the current sync_action is, followed by the number of | ||
276 | sectors in total that could need to be processed. The two | ||
277 | numbers are separated by a '/' thus effectively showing one | ||
278 | value, a fraction of the process that is complete. | ||
279 | |||
280 | sync_speed | ||
281 | This shows the current actual speed, in K/sec, of the current | ||
282 | sync_action. It is averaged over the last 30 seconds. | ||
283 | |||
284 | |||
285 | As component devices are added to an md array, they appear in the 'md' | 272 | As component devices are added to an md array, they appear in the 'md' |
286 | directory as new directories named | 273 | directory as new directories named |
287 | dev-XXX | 274 | dev-XXX |
@@ -412,6 +399,35 @@ also have | |||
412 | Note that the numbers are 'bit' numbers, not 'block' numbers. | 399 | Note that the numbers are 'bit' numbers, not 'block' numbers. |
413 | They should be scaled by the bitmap_chunksize. | 400 | They should be scaled by the bitmap_chunksize. |
414 | 401 | ||
402 | sync_speed_min | ||
403 | sync_speed_max | ||
404 | These are similar to /proc/sys/dev/raid/speed_limit_{min,max} | ||
405 | however they only apply to the particular array. | ||
406 | If no value has been written to these, or if the word 'system' | ||
407 | is written, then the system-wide value is used. If a value, | ||
408 | in kibibytes-per-second is written, then it is used. | ||
409 | When the files are read, they show the currently active value | ||
410 | followed by "(local)" or "(system)" depending on whether it is | ||
411 | a locally set or system-wide value. | ||
412 | |||
413 | sync_completed | ||
414 | This shows the number of sectors that have been completed of | ||
415 | whatever the current sync_action is, followed by the number of | ||
416 | sectors in total that could need to be processed. The two | ||
417 | numbers are separated by a '/' thus effectively showing one | ||
418 | value, a fraction of the process that is complete. | ||
419 | |||
420 | sync_speed | ||
421 | This shows the current actual speed, in K/sec, of the current | ||
422 | sync_action. It is averaged over the last 30 seconds. | ||
423 | |||
424 | suspend_lo | ||
425 | suspend_hi | ||
426 | The two values, given as numbers of sectors, indicate a range | ||
427 | within the array where IO will be blocked. This is currently | ||
428 | only supported for raid4/5/6. | ||
429 | |||
430 | |||
415 | Each active md device may also have attributes specific to the | 431 | Each active md device may also have attributes specific to the |
416 | personality module that manages it. | 432 | personality module that manages it. |
417 | These are specific to the implementation of the module and could | 433 | These are specific to the implementation of the module and could |
diff --git a/drivers/md/md.c b/drivers/md/md.c index bad324171cad..65814b0340cb 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -274,6 +274,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
274 | atomic_set(&new->active, 1); | 274 | atomic_set(&new->active, 1); |
275 | spin_lock_init(&new->write_lock); | 275 | spin_lock_init(&new->write_lock); |
276 | init_waitqueue_head(&new->sb_wait); | 276 | init_waitqueue_head(&new->sb_wait); |
277 | new->reshape_position = MaxSector; | ||
277 | 278 | ||
278 | new->queue = blk_alloc_queue(GFP_KERNEL); | 279 | new->queue = blk_alloc_queue(GFP_KERNEL); |
279 | if (!new->queue) { | 280 | if (!new->queue) { |
@@ -2242,6 +2243,10 @@ static ssize_t | |||
2242 | layout_show(mddev_t *mddev, char *page) | 2243 | layout_show(mddev_t *mddev, char *page) |
2243 | { | 2244 | { |
2244 | /* just a number, not meaningful for all levels */ | 2245 | /* just a number, not meaningful for all levels */ |
2246 | if (mddev->reshape_position != MaxSector && | ||
2247 | mddev->layout != mddev->new_layout) | ||
2248 | return sprintf(page, "%d (%d)\n", | ||
2249 | mddev->new_layout, mddev->layout); | ||
2245 | return sprintf(page, "%d\n", mddev->layout); | 2250 | return sprintf(page, "%d\n", mddev->layout); |
2246 | } | 2251 | } |
2247 | 2252 | ||
@@ -2250,13 +2255,16 @@ layout_store(mddev_t *mddev, const char *buf, size_t len) | |||
2250 | { | 2255 | { |
2251 | char *e; | 2256 | char *e; |
2252 | unsigned long n = simple_strtoul(buf, &e, 10); | 2257 | unsigned long n = simple_strtoul(buf, &e, 10); |
2253 | if (mddev->pers) | ||
2254 | return -EBUSY; | ||
2255 | 2258 | ||
2256 | if (!*buf || (*e && *e != '\n')) | 2259 | if (!*buf || (*e && *e != '\n')) |
2257 | return -EINVAL; | 2260 | return -EINVAL; |
2258 | 2261 | ||
2259 | mddev->layout = n; | 2262 | if (mddev->pers) |
2263 | return -EBUSY; | ||
2264 | if (mddev->reshape_position != MaxSector) | ||
2265 | mddev->new_layout = n; | ||
2266 | else | ||
2267 | mddev->layout = n; | ||
2260 | return len; | 2268 | return len; |
2261 | } | 2269 | } |
2262 | static struct md_sysfs_entry md_layout = | 2270 | static struct md_sysfs_entry md_layout = |
@@ -2268,6 +2276,10 @@ raid_disks_show(mddev_t *mddev, char *page) | |||
2268 | { | 2276 | { |
2269 | if (mddev->raid_disks == 0) | 2277 | if (mddev->raid_disks == 0) |
2270 | return 0; | 2278 | return 0; |
2279 | if (mddev->reshape_position != MaxSector && | ||
2280 | mddev->delta_disks != 0) | ||
2281 | return sprintf(page, "%d (%d)\n", mddev->raid_disks, | ||
2282 | mddev->raid_disks - mddev->delta_disks); | ||
2271 | return sprintf(page, "%d\n", mddev->raid_disks); | 2283 | return sprintf(page, "%d\n", mddev->raid_disks); |
2272 | } | 2284 | } |
2273 | 2285 | ||
@@ -2285,7 +2297,11 @@ raid_disks_store(mddev_t *mddev, const char *buf, size_t len) | |||
2285 | 2297 | ||
2286 | if (mddev->pers) | 2298 | if (mddev->pers) |
2287 | rv = update_raid_disks(mddev, n); | 2299 | rv = update_raid_disks(mddev, n); |
2288 | else | 2300 | else if (mddev->reshape_position != MaxSector) { |
2301 | int olddisks = mddev->raid_disks - mddev->delta_disks; | ||
2302 | mddev->delta_disks = n - olddisks; | ||
2303 | mddev->raid_disks = n; | ||
2304 | } else | ||
2289 | mddev->raid_disks = n; | 2305 | mddev->raid_disks = n; |
2290 | return rv ? rv : len; | 2306 | return rv ? rv : len; |
2291 | } | 2307 | } |
@@ -2295,6 +2311,10 @@ __ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store); | |||
2295 | static ssize_t | 2311 | static ssize_t |
2296 | chunk_size_show(mddev_t *mddev, char *page) | 2312 | chunk_size_show(mddev_t *mddev, char *page) |
2297 | { | 2313 | { |
2314 | if (mddev->reshape_position != MaxSector && | ||
2315 | mddev->chunk_size != mddev->new_chunk) | ||
2316 | return sprintf(page, "%d (%d)\n", mddev->new_chunk, | ||
2317 | mddev->chunk_size); | ||
2298 | return sprintf(page, "%d\n", mddev->chunk_size); | 2318 | return sprintf(page, "%d\n", mddev->chunk_size); |
2299 | } | 2319 | } |
2300 | 2320 | ||
@@ -2305,12 +2325,15 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2305 | char *e; | 2325 | char *e; |
2306 | unsigned long n = simple_strtoul(buf, &e, 10); | 2326 | unsigned long n = simple_strtoul(buf, &e, 10); |
2307 | 2327 | ||
2308 | if (mddev->pers) | ||
2309 | return -EBUSY; | ||
2310 | if (!*buf || (*e && *e != '\n')) | 2328 | if (!*buf || (*e && *e != '\n')) |
2311 | return -EINVAL; | 2329 | return -EINVAL; |
2312 | 2330 | ||
2313 | mddev->chunk_size = n; | 2331 | if (mddev->pers) |
2332 | return -EBUSY; | ||
2333 | else if (mddev->reshape_position != MaxSector) | ||
2334 | mddev->new_chunk = n; | ||
2335 | else | ||
2336 | mddev->chunk_size = n; | ||
2314 | return len; | 2337 | return len; |
2315 | } | 2338 | } |
2316 | static struct md_sysfs_entry md_chunk_size = | 2339 | static struct md_sysfs_entry md_chunk_size = |
@@ -2896,6 +2919,37 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len) | |||
2896 | static struct md_sysfs_entry md_suspend_hi = | 2919 | static struct md_sysfs_entry md_suspend_hi = |
2897 | __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); | 2920 | __ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); |
2898 | 2921 | ||
2922 | static ssize_t | ||
2923 | reshape_position_show(mddev_t *mddev, char *page) | ||
2924 | { | ||
2925 | if (mddev->reshape_position != MaxSector) | ||
2926 | return sprintf(page, "%llu\n", | ||
2927 | (unsigned long long)mddev->reshape_position); | ||
2928 | strcpy(page, "none\n"); | ||
2929 | return 5; | ||
2930 | } | ||
2931 | |||
2932 | static ssize_t | ||
2933 | reshape_position_store(mddev_t *mddev, const char *buf, size_t len) | ||
2934 | { | ||
2935 | char *e; | ||
2936 | unsigned long long new = simple_strtoull(buf, &e, 10); | ||
2937 | if (mddev->pers) | ||
2938 | return -EBUSY; | ||
2939 | if (buf == e || (*e && *e != '\n')) | ||
2940 | return -EINVAL; | ||
2941 | mddev->reshape_position = new; | ||
2942 | mddev->delta_disks = 0; | ||
2943 | mddev->new_level = mddev->level; | ||
2944 | mddev->new_layout = mddev->layout; | ||
2945 | mddev->new_chunk = mddev->chunk_size; | ||
2946 | return len; | ||
2947 | } | ||
2948 | |||
2949 | static struct md_sysfs_entry md_reshape_position = | ||
2950 | __ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show, | ||
2951 | reshape_position_store); | ||
2952 | |||
2899 | 2953 | ||
2900 | static struct attribute *md_default_attrs[] = { | 2954 | static struct attribute *md_default_attrs[] = { |
2901 | &md_level.attr, | 2955 | &md_level.attr, |
@@ -2908,6 +2962,7 @@ static struct attribute *md_default_attrs[] = { | |||
2908 | &md_new_device.attr, | 2962 | &md_new_device.attr, |
2909 | &md_safe_delay.attr, | 2963 | &md_safe_delay.attr, |
2910 | &md_array_state.attr, | 2964 | &md_array_state.attr, |
2965 | &md_reshape_position.attr, | ||
2911 | NULL, | 2966 | NULL, |
2912 | }; | 2967 | }; |
2913 | 2968 | ||
@@ -3446,6 +3501,7 @@ static int do_md_stop(mddev_t * mddev, int mode) | |||
3446 | mddev->size = 0; | 3501 | mddev->size = 0; |
3447 | mddev->raid_disks = 0; | 3502 | mddev->raid_disks = 0; |
3448 | mddev->recovery_cp = 0; | 3503 | mddev->recovery_cp = 0; |
3504 | mddev->reshape_position = MaxSector; | ||
3449 | 3505 | ||
3450 | } else if (mddev->pers) | 3506 | } else if (mddev->pers) |
3451 | printk(KERN_INFO "md: %s switched to read-only mode.\n", | 3507 | printk(KERN_INFO "md: %s switched to read-only mode.\n", |