diff options
author | NeilBrown <neilb@suse.de> | 2006-06-26 03:27:58 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-06-26 12:58:39 -0400 |
commit | 9e653b6342c94016f5cc9937061ef99e9c4b4045 (patch) | |
tree | ec6d984d1c6142a59a70308af511c3244276a8e5 | |
parent | 42543769142d2375f2b5f8fc9cac999f84bd4c4c (diff) |
[PATCH] md: Set/get state of array via sysfs
This allows the state of an md/array to be directly controlled via sysfs and
adds the ability to stop an array without tearing it down.
Array states/settings:
clear
No devices, no size, no level
Equivalent to STOP_ARRAY ioctl
inactive
May have some settings, but array is not active
all IO results in error
When written, doesn't tear down array, but just stops it
suspended (not supported yet)
All IO requests will block. The array can be reconfigured.
Writing this, if accepted, will block until array is quiescent
readonly
no resync can happen. no superblocks get written.
write requests fail
read-auto
like readonly, but behaves like 'clean' on a write request.
clean - no pending writes, but otherwise active.
When written to inactive array, starts without resync
If a write request arrives then
if metadata is known, mark 'dirty' and switch to 'active'.
if not known, block and switch to write-pending
If written to an active array that has pending writes, then fails.
active
fully active: IO and resync can be happening.
When written to inactive array, starts with resync
write-pending (not supported yet)
clean, but writes are blocked waiting for 'active' to be written.
active-idle
like active, but no writes have been seen for a while (safe_mode_delay).
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | Documentation/md.txt | 39 | ||||
-rw-r--r-- | drivers/md/md.c | 197 |
2 files changed, 227 insertions, 9 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt index b19978e035fc..df0b45515775 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -216,6 +216,45 @@ All md devices contain: | |||
216 | period as a number of seconds. The default is 200msec (0.200). | 216 | period as a number of seconds. The default is 200msec (0.200). |
217 | Writing a value of 0 disables safemode. | 217 | Writing a value of 0 disables safemode. |
218 | 218 | ||
219 | array_state | ||
220 | This file contains a single word which describes the current | ||
221 | state of the array. In many cases, the state can be set by | ||
222 | writing the word for the desired state, however some states | ||
223 | cannot be explicitly set, and some transitions are not allowed. | ||
224 | |||
225 | clear | ||
226 | No devices, no size, no level | ||
227 | Writing is equivalent to STOP_ARRAY ioctl | ||
228 | inactive | ||
229 | May have some settings, but array is not active | ||
230 | all IO results in error | ||
231 | When written, doesn't tear down array, but just stops it | ||
232 | suspended (not supported yet) | ||
233 | All IO requests will block. The array can be reconfigured. | ||
234 | Writing this, if accepted, will block until array is quiescent | ||
235 | readonly | ||
236 | no resync can happen. no superblocks get written. | ||
237 | write requests fail | ||
238 | read-auto | ||
239 | like readonly, but behaves like 'clean' on a write request. | ||
240 | |||
241 | clean - no pending writes, but otherwise active. | ||
242 | When written to inactive array, starts without resync | ||
243 | If a write request arrives then | ||
244 | if metadata is known, mark 'dirty' and switch to 'active'. | ||
245 | if not known, block and switch to write-pending | ||
246 | If written to an active array that has pending writes, then fails. | ||
247 | active | ||
248 | fully active: IO and resync can be happening. | ||
249 | When written to inactive array, starts with resync | ||
250 | |||
251 | write-pending | ||
252 | clean, but writes are blocked waiting for 'active' to be written. | ||
253 | |||
254 | active-idle | ||
255 | like active, but no writes have been seen for a while (safe_mode_delay). | ||
256 | |||
257 | |||
219 | sync_speed_min | 258 | sync_speed_min |
220 | sync_speed_max | 259 | sync_speed_max |
221 | These are similar to /proc/sys/dev/raid/speed_limit_{min,max} | 260 | These are similar to /proc/sys/dev/raid/speed_limit_{min,max} |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 34b6902cda46..f6562ee4c6fc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -2185,6 +2185,176 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2185 | static struct md_sysfs_entry md_chunk_size = | 2185 | static struct md_sysfs_entry md_chunk_size = |
2186 | __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); | 2186 | __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); |
2187 | 2187 | ||
2188 | /* | ||
2189 | * The array state can be: | ||
2190 | * | ||
2191 | * clear | ||
2192 | * No devices, no size, no level | ||
2193 | * Equivalent to STOP_ARRAY ioctl | ||
2194 | * inactive | ||
2195 | * May have some settings, but array is not active | ||
2196 | * all IO results in error | ||
2197 | * When written, doesn't tear down array, but just stops it | ||
2198 | * suspended (not supported yet) | ||
2199 | * All IO requests will block. The array can be reconfigured. | ||
2200 | * Writing this, if accepted, will block until array is quiescent | ||
2201 | * readonly | ||
2202 | * no resync can happen. no superblocks get written. | ||
2203 | * write requests fail | ||
2204 | * read-auto | ||
2205 | * like readonly, but behaves like 'clean' on a write request. | ||
2206 | * | ||
2207 | * clean - no pending writes, but otherwise active. | ||
2208 | * When written to inactive array, starts without resync | ||
2209 | * If a write request arrives then | ||
2210 | * if metadata is known, mark 'dirty' and switch to 'active'. | ||
2211 | * if not known, block and switch to write-pending | ||
2212 | * If written to an active array that has pending writes, then fails. | ||
2213 | * active | ||
2214 | * fully active: IO and resync can be happening. | ||
2215 | * When written to inactive array, starts with resync | ||
2216 | * | ||
2217 | * write-pending | ||
2218 | * clean, but writes are blocked waiting for 'active' to be written. | ||
2219 | * | ||
2220 | * active-idle | ||
2221 | * like active, but no writes have been seen for a while (safe_mode_delay). | ||
2222 | * | ||
2223 | */ | ||
2224 | enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active, | ||
2225 | write_pending, active_idle, bad_word}; | ||
2226 | char *array_states[] = { | ||
2227 | "clear", "inactive", "suspended", "readonly", "read-auto", "clean", "active", | ||
2228 | "write-pending", "active-idle", NULL }; | ||
2229 | |||
2230 | static int match_word(const char *word, char **list) | ||
2231 | { | ||
2232 | int n; | ||
2233 | for (n=0; list[n]; n++) | ||
2234 | if (cmd_match(word, list[n])) | ||
2235 | break; | ||
2236 | return n; | ||
2237 | } | ||
2238 | |||
2239 | static ssize_t | ||
2240 | array_state_show(mddev_t *mddev, char *page) | ||
2241 | { | ||
2242 | enum array_state st = inactive; | ||
2243 | |||
2244 | if (mddev->pers) | ||
2245 | switch(mddev->ro) { | ||
2246 | case 1: | ||
2247 | st = readonly; | ||
2248 | break; | ||
2249 | case 2: | ||
2250 | st = read_auto; | ||
2251 | break; | ||
2252 | case 0: | ||
2253 | if (mddev->in_sync) | ||
2254 | st = clean; | ||
2255 | else if (mddev->safemode) | ||
2256 | st = active_idle; | ||
2257 | else | ||
2258 | st = active; | ||
2259 | } | ||
2260 | else { | ||
2261 | if (list_empty(&mddev->disks) && | ||
2262 | mddev->raid_disks == 0 && | ||
2263 | mddev->size == 0) | ||
2264 | st = clear; | ||
2265 | else | ||
2266 | st = inactive; | ||
2267 | } | ||
2268 | return sprintf(page, "%s\n", array_states[st]); | ||
2269 | } | ||
2270 | |||
2271 | static int do_md_stop(mddev_t * mddev, int ro); | ||
2272 | static int do_md_run(mddev_t * mddev); | ||
2273 | static int restart_array(mddev_t *mddev); | ||
2274 | |||
2275 | static ssize_t | ||
2276 | array_state_store(mddev_t *mddev, const char *buf, size_t len) | ||
2277 | { | ||
2278 | int err = -EINVAL; | ||
2279 | enum array_state st = match_word(buf, array_states); | ||
2280 | switch(st) { | ||
2281 | case bad_word: | ||
2282 | break; | ||
2283 | case clear: | ||
2284 | /* stopping an active array */ | ||
2285 | if (mddev->pers) { | ||
2286 | if (atomic_read(&mddev->active) > 1) | ||
2287 | return -EBUSY; | ||
2288 | err = do_md_stop(mddev, 0); | ||
2289 | } | ||
2290 | break; | ||
2291 | case inactive: | ||
2292 | /* stopping an active array */ | ||
2293 | if (mddev->pers) { | ||
2294 | if (atomic_read(&mddev->active) > 1) | ||
2295 | return -EBUSY; | ||
2296 | err = do_md_stop(mddev, 2); | ||
2297 | } | ||
2298 | break; | ||
2299 | case suspended: | ||
2300 | break; /* not supported yet */ | ||
2301 | case readonly: | ||
2302 | if (mddev->pers) | ||
2303 | err = do_md_stop(mddev, 1); | ||
2304 | else { | ||
2305 | mddev->ro = 1; | ||
2306 | err = do_md_run(mddev); | ||
2307 | } | ||
2308 | break; | ||
2309 | case read_auto: | ||
2310 | /* stopping an active array */ | ||
2311 | if (mddev->pers) { | ||
2312 | err = do_md_stop(mddev, 1); | ||
2313 | if (err == 0) | ||
2314 | mddev->ro = 2; /* FIXME mark devices writable */ | ||
2315 | } else { | ||
2316 | mddev->ro = 2; | ||
2317 | err = do_md_run(mddev); | ||
2318 | } | ||
2319 | break; | ||
2320 | case clean: | ||
2321 | if (mddev->pers) { | ||
2322 | restart_array(mddev); | ||
2323 | spin_lock_irq(&mddev->write_lock); | ||
2324 | if (atomic_read(&mddev->writes_pending) == 0) { | ||
2325 | mddev->in_sync = 1; | ||
2326 | mddev->sb_dirty = 1; | ||
2327 | } | ||
2328 | spin_unlock_irq(&mddev->write_lock); | ||
2329 | } else { | ||
2330 | mddev->ro = 0; | ||
2331 | mddev->recovery_cp = MaxSector; | ||
2332 | err = do_md_run(mddev); | ||
2333 | } | ||
2334 | break; | ||
2335 | case active: | ||
2336 | if (mddev->pers) { | ||
2337 | restart_array(mddev); | ||
2338 | mddev->sb_dirty = 0; | ||
2339 | wake_up(&mddev->sb_wait); | ||
2340 | err = 0; | ||
2341 | } else { | ||
2342 | mddev->ro = 0; | ||
2343 | err = do_md_run(mddev); | ||
2344 | } | ||
2345 | break; | ||
2346 | case write_pending: | ||
2347 | case active_idle: | ||
2348 | /* these cannot be set */ | ||
2349 | break; | ||
2350 | } | ||
2351 | if (err) | ||
2352 | return err; | ||
2353 | else | ||
2354 | return len; | ||
2355 | } | ||
2356 | static struct md_sysfs_entry md_array_state = __ATTR(array_state, 0644, array_state_show, array_state_store); | ||
2357 | |||
2188 | static ssize_t | 2358 | static ssize_t |
2189 | null_show(mddev_t *mddev, char *page) | 2359 | null_show(mddev_t *mddev, char *page) |
2190 | { | 2360 | { |
@@ -2553,6 +2723,7 @@ static struct attribute *md_default_attrs[] = { | |||
2553 | &md_metadata.attr, | 2723 | &md_metadata.attr, |
2554 | &md_new_device.attr, | 2724 | &md_new_device.attr, |
2555 | &md_safe_delay.attr, | 2725 | &md_safe_delay.attr, |
2726 | &md_array_state.attr, | ||
2556 | NULL, | 2727 | NULL, |
2557 | }; | 2728 | }; |
2558 | 2729 | ||
@@ -2919,11 +3090,8 @@ static int restart_array(mddev_t *mddev) | |||
2919 | md_wakeup_thread(mddev->thread); | 3090 | md_wakeup_thread(mddev->thread); |
2920 | md_wakeup_thread(mddev->sync_thread); | 3091 | md_wakeup_thread(mddev->sync_thread); |
2921 | err = 0; | 3092 | err = 0; |
2922 | } else { | 3093 | } else |
2923 | printk(KERN_ERR "md: %s has no personality assigned.\n", | ||
2924 | mdname(mddev)); | ||
2925 | err = -EINVAL; | 3094 | err = -EINVAL; |
2926 | } | ||
2927 | 3095 | ||
2928 | out: | 3096 | out: |
2929 | return err; | 3097 | return err; |
@@ -2955,7 +3123,12 @@ static void restore_bitmap_write_access(struct file *file) | |||
2955 | spin_unlock(&inode->i_lock); | 3123 | spin_unlock(&inode->i_lock); |
2956 | } | 3124 | } |
2957 | 3125 | ||
2958 | static int do_md_stop(mddev_t * mddev, int ro) | 3126 | /* mode: |
3127 | * 0 - completely stop and dis-assemble array | ||
3128 | * 1 - switch to readonly | ||
3129 | * 2 - stop but do not disassemble array | ||
3130 | */ | ||
3131 | static int do_md_stop(mddev_t * mddev, int mode) | ||
2959 | { | 3132 | { |
2960 | int err = 0; | 3133 | int err = 0; |
2961 | struct gendisk *disk = mddev->gendisk; | 3134 | struct gendisk *disk = mddev->gendisk; |
@@ -2977,12 +3150,15 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2977 | 3150 | ||
2978 | invalidate_partition(disk, 0); | 3151 | invalidate_partition(disk, 0); |
2979 | 3152 | ||
2980 | if (ro) { | 3153 | switch(mode) { |
3154 | case 1: /* readonly */ | ||
2981 | err = -ENXIO; | 3155 | err = -ENXIO; |
2982 | if (mddev->ro==1) | 3156 | if (mddev->ro==1) |
2983 | goto out; | 3157 | goto out; |
2984 | mddev->ro = 1; | 3158 | mddev->ro = 1; |
2985 | } else { | 3159 | break; |
3160 | case 0: /* disassemble */ | ||
3161 | case 2: /* stop */ | ||
2986 | bitmap_flush(mddev); | 3162 | bitmap_flush(mddev); |
2987 | md_super_wait(mddev); | 3163 | md_super_wait(mddev); |
2988 | if (mddev->ro) | 3164 | if (mddev->ro) |
@@ -3002,7 +3178,7 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
3002 | mddev->in_sync = 1; | 3178 | mddev->in_sync = 1; |
3003 | md_update_sb(mddev); | 3179 | md_update_sb(mddev); |
3004 | } | 3180 | } |
3005 | if (ro) | 3181 | if (mode == 1) |
3006 | set_disk_ro(disk, 1); | 3182 | set_disk_ro(disk, 1); |
3007 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 3183 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
3008 | } | 3184 | } |
@@ -3010,7 +3186,7 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
3010 | /* | 3186 | /* |
3011 | * Free resources if final stop | 3187 | * Free resources if final stop |
3012 | */ | 3188 | */ |
3013 | if (!ro) { | 3189 | if (mode == 0) { |
3014 | mdk_rdev_t *rdev; | 3190 | mdk_rdev_t *rdev; |
3015 | struct list_head *tmp; | 3191 | struct list_head *tmp; |
3016 | struct gendisk *disk; | 3192 | struct gendisk *disk; |
@@ -3034,6 +3210,9 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
3034 | export_array(mddev); | 3210 | export_array(mddev); |
3035 | 3211 | ||
3036 | mddev->array_size = 0; | 3212 | mddev->array_size = 0; |
3213 | mddev->size = 0; | ||
3214 | mddev->raid_disks = 0; | ||
3215 | |||
3037 | disk = mddev->gendisk; | 3216 | disk = mddev->gendisk; |
3038 | if (disk) | 3217 | if (disk) |
3039 | set_capacity(disk, 0); | 3218 | set_capacity(disk, 0); |