diff options
-rw-r--r-- | Documentation/md.txt | 39 | ||||
-rw-r--r-- | drivers/md/md.c | 197 |
2 files changed, 227 insertions, 9 deletions
diff --git a/Documentation/md.txt b/Documentation/md.txt index b19978e035fc..df0b45515775 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt | |||
@@ -216,6 +216,45 @@ All md devices contain: | |||
216 | period as a number of seconds. The default is 200msec (0.200). | 216 | period as a number of seconds. The default is 200msec (0.200). |
217 | Writing a value of 0 disables safemode. | 217 | Writing a value of 0 disables safemode. |
218 | 218 | ||
219 | array_state | ||
220 | This file contains a single word which describes the current | ||
221 | state of the array. In many cases, the state can be set by | ||
222 | writing the word for the desired state; however, some states | |
223 | cannot be explicitly set, and some transitions are not allowed. | ||
224 | |||
225 | clear | ||
226 | No devices, no size, no level | ||
227 | Writing is equivalent to STOP_ARRAY ioctl | ||
228 | inactive | ||
229 | May have some settings, but array is not active | ||
230 | all IO results in error | ||
231 | When written, doesn't tear down array, but just stops it | ||
232 | suspended (not supported yet) | ||
233 | All IO requests will block. The array can be reconfigured. | ||
234 | Writing this, if accepted, will block until array is quiescent | |
235 | readonly | ||
236 | no resync can happen. no superblocks get written. | ||
237 | write requests fail | ||
238 | read-auto | ||
239 | like readonly, but behaves like 'clean' on a write request. | ||
240 | |||
241 | clean - no pending writes, but otherwise active. | ||
242 | When written to inactive array, starts without resync | ||
243 | If a write request arrives then | ||
244 | if metadata is known, mark 'dirty' and switch to 'active'. | ||
245 | if not known, block and switch to write-pending | ||
246 | If written to an active array that has pending writes, then fails. | ||
247 | active | ||
248 | fully active: IO and resync can be happening. | ||
249 | When written to inactive array, starts with resync | ||
250 | |||
251 | write-pending | ||
252 | clean, but writes are blocked waiting for 'active' to be written. | ||
253 | |||
254 | active-idle | ||
255 | like active, but no writes have been seen for a while (safe_mode_delay). | ||
256 | |||
257 | |||
219 | sync_speed_min | 258 | sync_speed_min |
220 | sync_speed_max | 259 | sync_speed_max |
221 | These are similar to /proc/sys/dev/raid/speed_limit_{min,max} | 260 | These are similar to /proc/sys/dev/raid/speed_limit_{min,max} |
diff --git a/drivers/md/md.c b/drivers/md/md.c index 34b6902cda46..f6562ee4c6fc 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -2185,6 +2185,176 @@ chunk_size_store(mddev_t *mddev, const char *buf, size_t len) | |||
2185 | static struct md_sysfs_entry md_chunk_size = | 2185 | static struct md_sysfs_entry md_chunk_size = |
2186 | __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); | 2186 | __ATTR(chunk_size, 0644, chunk_size_show, chunk_size_store); |
2187 | 2187 | ||
/*
 * States reported by (and, for some of them, accepted through) the
 * 'array_state' sysfs attribute:
 *
 * clear
 *     No devices, no size, no level.
 *     Writing it is equivalent to the STOP_ARRAY ioctl.
 * inactive
 *     May have some settings, but the array is not active;
 *     all IO results in error.
 *     When written, doesn't tear down the array, but just stops it.
 * suspended (not supported yet)
 *     All IO requests will block.  The array can be reconfigured.
 *     Writing this, if accepted, will block until the array is quiescent.
 * readonly
 *     No resync can happen, no superblocks get written,
 *     write requests fail.
 * read-auto
 *     Like readonly, but behaves like 'clean' on a write request.
 *
 * clean
 *     No pending writes, but otherwise active.
 *     When written to an inactive array, starts without resync.
 *     If a write request arrives then
 *       if metadata is known, mark 'dirty' and switch to 'active';
 *       if not known, block and switch to write-pending.
 *     If written to an active array that has pending writes, then fails.
 * active
 *     Fully active: IO and resync can be happening.
 *     When written to an inactive array, starts with resync.
 *
 * write-pending
 *     Clean, but writes are blocked waiting for 'active' to be written.
 *
 * active-idle
 *     Like active, but no writes have been seen for a while
 *     (safe_mode_delay).
 *
 * The enumerators double as indices into array_states[]; 'bad_word' is
 * the index of the NULL terminator, i.e. "no name matched".
 */
enum array_state {
	clear,
	inactive,
	suspended,
	readonly,
	read_auto,
	clean,
	active,
	write_pending,
	active_idle,
	bad_word
};

/* User-visible spelling of each state, NULL-terminated for match_word(). */
char *array_states[] = {
	[clear]		= "clear",
	[inactive]	= "inactive",
	[suspended]	= "suspended",
	[readonly]	= "readonly",
	[read_auto]	= "read-auto",
	[clean]		= "clean",
	[active]	= "active",
	[write_pending]	= "write-pending",
	[active_idle]	= "active-idle",
	[bad_word]	= NULL
};
2229 | |||
2230 | static int match_word(const char *word, char **list) | ||
2231 | { | ||
2232 | int n; | ||
2233 | for (n=0; list[n]; n++) | ||
2234 | if (cmd_match(word, list[n])) | ||
2235 | break; | ||
2236 | return n; | ||
2237 | } | ||
2238 | |||
2239 | static ssize_t | ||
2240 | array_state_show(mddev_t *mddev, char *page) | ||
2241 | { | ||
2242 | enum array_state st = inactive; | ||
2243 | |||
2244 | if (mddev->pers) | ||
2245 | switch(mddev->ro) { | ||
2246 | case 1: | ||
2247 | st = readonly; | ||
2248 | break; | ||
2249 | case 2: | ||
2250 | st = read_auto; | ||
2251 | break; | ||
2252 | case 0: | ||
2253 | if (mddev->in_sync) | ||
2254 | st = clean; | ||
2255 | else if (mddev->safemode) | ||
2256 | st = active_idle; | ||
2257 | else | ||
2258 | st = active; | ||
2259 | } | ||
2260 | else { | ||
2261 | if (list_empty(&mddev->disks) && | ||
2262 | mddev->raid_disks == 0 && | ||
2263 | mddev->size == 0) | ||
2264 | st = clear; | ||
2265 | else | ||
2266 | st = inactive; | ||
2267 | } | ||
2268 | return sprintf(page, "%s\n", array_states[st]); | ||
2269 | } | ||
2270 | |||
2271 | static int do_md_stop(mddev_t * mddev, int ro); | ||
2272 | static int do_md_run(mddev_t * mddev); | ||
2273 | static int restart_array(mddev_t *mddev); | ||
2274 | |||
2275 | static ssize_t | ||
2276 | array_state_store(mddev_t *mddev, const char *buf, size_t len) | ||
2277 | { | ||
2278 | int err = -EINVAL; | ||
2279 | enum array_state st = match_word(buf, array_states); | ||
2280 | switch(st) { | ||
2281 | case bad_word: | ||
2282 | break; | ||
2283 | case clear: | ||
2284 | /* stopping an active array */ | ||
2285 | if (mddev->pers) { | ||
2286 | if (atomic_read(&mddev->active) > 1) | ||
2287 | return -EBUSY; | ||
2288 | err = do_md_stop(mddev, 0); | ||
2289 | } | ||
2290 | break; | ||
2291 | case inactive: | ||
2292 | /* stopping an active array */ | ||
2293 | if (mddev->pers) { | ||
2294 | if (atomic_read(&mddev->active) > 1) | ||
2295 | return -EBUSY; | ||
2296 | err = do_md_stop(mddev, 2); | ||
2297 | } | ||
2298 | break; | ||
2299 | case suspended: | ||
2300 | break; /* not supported yet */ | ||
2301 | case readonly: | ||
2302 | if (mddev->pers) | ||
2303 | err = do_md_stop(mddev, 1); | ||
2304 | else { | ||
2305 | mddev->ro = 1; | ||
2306 | err = do_md_run(mddev); | ||
2307 | } | ||
2308 | break; | ||
2309 | case read_auto: | ||
2310 | /* stopping an active array */ | ||
2311 | if (mddev->pers) { | ||
2312 | err = do_md_stop(mddev, 1); | ||
2313 | if (err == 0) | ||
2314 | mddev->ro = 2; /* FIXME mark devices writable */ | ||
2315 | } else { | ||
2316 | mddev->ro = 2; | ||
2317 | err = do_md_run(mddev); | ||
2318 | } | ||
2319 | break; | ||
2320 | case clean: | ||
2321 | if (mddev->pers) { | ||
2322 | restart_array(mddev); | ||
2323 | spin_lock_irq(&mddev->write_lock); | ||
2324 | if (atomic_read(&mddev->writes_pending) == 0) { | ||
2325 | mddev->in_sync = 1; | ||
2326 | mddev->sb_dirty = 1; | ||
2327 | } | ||
2328 | spin_unlock_irq(&mddev->write_lock); | ||
2329 | } else { | ||
2330 | mddev->ro = 0; | ||
2331 | mddev->recovery_cp = MaxSector; | ||
2332 | err = do_md_run(mddev); | ||
2333 | } | ||
2334 | break; | ||
2335 | case active: | ||
2336 | if (mddev->pers) { | ||
2337 | restart_array(mddev); | ||
2338 | mddev->sb_dirty = 0; | ||
2339 | wake_up(&mddev->sb_wait); | ||
2340 | err = 0; | ||
2341 | } else { | ||
2342 | mddev->ro = 0; | ||
2343 | err = do_md_run(mddev); | ||
2344 | } | ||
2345 | break; | ||
2346 | case write_pending: | ||
2347 | case active_idle: | ||
2348 | /* these cannot be set */ | ||
2349 | break; | ||
2350 | } | ||
2351 | if (err) | ||
2352 | return err; | ||
2353 | else | ||
2354 | return len; | ||
2355 | } | ||
2356 | static struct md_sysfs_entry md_array_state = __ATTR(array_state, 0644, array_state_show, array_state_store); | ||
2357 | |||
2188 | static ssize_t | 2358 | static ssize_t |
2189 | null_show(mddev_t *mddev, char *page) | 2359 | null_show(mddev_t *mddev, char *page) |
2190 | { | 2360 | { |
@@ -2553,6 +2723,7 @@ static struct attribute *md_default_attrs[] = { | |||
2553 | &md_metadata.attr, | 2723 | &md_metadata.attr, |
2554 | &md_new_device.attr, | 2724 | &md_new_device.attr, |
2555 | &md_safe_delay.attr, | 2725 | &md_safe_delay.attr, |
2726 | &md_array_state.attr, | ||
2556 | NULL, | 2727 | NULL, |
2557 | }; | 2728 | }; |
2558 | 2729 | ||
@@ -2919,11 +3090,8 @@ static int restart_array(mddev_t *mddev) | |||
2919 | md_wakeup_thread(mddev->thread); | 3090 | md_wakeup_thread(mddev->thread); |
2920 | md_wakeup_thread(mddev->sync_thread); | 3091 | md_wakeup_thread(mddev->sync_thread); |
2921 | err = 0; | 3092 | err = 0; |
2922 | } else { | 3093 | } else |
2923 | printk(KERN_ERR "md: %s has no personality assigned.\n", | ||
2924 | mdname(mddev)); | ||
2925 | err = -EINVAL; | 3094 | err = -EINVAL; |
2926 | } | ||
2927 | 3095 | ||
2928 | out: | 3096 | out: |
2929 | return err; | 3097 | return err; |
@@ -2955,7 +3123,12 @@ static void restore_bitmap_write_access(struct file *file) | |||
2955 | spin_unlock(&inode->i_lock); | 3123 | spin_unlock(&inode->i_lock); |
2956 | } | 3124 | } |
2957 | 3125 | ||
2958 | static int do_md_stop(mddev_t * mddev, int ro) | 3126 | /* mode: |
3127 | * 0 - completely stop and dis-assemble array | ||
3128 | * 1 - switch to readonly | ||
3129 | * 2 - stop but do not disassemble array | ||
3130 | */ | ||
3131 | static int do_md_stop(mddev_t * mddev, int mode) | ||
2959 | { | 3132 | { |
2960 | int err = 0; | 3133 | int err = 0; |
2961 | struct gendisk *disk = mddev->gendisk; | 3134 | struct gendisk *disk = mddev->gendisk; |
@@ -2977,12 +3150,15 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
2977 | 3150 | ||
2978 | invalidate_partition(disk, 0); | 3151 | invalidate_partition(disk, 0); |
2979 | 3152 | ||
2980 | if (ro) { | 3153 | switch(mode) { |
3154 | case 1: /* readonly */ | ||
2981 | err = -ENXIO; | 3155 | err = -ENXIO; |
2982 | if (mddev->ro==1) | 3156 | if (mddev->ro==1) |
2983 | goto out; | 3157 | goto out; |
2984 | mddev->ro = 1; | 3158 | mddev->ro = 1; |
2985 | } else { | 3159 | break; |
3160 | case 0: /* disassemble */ | ||
3161 | case 2: /* stop */ | ||
2986 | bitmap_flush(mddev); | 3162 | bitmap_flush(mddev); |
2987 | md_super_wait(mddev); | 3163 | md_super_wait(mddev); |
2988 | if (mddev->ro) | 3164 | if (mddev->ro) |
@@ -3002,7 +3178,7 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
3002 | mddev->in_sync = 1; | 3178 | mddev->in_sync = 1; |
3003 | md_update_sb(mddev); | 3179 | md_update_sb(mddev); |
3004 | } | 3180 | } |
3005 | if (ro) | 3181 | if (mode == 1) |
3006 | set_disk_ro(disk, 1); | 3182 | set_disk_ro(disk, 1); |
3007 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 3183 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
3008 | } | 3184 | } |
@@ -3010,7 +3186,7 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
3010 | /* | 3186 | /* |
3011 | * Free resources if final stop | 3187 | * Free resources if final stop |
3012 | */ | 3188 | */ |
3013 | if (!ro) { | 3189 | if (mode == 0) { |
3014 | mdk_rdev_t *rdev; | 3190 | mdk_rdev_t *rdev; |
3015 | struct list_head *tmp; | 3191 | struct list_head *tmp; |
3016 | struct gendisk *disk; | 3192 | struct gendisk *disk; |
@@ -3034,6 +3210,9 @@ static int do_md_stop(mddev_t * mddev, int ro) | |||
3034 | export_array(mddev); | 3210 | export_array(mddev); |
3035 | 3211 | ||
3036 | mddev->array_size = 0; | 3212 | mddev->array_size = 0; |
3213 | mddev->size = 0; | ||
3214 | mddev->raid_disks = 0; | ||
3215 | |||
3037 | disk = mddev->gendisk; | 3216 | disk = mddev->gendisk; |
3038 | if (disk) | 3217 | if (disk) |
3039 | set_capacity(disk, 0); | 3218 | set_capacity(disk, 0); |