aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2015-03-04 00:35:43 -0500
committerIngo Molnar <mingo@kernel.org>2015-03-04 00:35:43 -0500
commitd2c032e3dc58137a7261a7824d3acce435db1d66 (patch)
tree7eea1c7c6103eefe879f07472eec99b3c41eb792 /drivers/md/md.c
parent7e8e385aaf6ed5b64b5d9108081cfcdcdd021b78 (diff)
parent13a7a6ac0a11197edcd0f756a035f472b42cdf8b (diff)
Merge tag 'v4.0-rc2' into x86/asm, to refresh the tree
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c830
1 files changed, 524 insertions, 306 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 709755fb6d7b..cadf9cc02b25 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -72,6 +72,7 @@ static struct workqueue_struct *md_misc_wq;
72 72
73static int remove_and_add_spares(struct mddev *mddev, 73static int remove_and_add_spares(struct mddev *mddev,
74 struct md_rdev *this); 74 struct md_rdev *this);
75static void mddev_detach(struct mddev *mddev);
75 76
76/* 77/*
77 * Default number of read corrections we'll attempt on an rdev 78 * Default number of read corrections we'll attempt on an rdev
@@ -292,8 +293,8 @@ static void md_make_request(struct request_queue *q, struct bio *bio)
292/* mddev_suspend makes sure no new requests are submitted 293/* mddev_suspend makes sure no new requests are submitted
293 * to the device, and that any requests that have been submitted 294 * to the device, and that any requests that have been submitted
294 * are completely handled. 295 * are completely handled.
295 * Once ->stop is called and completes, the module will be completely 296 * Once mddev_detach() is called and completes, the module will be
296 * unused. 297 * completely unused.
297 */ 298 */
298void mddev_suspend(struct mddev *mddev) 299void mddev_suspend(struct mddev *mddev)
299{ 300{
@@ -321,10 +322,47 @@ EXPORT_SYMBOL_GPL(mddev_resume);
321 322
322int mddev_congested(struct mddev *mddev, int bits) 323int mddev_congested(struct mddev *mddev, int bits)
323{ 324{
324 return mddev->suspended; 325 struct md_personality *pers = mddev->pers;
326 int ret = 0;
327
328 rcu_read_lock();
329 if (mddev->suspended)
330 ret = 1;
331 else if (pers && pers->congested)
332 ret = pers->congested(mddev, bits);
333 rcu_read_unlock();
334 return ret;
335}
336EXPORT_SYMBOL_GPL(mddev_congested);
337static int md_congested(void *data, int bits)
338{
339 struct mddev *mddev = data;
340 return mddev_congested(mddev, bits);
325} 341}
326EXPORT_SYMBOL(mddev_congested);
327 342
343static int md_mergeable_bvec(struct request_queue *q,
344 struct bvec_merge_data *bvm,
345 struct bio_vec *biovec)
346{
347 struct mddev *mddev = q->queuedata;
348 int ret;
349 rcu_read_lock();
350 if (mddev->suspended) {
351 /* Must always allow one vec */
352 if (bvm->bi_size == 0)
353 ret = biovec->bv_len;
354 else
355 ret = 0;
356 } else {
357 struct md_personality *pers = mddev->pers;
358 if (pers && pers->mergeable_bvec)
359 ret = pers->mergeable_bvec(mddev, bvm, biovec);
360 else
361 ret = biovec->bv_len;
362 }
363 rcu_read_unlock();
364 return ret;
365}
328/* 366/*
329 * Generic flush handling for md 367 * Generic flush handling for md
330 */ 368 */
@@ -397,12 +435,12 @@ static void md_submit_flush_data(struct work_struct *ws)
397 435
398void md_flush_request(struct mddev *mddev, struct bio *bio) 436void md_flush_request(struct mddev *mddev, struct bio *bio)
399{ 437{
400 spin_lock_irq(&mddev->write_lock); 438 spin_lock_irq(&mddev->lock);
401 wait_event_lock_irq(mddev->sb_wait, 439 wait_event_lock_irq(mddev->sb_wait,
402 !mddev->flush_bio, 440 !mddev->flush_bio,
403 mddev->write_lock); 441 mddev->lock);
404 mddev->flush_bio = bio; 442 mddev->flush_bio = bio;
405 spin_unlock_irq(&mddev->write_lock); 443 spin_unlock_irq(&mddev->lock);
406 444
407 INIT_WORK(&mddev->flush_work, submit_flushes); 445 INIT_WORK(&mddev->flush_work, submit_flushes);
408 queue_work(md_wq, &mddev->flush_work); 446 queue_work(md_wq, &mddev->flush_work);
@@ -465,7 +503,7 @@ void mddev_init(struct mddev *mddev)
465 atomic_set(&mddev->active, 1); 503 atomic_set(&mddev->active, 1);
466 atomic_set(&mddev->openers, 0); 504 atomic_set(&mddev->openers, 0);
467 atomic_set(&mddev->active_io, 0); 505 atomic_set(&mddev->active_io, 0);
468 spin_lock_init(&mddev->write_lock); 506 spin_lock_init(&mddev->lock);
469 atomic_set(&mddev->flush_pending, 0); 507 atomic_set(&mddev->flush_pending, 0);
470 init_waitqueue_head(&mddev->sb_wait); 508 init_waitqueue_head(&mddev->sb_wait);
471 init_waitqueue_head(&mddev->recovery_wait); 509 init_waitqueue_head(&mddev->recovery_wait);
@@ -552,32 +590,9 @@ static struct mddev *mddev_find(dev_t unit)
552 goto retry; 590 goto retry;
553} 591}
554 592
555static inline int __must_check mddev_lock(struct mddev *mddev)
556{
557 return mutex_lock_interruptible(&mddev->reconfig_mutex);
558}
559
560/* Sometimes we need to take the lock in a situation where
561 * failure due to interrupts is not acceptable.
562 */
563static inline void mddev_lock_nointr(struct mddev *mddev)
564{
565 mutex_lock(&mddev->reconfig_mutex);
566}
567
568static inline int mddev_is_locked(struct mddev *mddev)
569{
570 return mutex_is_locked(&mddev->reconfig_mutex);
571}
572
573static inline int mddev_trylock(struct mddev *mddev)
574{
575 return mutex_trylock(&mddev->reconfig_mutex);
576}
577
578static struct attribute_group md_redundancy_group; 593static struct attribute_group md_redundancy_group;
579 594
580static void mddev_unlock(struct mddev *mddev) 595void mddev_unlock(struct mddev *mddev)
581{ 596{
582 if (mddev->to_remove) { 597 if (mddev->to_remove) {
583 /* These cannot be removed under reconfig_mutex as 598 /* These cannot be removed under reconfig_mutex as
@@ -619,6 +634,7 @@ static void mddev_unlock(struct mddev *mddev)
619 md_wakeup_thread(mddev->thread); 634 md_wakeup_thread(mddev->thread);
620 spin_unlock(&pers_lock); 635 spin_unlock(&pers_lock);
621} 636}
637EXPORT_SYMBOL_GPL(mddev_unlock);
622 638
623static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr) 639static struct md_rdev *find_rdev_nr_rcu(struct mddev *mddev, int nr)
624{ 640{
@@ -2230,7 +2246,7 @@ repeat:
2230 return; 2246 return;
2231 } 2247 }
2232 2248
2233 spin_lock_irq(&mddev->write_lock); 2249 spin_lock(&mddev->lock);
2234 2250
2235 mddev->utime = get_seconds(); 2251 mddev->utime = get_seconds();
2236 2252
@@ -2287,7 +2303,7 @@ repeat:
2287 } 2303 }
2288 2304
2289 sync_sbs(mddev, nospares); 2305 sync_sbs(mddev, nospares);
2290 spin_unlock_irq(&mddev->write_lock); 2306 spin_unlock(&mddev->lock);
2291 2307
2292 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n", 2308 pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
2293 mdname(mddev), mddev->in_sync); 2309 mdname(mddev), mddev->in_sync);
@@ -2326,15 +2342,15 @@ repeat:
2326 md_super_wait(mddev); 2342 md_super_wait(mddev);
2327 /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */ 2343 /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
2328 2344
2329 spin_lock_irq(&mddev->write_lock); 2345 spin_lock(&mddev->lock);
2330 if (mddev->in_sync != sync_req || 2346 if (mddev->in_sync != sync_req ||
2331 test_bit(MD_CHANGE_DEVS, &mddev->flags)) { 2347 test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
2332 /* have to write it out again */ 2348 /* have to write it out again */
2333 spin_unlock_irq(&mddev->write_lock); 2349 spin_unlock(&mddev->lock);
2334 goto repeat; 2350 goto repeat;
2335 } 2351 }
2336 clear_bit(MD_CHANGE_PENDING, &mddev->flags); 2352 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
2337 spin_unlock_irq(&mddev->write_lock); 2353 spin_unlock(&mddev->lock);
2338 wake_up(&mddev->sb_wait); 2354 wake_up(&mddev->sb_wait);
2339 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 2355 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
2340 sysfs_notify(&mddev->kobj, NULL, "sync_completed"); 2356 sysfs_notify(&mddev->kobj, NULL, "sync_completed");
@@ -2381,40 +2397,41 @@ state_show(struct md_rdev *rdev, char *page)
2381{ 2397{
2382 char *sep = ""; 2398 char *sep = "";
2383 size_t len = 0; 2399 size_t len = 0;
2400 unsigned long flags = ACCESS_ONCE(rdev->flags);
2384 2401
2385 if (test_bit(Faulty, &rdev->flags) || 2402 if (test_bit(Faulty, &flags) ||
2386 rdev->badblocks.unacked_exist) { 2403 rdev->badblocks.unacked_exist) {
2387 len+= sprintf(page+len, "%sfaulty",sep); 2404 len+= sprintf(page+len, "%sfaulty",sep);
2388 sep = ","; 2405 sep = ",";
2389 } 2406 }
2390 if (test_bit(In_sync, &rdev->flags)) { 2407 if (test_bit(In_sync, &flags)) {
2391 len += sprintf(page+len, "%sin_sync",sep); 2408 len += sprintf(page+len, "%sin_sync",sep);
2392 sep = ","; 2409 sep = ",";
2393 } 2410 }
2394 if (test_bit(WriteMostly, &rdev->flags)) { 2411 if (test_bit(WriteMostly, &flags)) {
2395 len += sprintf(page+len, "%swrite_mostly",sep); 2412 len += sprintf(page+len, "%swrite_mostly",sep);
2396 sep = ","; 2413 sep = ",";
2397 } 2414 }
2398 if (test_bit(Blocked, &rdev->flags) || 2415 if (test_bit(Blocked, &flags) ||
2399 (rdev->badblocks.unacked_exist 2416 (rdev->badblocks.unacked_exist
2400 && !test_bit(Faulty, &rdev->flags))) { 2417 && !test_bit(Faulty, &flags))) {
2401 len += sprintf(page+len, "%sblocked", sep); 2418 len += sprintf(page+len, "%sblocked", sep);
2402 sep = ","; 2419 sep = ",";
2403 } 2420 }
2404 if (!test_bit(Faulty, &rdev->flags) && 2421 if (!test_bit(Faulty, &flags) &&
2405 !test_bit(In_sync, &rdev->flags)) { 2422 !test_bit(In_sync, &flags)) {
2406 len += sprintf(page+len, "%sspare", sep); 2423 len += sprintf(page+len, "%sspare", sep);
2407 sep = ","; 2424 sep = ",";
2408 } 2425 }
2409 if (test_bit(WriteErrorSeen, &rdev->flags)) { 2426 if (test_bit(WriteErrorSeen, &flags)) {
2410 len += sprintf(page+len, "%swrite_error", sep); 2427 len += sprintf(page+len, "%swrite_error", sep);
2411 sep = ","; 2428 sep = ",";
2412 } 2429 }
2413 if (test_bit(WantReplacement, &rdev->flags)) { 2430 if (test_bit(WantReplacement, &flags)) {
2414 len += sprintf(page+len, "%swant_replacement", sep); 2431 len += sprintf(page+len, "%swant_replacement", sep);
2415 sep = ","; 2432 sep = ",";
2416 } 2433 }
2417 if (test_bit(Replacement, &rdev->flags)) { 2434 if (test_bit(Replacement, &flags)) {
2418 len += sprintf(page+len, "%sreplacement", sep); 2435 len += sprintf(page+len, "%sreplacement", sep);
2419 sep = ","; 2436 sep = ",";
2420 } 2437 }
@@ -2538,7 +2555,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
2538 return err ? err : len; 2555 return err ? err : len;
2539} 2556}
2540static struct rdev_sysfs_entry rdev_state = 2557static struct rdev_sysfs_entry rdev_state =
2541__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store); 2558__ATTR_PREALLOC(state, S_IRUGO|S_IWUSR, state_show, state_store);
2542 2559
2543static ssize_t 2560static ssize_t
2544errors_show(struct md_rdev *rdev, char *page) 2561errors_show(struct md_rdev *rdev, char *page)
@@ -2927,21 +2944,12 @@ rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
2927{ 2944{
2928 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr); 2945 struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
2929 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj); 2946 struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
2930 struct mddev *mddev = rdev->mddev;
2931 ssize_t rv;
2932 2947
2933 if (!entry->show) 2948 if (!entry->show)
2934 return -EIO; 2949 return -EIO;
2935 2950 if (!rdev->mddev)
2936 rv = mddev ? mddev_lock(mddev) : -EBUSY; 2951 return -EBUSY;
2937 if (!rv) { 2952 return entry->show(rdev, page);
2938 if (rdev->mddev == NULL)
2939 rv = -EBUSY;
2940 else
2941 rv = entry->show(rdev, page);
2942 mddev_unlock(mddev);
2943 }
2944 return rv;
2945} 2953}
2946 2954
2947static ssize_t 2955static ssize_t
@@ -3212,11 +3220,13 @@ safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
3212 mddev->safemode_delay = 0; 3220 mddev->safemode_delay = 0;
3213 else { 3221 else {
3214 unsigned long old_delay = mddev->safemode_delay; 3222 unsigned long old_delay = mddev->safemode_delay;
3215 mddev->safemode_delay = (msec*HZ)/1000; 3223 unsigned long new_delay = (msec*HZ)/1000;
3216 if (mddev->safemode_delay == 0) 3224
3217 mddev->safemode_delay = 1; 3225 if (new_delay == 0)
3218 if (mddev->safemode_delay < old_delay || old_delay == 0) 3226 new_delay = 1;
3219 md_safemode_timeout((unsigned long)mddev); 3227 mddev->safemode_delay = new_delay;
3228 if (new_delay < old_delay || old_delay == 0)
3229 mod_timer(&mddev->safemode_timer, jiffies+1);
3220 } 3230 }
3221 return len; 3231 return len;
3222} 3232}
@@ -3226,41 +3236,52 @@ __ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
3226static ssize_t 3236static ssize_t
3227level_show(struct mddev *mddev, char *page) 3237level_show(struct mddev *mddev, char *page)
3228{ 3238{
3229 struct md_personality *p = mddev->pers; 3239 struct md_personality *p;
3240 int ret;
3241 spin_lock(&mddev->lock);
3242 p = mddev->pers;
3230 if (p) 3243 if (p)
3231 return sprintf(page, "%s\n", p->name); 3244 ret = sprintf(page, "%s\n", p->name);
3232 else if (mddev->clevel[0]) 3245 else if (mddev->clevel[0])
3233 return sprintf(page, "%s\n", mddev->clevel); 3246 ret = sprintf(page, "%s\n", mddev->clevel);
3234 else if (mddev->level != LEVEL_NONE) 3247 else if (mddev->level != LEVEL_NONE)
3235 return sprintf(page, "%d\n", mddev->level); 3248 ret = sprintf(page, "%d\n", mddev->level);
3236 else 3249 else
3237 return 0; 3250 ret = 0;
3251 spin_unlock(&mddev->lock);
3252 return ret;
3238} 3253}
3239 3254
3240static ssize_t 3255static ssize_t
3241level_store(struct mddev *mddev, const char *buf, size_t len) 3256level_store(struct mddev *mddev, const char *buf, size_t len)
3242{ 3257{
3243 char clevel[16]; 3258 char clevel[16];
3244 ssize_t rv = len; 3259 ssize_t rv;
3245 struct md_personality *pers; 3260 size_t slen = len;
3261 struct md_personality *pers, *oldpers;
3246 long level; 3262 long level;
3247 void *priv; 3263 void *priv, *oldpriv;
3248 struct md_rdev *rdev; 3264 struct md_rdev *rdev;
3249 3265
3266 if (slen == 0 || slen >= sizeof(clevel))
3267 return -EINVAL;
3268
3269 rv = mddev_lock(mddev);
3270 if (rv)
3271 return rv;
3272
3250 if (mddev->pers == NULL) { 3273 if (mddev->pers == NULL) {
3251 if (len == 0) 3274 strncpy(mddev->clevel, buf, slen);
3252 return 0; 3275 if (mddev->clevel[slen-1] == '\n')
3253 if (len >= sizeof(mddev->clevel)) 3276 slen--;
3254 return -ENOSPC; 3277 mddev->clevel[slen] = 0;
3255 strncpy(mddev->clevel, buf, len);
3256 if (mddev->clevel[len-1] == '\n')
3257 len--;
3258 mddev->clevel[len] = 0;
3259 mddev->level = LEVEL_NONE; 3278 mddev->level = LEVEL_NONE;
3260 return rv; 3279 rv = len;
3280 goto out_unlock;
3261 } 3281 }
3282 rv = -EROFS;
3262 if (mddev->ro) 3283 if (mddev->ro)
3263 return -EROFS; 3284 goto out_unlock;
3264 3285
3265 /* request to change the personality. Need to ensure: 3286 /* request to change the personality. Need to ensure:
3266 * - array is not engaged in resync/recovery/reshape 3287 * - array is not engaged in resync/recovery/reshape
@@ -3268,25 +3289,25 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
3268 * - new personality will access other array. 3289 * - new personality will access other array.
3269 */ 3290 */
3270 3291
3292 rv = -EBUSY;
3271 if (mddev->sync_thread || 3293 if (mddev->sync_thread ||
3272 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || 3294 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
3273 mddev->reshape_position != MaxSector || 3295 mddev->reshape_position != MaxSector ||
3274 mddev->sysfs_active) 3296 mddev->sysfs_active)
3275 return -EBUSY; 3297 goto out_unlock;
3276 3298
3299 rv = -EINVAL;
3277 if (!mddev->pers->quiesce) { 3300 if (!mddev->pers->quiesce) {
3278 printk(KERN_WARNING "md: %s: %s does not support online personality change\n", 3301 printk(KERN_WARNING "md: %s: %s does not support online personality change\n",
3279 mdname(mddev), mddev->pers->name); 3302 mdname(mddev), mddev->pers->name);
3280 return -EINVAL; 3303 goto out_unlock;
3281 } 3304 }
3282 3305
3283 /* Now find the new personality */ 3306 /* Now find the new personality */
3284 if (len == 0 || len >= sizeof(clevel)) 3307 strncpy(clevel, buf, slen);
3285 return -EINVAL; 3308 if (clevel[slen-1] == '\n')
3286 strncpy(clevel, buf, len); 3309 slen--;
3287 if (clevel[len-1] == '\n') 3310 clevel[slen] = 0;
3288 len--;
3289 clevel[len] = 0;
3290 if (kstrtol(clevel, 10, &level)) 3311 if (kstrtol(clevel, 10, &level))
3291 level = LEVEL_NONE; 3312 level = LEVEL_NONE;
3292 3313
@@ -3297,20 +3318,23 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
3297 if (!pers || !try_module_get(pers->owner)) { 3318 if (!pers || !try_module_get(pers->owner)) {
3298 spin_unlock(&pers_lock); 3319 spin_unlock(&pers_lock);
3299 printk(KERN_WARNING "md: personality %s not loaded\n", clevel); 3320 printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
3300 return -EINVAL; 3321 rv = -EINVAL;
3322 goto out_unlock;
3301 } 3323 }
3302 spin_unlock(&pers_lock); 3324 spin_unlock(&pers_lock);
3303 3325
3304 if (pers == mddev->pers) { 3326 if (pers == mddev->pers) {
3305 /* Nothing to do! */ 3327 /* Nothing to do! */
3306 module_put(pers->owner); 3328 module_put(pers->owner);
3307 return rv; 3329 rv = len;
3330 goto out_unlock;
3308 } 3331 }
3309 if (!pers->takeover) { 3332 if (!pers->takeover) {
3310 module_put(pers->owner); 3333 module_put(pers->owner);
3311 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n", 3334 printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
3312 mdname(mddev), clevel); 3335 mdname(mddev), clevel);
3313 return -EINVAL; 3336 rv = -EINVAL;
3337 goto out_unlock;
3314 } 3338 }
3315 3339
3316 rdev_for_each(rdev, mddev) 3340 rdev_for_each(rdev, mddev)
@@ -3330,30 +3354,29 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
3330 module_put(pers->owner); 3354 module_put(pers->owner);
3331 printk(KERN_WARNING "md: %s: %s would not accept array\n", 3355 printk(KERN_WARNING "md: %s: %s would not accept array\n",
3332 mdname(mddev), clevel); 3356 mdname(mddev), clevel);
3333 return PTR_ERR(priv); 3357 rv = PTR_ERR(priv);
3358 goto out_unlock;
3334 } 3359 }
3335 3360
3336 /* Looks like we have a winner */ 3361 /* Looks like we have a winner */
3337 mddev_suspend(mddev); 3362 mddev_suspend(mddev);
3338 mddev->pers->stop(mddev); 3363 mddev_detach(mddev);
3339 3364
3340 if (mddev->pers->sync_request == NULL && 3365 spin_lock(&mddev->lock);
3341 pers->sync_request != NULL) { 3366 oldpers = mddev->pers;
3342 /* need to add the md_redundancy_group */ 3367 oldpriv = mddev->private;
3343 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group)) 3368 mddev->pers = pers;
3344 printk(KERN_WARNING 3369 mddev->private = priv;
3345 "md: cannot register extra attributes for %s\n", 3370 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3346 mdname(mddev)); 3371 mddev->level = mddev->new_level;
3347 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action"); 3372 mddev->layout = mddev->new_layout;
3348 } 3373 mddev->chunk_sectors = mddev->new_chunk_sectors;
3349 if (mddev->pers->sync_request != NULL && 3374 mddev->delta_disks = 0;
3350 pers->sync_request == NULL) { 3375 mddev->reshape_backwards = 0;
3351 /* need to remove the md_redundancy_group */ 3376 mddev->degraded = 0;
3352 if (mddev->to_remove == NULL) 3377 spin_unlock(&mddev->lock);
3353 mddev->to_remove = &md_redundancy_group;
3354 }
3355 3378
3356 if (mddev->pers->sync_request == NULL && 3379 if (oldpers->sync_request == NULL &&
3357 mddev->external) { 3380 mddev->external) {
3358 /* We are converting from a no-redundancy array 3381 /* We are converting from a no-redundancy array
3359 * to a redundancy array and metadata is managed 3382 * to a redundancy array and metadata is managed
@@ -3367,6 +3390,24 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
3367 mddev->safemode = 0; 3390 mddev->safemode = 0;
3368 } 3391 }
3369 3392
3393 oldpers->free(mddev, oldpriv);
3394
3395 if (oldpers->sync_request == NULL &&
3396 pers->sync_request != NULL) {
3397 /* need to add the md_redundancy_group */
3398 if (sysfs_create_group(&mddev->kobj, &md_redundancy_group))
3399 printk(KERN_WARNING
3400 "md: cannot register extra attributes for %s\n",
3401 mdname(mddev));
3402 mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
3403 }
3404 if (oldpers->sync_request != NULL &&
3405 pers->sync_request == NULL) {
3406 /* need to remove the md_redundancy_group */
3407 if (mddev->to_remove == NULL)
3408 mddev->to_remove = &md_redundancy_group;
3409 }
3410
3370 rdev_for_each(rdev, mddev) { 3411 rdev_for_each(rdev, mddev) {
3371 if (rdev->raid_disk < 0) 3412 if (rdev->raid_disk < 0)
3372 continue; 3413 continue;
@@ -3392,17 +3433,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
3392 } 3433 }
3393 } 3434 }
3394 3435
3395 module_put(mddev->pers->owner); 3436 if (pers->sync_request == NULL) {
3396 mddev->pers = pers;
3397 mddev->private = priv;
3398 strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
3399 mddev->level = mddev->new_level;
3400 mddev->layout = mddev->new_layout;
3401 mddev->chunk_sectors = mddev->new_chunk_sectors;
3402 mddev->delta_disks = 0;
3403 mddev->reshape_backwards = 0;
3404 mddev->degraded = 0;
3405 if (mddev->pers->sync_request == NULL) {
3406 /* this is now an array without redundancy, so 3437 /* this is now an array without redundancy, so
3407 * it must always be in_sync 3438 * it must always be in_sync
3408 */ 3439 */
@@ -3417,6 +3448,9 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
3417 md_update_sb(mddev, 1); 3448 md_update_sb(mddev, 1);
3418 sysfs_notify(&mddev->kobj, NULL, "level"); 3449 sysfs_notify(&mddev->kobj, NULL, "level");
3419 md_new_event(mddev); 3450 md_new_event(mddev);
3451 rv = len;
3452out_unlock:
3453 mddev_unlock(mddev);
3420 return rv; 3454 return rv;
3421} 3455}
3422 3456
@@ -3439,28 +3473,32 @@ layout_store(struct mddev *mddev, const char *buf, size_t len)
3439{ 3473{
3440 char *e; 3474 char *e;
3441 unsigned long n = simple_strtoul(buf, &e, 10); 3475 unsigned long n = simple_strtoul(buf, &e, 10);
3476 int err;
3442 3477
3443 if (!*buf || (*e && *e != '\n')) 3478 if (!*buf || (*e && *e != '\n'))
3444 return -EINVAL; 3479 return -EINVAL;
3480 err = mddev_lock(mddev);
3481 if (err)
3482 return err;
3445 3483
3446 if (mddev->pers) { 3484 if (mddev->pers) {
3447 int err;
3448 if (mddev->pers->check_reshape == NULL) 3485 if (mddev->pers->check_reshape == NULL)
3449 return -EBUSY; 3486 err = -EBUSY;
3450 if (mddev->ro) 3487 else if (mddev->ro)
3451 return -EROFS; 3488 err = -EROFS;
3452 mddev->new_layout = n; 3489 else {
3453 err = mddev->pers->check_reshape(mddev); 3490 mddev->new_layout = n;
3454 if (err) { 3491 err = mddev->pers->check_reshape(mddev);
3455 mddev->new_layout = mddev->layout; 3492 if (err)
3456 return err; 3493 mddev->new_layout = mddev->layout;
3457 } 3494 }
3458 } else { 3495 } else {
3459 mddev->new_layout = n; 3496 mddev->new_layout = n;
3460 if (mddev->reshape_position == MaxSector) 3497 if (mddev->reshape_position == MaxSector)
3461 mddev->layout = n; 3498 mddev->layout = n;
3462 } 3499 }
3463 return len; 3500 mddev_unlock(mddev);
3501 return err ?: len;
3464} 3502}
3465static struct md_sysfs_entry md_layout = 3503static struct md_sysfs_entry md_layout =
3466__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store); 3504__ATTR(layout, S_IRUGO|S_IWUSR, layout_show, layout_store);
@@ -3483,32 +3521,39 @@ static ssize_t
3483raid_disks_store(struct mddev *mddev, const char *buf, size_t len) 3521raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
3484{ 3522{
3485 char *e; 3523 char *e;
3486 int rv = 0; 3524 int err;
3487 unsigned long n = simple_strtoul(buf, &e, 10); 3525 unsigned long n = simple_strtoul(buf, &e, 10);
3488 3526
3489 if (!*buf || (*e && *e != '\n')) 3527 if (!*buf || (*e && *e != '\n'))
3490 return -EINVAL; 3528 return -EINVAL;
3491 3529
3530 err = mddev_lock(mddev);
3531 if (err)
3532 return err;
3492 if (mddev->pers) 3533 if (mddev->pers)
3493 rv = update_raid_disks(mddev, n); 3534 err = update_raid_disks(mddev, n);
3494 else if (mddev->reshape_position != MaxSector) { 3535 else if (mddev->reshape_position != MaxSector) {
3495 struct md_rdev *rdev; 3536 struct md_rdev *rdev;
3496 int olddisks = mddev->raid_disks - mddev->delta_disks; 3537 int olddisks = mddev->raid_disks - mddev->delta_disks;
3497 3538
3539 err = -EINVAL;
3498 rdev_for_each(rdev, mddev) { 3540 rdev_for_each(rdev, mddev) {
3499 if (olddisks < n && 3541 if (olddisks < n &&
3500 rdev->data_offset < rdev->new_data_offset) 3542 rdev->data_offset < rdev->new_data_offset)
3501 return -EINVAL; 3543 goto out_unlock;
3502 if (olddisks > n && 3544 if (olddisks > n &&
3503 rdev->data_offset > rdev->new_data_offset) 3545 rdev->data_offset > rdev->new_data_offset)
3504 return -EINVAL; 3546 goto out_unlock;
3505 } 3547 }
3548 err = 0;
3506 mddev->delta_disks = n - olddisks; 3549 mddev->delta_disks = n - olddisks;
3507 mddev->raid_disks = n; 3550 mddev->raid_disks = n;
3508 mddev->reshape_backwards = (mddev->delta_disks < 0); 3551 mddev->reshape_backwards = (mddev->delta_disks < 0);
3509 } else 3552 } else
3510 mddev->raid_disks = n; 3553 mddev->raid_disks = n;
3511 return rv ? rv : len; 3554out_unlock:
3555 mddev_unlock(mddev);
3556 return err ? err : len;
3512} 3557}
3513static struct md_sysfs_entry md_raid_disks = 3558static struct md_sysfs_entry md_raid_disks =
3514__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store); 3559__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
@@ -3527,30 +3572,34 @@ chunk_size_show(struct mddev *mddev, char *page)
3527static ssize_t 3572static ssize_t
3528chunk_size_store(struct mddev *mddev, const char *buf, size_t len) 3573chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
3529{ 3574{
3575 int err;
3530 char *e; 3576 char *e;
3531 unsigned long n = simple_strtoul(buf, &e, 10); 3577 unsigned long n = simple_strtoul(buf, &e, 10);
3532 3578
3533 if (!*buf || (*e && *e != '\n')) 3579 if (!*buf || (*e && *e != '\n'))
3534 return -EINVAL; 3580 return -EINVAL;
3535 3581
3582 err = mddev_lock(mddev);
3583 if (err)
3584 return err;
3536 if (mddev->pers) { 3585 if (mddev->pers) {
3537 int err;
3538 if (mddev->pers->check_reshape == NULL) 3586 if (mddev->pers->check_reshape == NULL)
3539 return -EBUSY; 3587 err = -EBUSY;
3540 if (mddev->ro) 3588 else if (mddev->ro)
3541 return -EROFS; 3589 err = -EROFS;
3542 mddev->new_chunk_sectors = n >> 9; 3590 else {
3543 err = mddev->pers->check_reshape(mddev); 3591 mddev->new_chunk_sectors = n >> 9;
3544 if (err) { 3592 err = mddev->pers->check_reshape(mddev);
3545 mddev->new_chunk_sectors = mddev->chunk_sectors; 3593 if (err)
3546 return err; 3594 mddev->new_chunk_sectors = mddev->chunk_sectors;
3547 } 3595 }
3548 } else { 3596 } else {
3549 mddev->new_chunk_sectors = n >> 9; 3597 mddev->new_chunk_sectors = n >> 9;
3550 if (mddev->reshape_position == MaxSector) 3598 if (mddev->reshape_position == MaxSector)
3551 mddev->chunk_sectors = n >> 9; 3599 mddev->chunk_sectors = n >> 9;
3552 } 3600 }
3553 return len; 3601 mddev_unlock(mddev);
3602 return err ?: len;
3554} 3603}
3555static struct md_sysfs_entry md_chunk_size = 3604static struct md_sysfs_entry md_chunk_size =
3556__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store); 3605__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
@@ -3566,23 +3615,31 @@ resync_start_show(struct mddev *mddev, char *page)
3566static ssize_t 3615static ssize_t
3567resync_start_store(struct mddev *mddev, const char *buf, size_t len) 3616resync_start_store(struct mddev *mddev, const char *buf, size_t len)
3568{ 3617{
3618 int err;
3569 char *e; 3619 char *e;
3570 unsigned long long n = simple_strtoull(buf, &e, 10); 3620 unsigned long long n = simple_strtoull(buf, &e, 10);
3571 3621
3622 err = mddev_lock(mddev);
3623 if (err)
3624 return err;
3572 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) 3625 if (mddev->pers && !test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
3573 return -EBUSY; 3626 err = -EBUSY;
3574 if (cmd_match(buf, "none")) 3627 else if (cmd_match(buf, "none"))
3575 n = MaxSector; 3628 n = MaxSector;
3576 else if (!*buf || (*e && *e != '\n')) 3629 else if (!*buf || (*e && *e != '\n'))
3577 return -EINVAL; 3630 err = -EINVAL;
3578 3631
3579 mddev->recovery_cp = n; 3632 if (!err) {
3580 if (mddev->pers) 3633 mddev->recovery_cp = n;
3581 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 3634 if (mddev->pers)
3582 return len; 3635 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3636 }
3637 mddev_unlock(mddev);
3638 return err ?: len;
3583} 3639}
3584static struct md_sysfs_entry md_resync_start = 3640static struct md_sysfs_entry md_resync_start =
3585__ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store); 3641__ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
3642 resync_start_show, resync_start_store);
3586 3643
3587/* 3644/*
3588 * The array state can be: 3645 * The array state can be:
@@ -3677,8 +3734,39 @@ static int restart_array(struct mddev *mddev);
3677static ssize_t 3734static ssize_t
3678array_state_store(struct mddev *mddev, const char *buf, size_t len) 3735array_state_store(struct mddev *mddev, const char *buf, size_t len)
3679{ 3736{
3680 int err = -EINVAL; 3737 int err;
3681 enum array_state st = match_word(buf, array_states); 3738 enum array_state st = match_word(buf, array_states);
3739
3740 if (mddev->pers && (st == active || st == clean) && mddev->ro != 1) {
3741 /* don't take reconfig_mutex when toggling between
3742 * clean and active
3743 */
3744 spin_lock(&mddev->lock);
3745 if (st == active) {
3746 restart_array(mddev);
3747 clear_bit(MD_CHANGE_PENDING, &mddev->flags);
3748 wake_up(&mddev->sb_wait);
3749 err = 0;
3750 } else /* st == clean */ {
3751 restart_array(mddev);
3752 if (atomic_read(&mddev->writes_pending) == 0) {
3753 if (mddev->in_sync == 0) {
3754 mddev->in_sync = 1;
3755 if (mddev->safemode == 1)
3756 mddev->safemode = 0;
3757 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
3758 }
3759 err = 0;
3760 } else
3761 err = -EBUSY;
3762 }
3763 spin_unlock(&mddev->lock);
3764 return err;
3765 }
3766 err = mddev_lock(mddev);
3767 if (err)
3768 return err;
3769 err = -EINVAL;
3682 switch(st) { 3770 switch(st) {
3683 case bad_word: 3771 case bad_word:
3684 break; 3772 break;
@@ -3722,7 +3810,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
3722 case clean: 3810 case clean:
3723 if (mddev->pers) { 3811 if (mddev->pers) {
3724 restart_array(mddev); 3812 restart_array(mddev);
3725 spin_lock_irq(&mddev->write_lock); 3813 spin_lock(&mddev->lock);
3726 if (atomic_read(&mddev->writes_pending) == 0) { 3814 if (atomic_read(&mddev->writes_pending) == 0) {
3727 if (mddev->in_sync == 0) { 3815 if (mddev->in_sync == 0) {
3728 mddev->in_sync = 1; 3816 mddev->in_sync = 1;
@@ -3733,7 +3821,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
3733 err = 0; 3821 err = 0;
3734 } else 3822 } else
3735 err = -EBUSY; 3823 err = -EBUSY;
3736 spin_unlock_irq(&mddev->write_lock); 3824 spin_unlock(&mddev->lock);
3737 } else 3825 } else
3738 err = -EINVAL; 3826 err = -EINVAL;
3739 break; 3827 break;
@@ -3754,17 +3842,17 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
3754 /* these cannot be set */ 3842 /* these cannot be set */
3755 break; 3843 break;
3756 } 3844 }
3757 if (err) 3845
3758 return err; 3846 if (!err) {
3759 else {
3760 if (mddev->hold_active == UNTIL_IOCTL) 3847 if (mddev->hold_active == UNTIL_IOCTL)
3761 mddev->hold_active = 0; 3848 mddev->hold_active = 0;
3762 sysfs_notify_dirent_safe(mddev->sysfs_state); 3849 sysfs_notify_dirent_safe(mddev->sysfs_state);
3763 return len;
3764 } 3850 }
3851 mddev_unlock(mddev);
3852 return err ?: len;
3765} 3853}
3766static struct md_sysfs_entry md_array_state = 3854static struct md_sysfs_entry md_array_state =
3767__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); 3855__ATTR_PREALLOC(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
3768 3856
3769static ssize_t 3857static ssize_t
3770max_corrected_read_errors_show(struct mddev *mddev, char *page) { 3858max_corrected_read_errors_show(struct mddev *mddev, char *page) {
@@ -3822,6 +3910,11 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
3822 minor != MINOR(dev)) 3910 minor != MINOR(dev))
3823 return -EOVERFLOW; 3911 return -EOVERFLOW;
3824 3912
3913 flush_workqueue(md_misc_wq);
3914
3915 err = mddev_lock(mddev);
3916 if (err)
3917 return err;
3825 if (mddev->persistent) { 3918 if (mddev->persistent) {
3826 rdev = md_import_device(dev, mddev->major_version, 3919 rdev = md_import_device(dev, mddev->major_version,
3827 mddev->minor_version); 3920 mddev->minor_version);
@@ -3845,6 +3938,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
3845 out: 3938 out:
3846 if (err) 3939 if (err)
3847 export_rdev(rdev); 3940 export_rdev(rdev);
3941 mddev_unlock(mddev);
3848 return err ? err : len; 3942 return err ? err : len;
3849} 3943}
3850 3944
@@ -3856,7 +3950,11 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len)
3856{ 3950{
3857 char *end; 3951 char *end;
3858 unsigned long chunk, end_chunk; 3952 unsigned long chunk, end_chunk;
3953 int err;
3859 3954
3955 err = mddev_lock(mddev);
3956 if (err)
3957 return err;
3860 if (!mddev->bitmap) 3958 if (!mddev->bitmap)
3861 goto out; 3959 goto out;
3862 /* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */ 3960 /* buf should be <chunk> <chunk> ... or <chunk>-<chunk> ... (range) */
@@ -3874,6 +3972,7 @@ bitmap_store(struct mddev *mddev, const char *buf, size_t len)
3874 } 3972 }
3875 bitmap_unplug(mddev->bitmap); /* flush the bits to disk */ 3973 bitmap_unplug(mddev->bitmap); /* flush the bits to disk */
3876out: 3974out:
3975 mddev_unlock(mddev);
3877 return len; 3976 return len;
3878} 3977}
3879 3978
@@ -3901,6 +4000,9 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
3901 4000
3902 if (err < 0) 4001 if (err < 0)
3903 return err; 4002 return err;
4003 err = mddev_lock(mddev);
4004 if (err)
4005 return err;
3904 if (mddev->pers) { 4006 if (mddev->pers) {
3905 err = update_size(mddev, sectors); 4007 err = update_size(mddev, sectors);
3906 md_update_sb(mddev, 1); 4008 md_update_sb(mddev, 1);
@@ -3911,6 +4013,7 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
3911 else 4013 else
3912 err = -ENOSPC; 4014 err = -ENOSPC;
3913 } 4015 }
4016 mddev_unlock(mddev);
3914 return err ? err : len; 4017 return err ? err : len;
3915} 4018}
3916 4019
@@ -3940,21 +4043,28 @@ metadata_store(struct mddev *mddev, const char *buf, size_t len)
3940{ 4043{
3941 int major, minor; 4044 int major, minor;
3942 char *e; 4045 char *e;
4046 int err;
3943 /* Changing the details of 'external' metadata is 4047 /* Changing the details of 'external' metadata is
3944 * always permitted. Otherwise there must be 4048 * always permitted. Otherwise there must be
3945 * no devices attached to the array. 4049 * no devices attached to the array.
3946 */ 4050 */
4051
4052 err = mddev_lock(mddev);
4053 if (err)
4054 return err;
4055 err = -EBUSY;
3947 if (mddev->external && strncmp(buf, "external:", 9) == 0) 4056 if (mddev->external && strncmp(buf, "external:", 9) == 0)
3948 ; 4057 ;
3949 else if (!list_empty(&mddev->disks)) 4058 else if (!list_empty(&mddev->disks))
3950 return -EBUSY; 4059 goto out_unlock;
3951 4060
4061 err = 0;
3952 if (cmd_match(buf, "none")) { 4062 if (cmd_match(buf, "none")) {
3953 mddev->persistent = 0; 4063 mddev->persistent = 0;
3954 mddev->external = 0; 4064 mddev->external = 0;
3955 mddev->major_version = 0; 4065 mddev->major_version = 0;
3956 mddev->minor_version = 90; 4066 mddev->minor_version = 90;
3957 return len; 4067 goto out_unlock;
3958 } 4068 }
3959 if (strncmp(buf, "external:", 9) == 0) { 4069 if (strncmp(buf, "external:", 9) == 0) {
3960 size_t namelen = len-9; 4070 size_t namelen = len-9;
@@ -3968,45 +4078,51 @@ metadata_store(struct mddev *mddev, const char *buf, size_t len)
3968 mddev->external = 1; 4078 mddev->external = 1;
3969 mddev->major_version = 0; 4079 mddev->major_version = 0;
3970 mddev->minor_version = 90; 4080 mddev->minor_version = 90;
3971 return len; 4081 goto out_unlock;
3972 } 4082 }
3973 major = simple_strtoul(buf, &e, 10); 4083 major = simple_strtoul(buf, &e, 10);
4084 err = -EINVAL;
3974 if (e==buf || *e != '.') 4085 if (e==buf || *e != '.')
3975 return -EINVAL; 4086 goto out_unlock;
3976 buf = e+1; 4087 buf = e+1;
3977 minor = simple_strtoul(buf, &e, 10); 4088 minor = simple_strtoul(buf, &e, 10);
3978 if (e==buf || (*e && *e != '\n') ) 4089 if (e==buf || (*e && *e != '\n') )
3979 return -EINVAL; 4090 goto out_unlock;
4091 err = -ENOENT;
3980 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL) 4092 if (major >= ARRAY_SIZE(super_types) || super_types[major].name == NULL)
3981 return -ENOENT; 4093 goto out_unlock;
3982 mddev->major_version = major; 4094 mddev->major_version = major;
3983 mddev->minor_version = minor; 4095 mddev->minor_version = minor;
3984 mddev->persistent = 1; 4096 mddev->persistent = 1;
3985 mddev->external = 0; 4097 mddev->external = 0;
3986 return len; 4098 err = 0;
4099out_unlock:
4100 mddev_unlock(mddev);
4101 return err ?: len;
3987} 4102}
3988 4103
3989static struct md_sysfs_entry md_metadata = 4104static struct md_sysfs_entry md_metadata =
3990__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store); 4105__ATTR_PREALLOC(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
3991 4106
3992static ssize_t 4107static ssize_t
3993action_show(struct mddev *mddev, char *page) 4108action_show(struct mddev *mddev, char *page)
3994{ 4109{
3995 char *type = "idle"; 4110 char *type = "idle";
3996 if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) 4111 unsigned long recovery = mddev->recovery;
4112 if (test_bit(MD_RECOVERY_FROZEN, &recovery))
3997 type = "frozen"; 4113 type = "frozen";
3998 else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || 4114 else if (test_bit(MD_RECOVERY_RUNNING, &recovery) ||
3999 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) { 4115 (!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &recovery))) {
4000 if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) 4116 if (test_bit(MD_RECOVERY_RESHAPE, &recovery))
4001 type = "reshape"; 4117 type = "reshape";
4002 else if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { 4118 else if (test_bit(MD_RECOVERY_SYNC, &recovery)) {
4003 if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) 4119 if (!test_bit(MD_RECOVERY_REQUESTED, &recovery))
4004 type = "resync"; 4120 type = "resync";
4005 else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) 4121 else if (test_bit(MD_RECOVERY_CHECK, &recovery))
4006 type = "check"; 4122 type = "check";
4007 else 4123 else
4008 type = "repair"; 4124 type = "repair";
4009 } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) 4125 } else if (test_bit(MD_RECOVERY_RECOVER, &recovery))
4010 type = "recover"; 4126 type = "recover";
4011 } 4127 }
4012 return sprintf(page, "%s\n", type); 4128 return sprintf(page, "%s\n", type);
@@ -4027,7 +4143,10 @@ action_store(struct mddev *mddev, const char *page, size_t len)
4027 flush_workqueue(md_misc_wq); 4143 flush_workqueue(md_misc_wq);
4028 if (mddev->sync_thread) { 4144 if (mddev->sync_thread) {
4029 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 4145 set_bit(MD_RECOVERY_INTR, &mddev->recovery);
4030 md_reap_sync_thread(mddev); 4146 if (mddev_lock(mddev) == 0) {
4147 md_reap_sync_thread(mddev);
4148 mddev_unlock(mddev);
4149 }
4031 } 4150 }
4032 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || 4151 } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
4033 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) 4152 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
@@ -4041,7 +4160,11 @@ action_store(struct mddev *mddev, const char *page, size_t len)
4041 int err; 4160 int err;
4042 if (mddev->pers->start_reshape == NULL) 4161 if (mddev->pers->start_reshape == NULL)
4043 return -EINVAL; 4162 return -EINVAL;
4044 err = mddev->pers->start_reshape(mddev); 4163 err = mddev_lock(mddev);
4164 if (!err) {
4165 err = mddev->pers->start_reshape(mddev);
4166 mddev_unlock(mddev);
4167 }
4045 if (err) 4168 if (err)
4046 return err; 4169 return err;
4047 sysfs_notify(&mddev->kobj, NULL, "degraded"); 4170 sysfs_notify(&mddev->kobj, NULL, "degraded");
@@ -4067,7 +4190,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
4067} 4190}
4068 4191
4069static struct md_sysfs_entry md_scan_mode = 4192static struct md_sysfs_entry md_scan_mode =
4070__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store); 4193__ATTR_PREALLOC(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
4071 4194
4072static ssize_t 4195static ssize_t
4073last_sync_action_show(struct mddev *mddev, char *page) 4196last_sync_action_show(struct mddev *mddev, char *page)
@@ -4213,7 +4336,8 @@ sync_completed_show(struct mddev *mddev, char *page)
4213 return sprintf(page, "%llu / %llu\n", resync, max_sectors); 4336 return sprintf(page, "%llu / %llu\n", resync, max_sectors);
4214} 4337}
4215 4338
4216static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); 4339static struct md_sysfs_entry md_sync_completed =
4340 __ATTR_PREALLOC(sync_completed, S_IRUGO, sync_completed_show, NULL);
4217 4341
4218static ssize_t 4342static ssize_t
4219min_sync_show(struct mddev *mddev, char *page) 4343min_sync_show(struct mddev *mddev, char *page)
@@ -4225,22 +4349,36 @@ static ssize_t
4225min_sync_store(struct mddev *mddev, const char *buf, size_t len) 4349min_sync_store(struct mddev *mddev, const char *buf, size_t len)
4226{ 4350{
4227 unsigned long long min; 4351 unsigned long long min;
4352 int err;
4353 int chunk;
4354
4228 if (kstrtoull(buf, 10, &min)) 4355 if (kstrtoull(buf, 10, &min))
4229 return -EINVAL; 4356 return -EINVAL;
4357
4358 spin_lock(&mddev->lock);
4359 err = -EINVAL;
4230 if (min > mddev->resync_max) 4360 if (min > mddev->resync_max)
4231 return -EINVAL; 4361 goto out_unlock;
4362
4363 err = -EBUSY;
4232 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 4364 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4233 return -EBUSY; 4365 goto out_unlock;
4234 4366
4235 /* Must be a multiple of chunk_size */ 4367 /* Must be a multiple of chunk_size */
4236 if (mddev->chunk_sectors) { 4368 chunk = mddev->chunk_sectors;
4369 if (chunk) {
4237 sector_t temp = min; 4370 sector_t temp = min;
4238 if (sector_div(temp, mddev->chunk_sectors)) 4371
4239 return -EINVAL; 4372 err = -EINVAL;
4373 if (sector_div(temp, chunk))
4374 goto out_unlock;
4240 } 4375 }
4241 mddev->resync_min = min; 4376 mddev->resync_min = min;
4377 err = 0;
4242 4378
4243 return len; 4379out_unlock:
4380 spin_unlock(&mddev->lock);
4381 return err ?: len;
4244} 4382}
4245 4383
4246static struct md_sysfs_entry md_min_sync = 4384static struct md_sysfs_entry md_min_sync =
@@ -4258,29 +4396,42 @@ max_sync_show(struct mddev *mddev, char *page)
4258static ssize_t 4396static ssize_t
4259max_sync_store(struct mddev *mddev, const char *buf, size_t len) 4397max_sync_store(struct mddev *mddev, const char *buf, size_t len)
4260{ 4398{
4399 int err;
4400 spin_lock(&mddev->lock);
4261 if (strncmp(buf, "max", 3) == 0) 4401 if (strncmp(buf, "max", 3) == 0)
4262 mddev->resync_max = MaxSector; 4402 mddev->resync_max = MaxSector;
4263 else { 4403 else {
4264 unsigned long long max; 4404 unsigned long long max;
4405 int chunk;
4406
4407 err = -EINVAL;
4265 if (kstrtoull(buf, 10, &max)) 4408 if (kstrtoull(buf, 10, &max))
4266 return -EINVAL; 4409 goto out_unlock;
4267 if (max < mddev->resync_min) 4410 if (max < mddev->resync_min)
4268 return -EINVAL; 4411 goto out_unlock;
4412
4413 err = -EBUSY;
4269 if (max < mddev->resync_max && 4414 if (max < mddev->resync_max &&
4270 mddev->ro == 0 && 4415 mddev->ro == 0 &&
4271 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 4416 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
4272 return -EBUSY; 4417 goto out_unlock;
4273 4418
4274 /* Must be a multiple of chunk_size */ 4419 /* Must be a multiple of chunk_size */
4275 if (mddev->chunk_sectors) { 4420 chunk = mddev->chunk_sectors;
4421 if (chunk) {
4276 sector_t temp = max; 4422 sector_t temp = max;
4277 if (sector_div(temp, mddev->chunk_sectors)) 4423
4278 return -EINVAL; 4424 err = -EINVAL;
4425 if (sector_div(temp, chunk))
4426 goto out_unlock;
4279 } 4427 }
4280 mddev->resync_max = max; 4428 mddev->resync_max = max;
4281 } 4429 }
4282 wake_up(&mddev->recovery_wait); 4430 wake_up(&mddev->recovery_wait);
4283 return len; 4431 err = 0;
4432out_unlock:
4433 spin_unlock(&mddev->lock);
4434 return err ?: len;
4284} 4435}
4285 4436
4286static struct md_sysfs_entry md_max_sync = 4437static struct md_sysfs_entry md_max_sync =
@@ -4297,14 +4448,20 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4297{ 4448{
4298 char *e; 4449 char *e;
4299 unsigned long long new = simple_strtoull(buf, &e, 10); 4450 unsigned long long new = simple_strtoull(buf, &e, 10);
4300 unsigned long long old = mddev->suspend_lo; 4451 unsigned long long old;
4452 int err;
4301 4453
4302 if (mddev->pers == NULL ||
4303 mddev->pers->quiesce == NULL)
4304 return -EINVAL;
4305 if (buf == e || (*e && *e != '\n')) 4454 if (buf == e || (*e && *e != '\n'))
4306 return -EINVAL; 4455 return -EINVAL;
4307 4456
4457 err = mddev_lock(mddev);
4458 if (err)
4459 return err;
4460 err = -EINVAL;
4461 if (mddev->pers == NULL ||
4462 mddev->pers->quiesce == NULL)
4463 goto unlock;
4464 old = mddev->suspend_lo;
4308 mddev->suspend_lo = new; 4465 mddev->suspend_lo = new;
4309 if (new >= old) 4466 if (new >= old)
4310 /* Shrinking suspended region */ 4467 /* Shrinking suspended region */
@@ -4314,7 +4471,10 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
4314 mddev->pers->quiesce(mddev, 1); 4471 mddev->pers->quiesce(mddev, 1);
4315 mddev->pers->quiesce(mddev, 0); 4472 mddev->pers->quiesce(mddev, 0);
4316 } 4473 }
4317 return len; 4474 err = 0;
4475unlock:
4476 mddev_unlock(mddev);
4477 return err ?: len;
4318} 4478}
4319static struct md_sysfs_entry md_suspend_lo = 4479static struct md_sysfs_entry md_suspend_lo =
4320__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store); 4480__ATTR(suspend_lo, S_IRUGO|S_IWUSR, suspend_lo_show, suspend_lo_store);
@@ -4330,14 +4490,20 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4330{ 4490{
4331 char *e; 4491 char *e;
4332 unsigned long long new = simple_strtoull(buf, &e, 10); 4492 unsigned long long new = simple_strtoull(buf, &e, 10);
4333 unsigned long long old = mddev->suspend_hi; 4493 unsigned long long old;
4494 int err;
4334 4495
4335 if (mddev->pers == NULL ||
4336 mddev->pers->quiesce == NULL)
4337 return -EINVAL;
4338 if (buf == e || (*e && *e != '\n')) 4496 if (buf == e || (*e && *e != '\n'))
4339 return -EINVAL; 4497 return -EINVAL;
4340 4498
4499 err = mddev_lock(mddev);
4500 if (err)
4501 return err;
4502 err = -EINVAL;
4503 if (mddev->pers == NULL ||
4504 mddev->pers->quiesce == NULL)
4505 goto unlock;
4506 old = mddev->suspend_hi;
4341 mddev->suspend_hi = new; 4507 mddev->suspend_hi = new;
4342 if (new <= old) 4508 if (new <= old)
4343 /* Shrinking suspended region */ 4509 /* Shrinking suspended region */
@@ -4347,7 +4513,10 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
4347 mddev->pers->quiesce(mddev, 1); 4513 mddev->pers->quiesce(mddev, 1);
4348 mddev->pers->quiesce(mddev, 0); 4514 mddev->pers->quiesce(mddev, 0);
4349 } 4515 }
4350 return len; 4516 err = 0;
4517unlock:
4518 mddev_unlock(mddev);
4519 return err ?: len;
4351} 4520}
4352static struct md_sysfs_entry md_suspend_hi = 4521static struct md_sysfs_entry md_suspend_hi =
4353__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store); 4522__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
@@ -4367,11 +4536,17 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4367{ 4536{
4368 struct md_rdev *rdev; 4537 struct md_rdev *rdev;
4369 char *e; 4538 char *e;
4539 int err;
4370 unsigned long long new = simple_strtoull(buf, &e, 10); 4540 unsigned long long new = simple_strtoull(buf, &e, 10);
4371 if (mddev->pers) 4541
4372 return -EBUSY;
4373 if (buf == e || (*e && *e != '\n')) 4542 if (buf == e || (*e && *e != '\n'))
4374 return -EINVAL; 4543 return -EINVAL;
4544 err = mddev_lock(mddev);
4545 if (err)
4546 return err;
4547 err = -EBUSY;
4548 if (mddev->pers)
4549 goto unlock;
4375 mddev->reshape_position = new; 4550 mddev->reshape_position = new;
4376 mddev->delta_disks = 0; 4551 mddev->delta_disks = 0;
4377 mddev->reshape_backwards = 0; 4552 mddev->reshape_backwards = 0;
@@ -4380,7 +4555,10 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
4380 mddev->new_chunk_sectors = mddev->chunk_sectors; 4555 mddev->new_chunk_sectors = mddev->chunk_sectors;
4381 rdev_for_each(rdev, mddev) 4556 rdev_for_each(rdev, mddev)
4382 rdev->new_data_offset = rdev->data_offset; 4557 rdev->new_data_offset = rdev->data_offset;
4383 return len; 4558 err = 0;
4559unlock:
4560 mddev_unlock(mddev);
4561 return err ?: len;
4384} 4562}
4385 4563
4386static struct md_sysfs_entry md_reshape_position = 4564static struct md_sysfs_entry md_reshape_position =
@@ -4398,6 +4576,8 @@ static ssize_t
4398reshape_direction_store(struct mddev *mddev, const char *buf, size_t len) 4576reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4399{ 4577{
4400 int backwards = 0; 4578 int backwards = 0;
4579 int err;
4580
4401 if (cmd_match(buf, "forwards")) 4581 if (cmd_match(buf, "forwards"))
4402 backwards = 0; 4582 backwards = 0;
4403 else if (cmd_match(buf, "backwards")) 4583 else if (cmd_match(buf, "backwards"))
@@ -4407,16 +4587,19 @@ reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
4407 if (mddev->reshape_backwards == backwards) 4587 if (mddev->reshape_backwards == backwards)
4408 return len; 4588 return len;
4409 4589
4590 err = mddev_lock(mddev);
4591 if (err)
4592 return err;
4410 /* check if we are allowed to change */ 4593 /* check if we are allowed to change */
4411 if (mddev->delta_disks) 4594 if (mddev->delta_disks)
4412 return -EBUSY; 4595 err = -EBUSY;
4413 4596 else if (mddev->persistent &&
4414 if (mddev->persistent &&
4415 mddev->major_version == 0) 4597 mddev->major_version == 0)
4416 return -EINVAL; 4598 err = -EINVAL;
4417 4599 else
4418 mddev->reshape_backwards = backwards; 4600 mddev->reshape_backwards = backwards;
4419 return len; 4601 mddev_unlock(mddev);
4602 return err ?: len;
4420} 4603}
4421 4604
4422static struct md_sysfs_entry md_reshape_direction = 4605static struct md_sysfs_entry md_reshape_direction =
@@ -4437,6 +4620,11 @@ static ssize_t
4437array_size_store(struct mddev *mddev, const char *buf, size_t len) 4620array_size_store(struct mddev *mddev, const char *buf, size_t len)
4438{ 4621{
4439 sector_t sectors; 4622 sector_t sectors;
4623 int err;
4624
4625 err = mddev_lock(mddev);
4626 if (err)
4627 return err;
4440 4628
4441 if (strncmp(buf, "default", 7) == 0) { 4629 if (strncmp(buf, "default", 7) == 0) {
4442 if (mddev->pers) 4630 if (mddev->pers)
@@ -4447,19 +4635,22 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len)
4447 mddev->external_size = 0; 4635 mddev->external_size = 0;
4448 } else { 4636 } else {
4449 if (strict_blocks_to_sectors(buf, &sectors) < 0) 4637 if (strict_blocks_to_sectors(buf, &sectors) < 0)
4450 return -EINVAL; 4638 err = -EINVAL;
4451 if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors) 4639 else if (mddev->pers && mddev->pers->size(mddev, 0, 0) < sectors)
4452 return -E2BIG; 4640 err = -E2BIG;
4453 4641 else
4454 mddev->external_size = 1; 4642 mddev->external_size = 1;
4455 } 4643 }
4456 4644
4457 mddev->array_sectors = sectors; 4645 if (!err) {
4458 if (mddev->pers) { 4646 mddev->array_sectors = sectors;
4459 set_capacity(mddev->gendisk, mddev->array_sectors); 4647 if (mddev->pers) {
4460 revalidate_disk(mddev->gendisk); 4648 set_capacity(mddev->gendisk, mddev->array_sectors);
4649 revalidate_disk(mddev->gendisk);
4650 }
4461 } 4651 }
4462 return len; 4652 mddev_unlock(mddev);
4653 return err ?: len;
4463} 4654}
4464 4655
4465static struct md_sysfs_entry md_array_size = 4656static struct md_sysfs_entry md_array_size =
@@ -4523,11 +4714,7 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
4523 mddev_get(mddev); 4714 mddev_get(mddev);
4524 spin_unlock(&all_mddevs_lock); 4715 spin_unlock(&all_mddevs_lock);
4525 4716
4526 rv = mddev_lock(mddev); 4717 rv = entry->show(mddev, page);
4527 if (!rv) {
4528 rv = entry->show(mddev, page);
4529 mddev_unlock(mddev);
4530 }
4531 mddev_put(mddev); 4718 mddev_put(mddev);
4532 return rv; 4719 return rv;
4533} 4720}
@@ -4551,13 +4738,7 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
4551 } 4738 }
4552 mddev_get(mddev); 4739 mddev_get(mddev);
4553 spin_unlock(&all_mddevs_lock); 4740 spin_unlock(&all_mddevs_lock);
4554 if (entry->store == new_dev_store) 4741 rv = entry->store(mddev, page, length);
4555 flush_workqueue(md_misc_wq);
4556 rv = mddev_lock(mddev);
4557 if (!rv) {
4558 rv = entry->store(mddev, page, length);
4559 mddev_unlock(mddev);
4560 }
4561 mddev_put(mddev); 4742 mddev_put(mddev);
4562 return rv; 4743 return rv;
4563} 4744}
@@ -4825,7 +5006,6 @@ int md_run(struct mddev *mddev)
4825 mddev->clevel); 5006 mddev->clevel);
4826 return -EINVAL; 5007 return -EINVAL;
4827 } 5008 }
4828 mddev->pers = pers;
4829 spin_unlock(&pers_lock); 5009 spin_unlock(&pers_lock);
4830 if (mddev->level != pers->level) { 5010 if (mddev->level != pers->level) {
4831 mddev->level = pers->level; 5011 mddev->level = pers->level;
@@ -4836,7 +5016,6 @@ int md_run(struct mddev *mddev)
4836 if (mddev->reshape_position != MaxSector && 5016 if (mddev->reshape_position != MaxSector &&
4837 pers->start_reshape == NULL) { 5017 pers->start_reshape == NULL) {
4838 /* This personality cannot handle reshaping... */ 5018 /* This personality cannot handle reshaping... */
4839 mddev->pers = NULL;
4840 module_put(pers->owner); 5019 module_put(pers->owner);
4841 return -EINVAL; 5020 return -EINVAL;
4842 } 5021 }
@@ -4880,35 +5059,38 @@ int md_run(struct mddev *mddev)
4880 if (start_readonly && mddev->ro == 0) 5059 if (start_readonly && mddev->ro == 0)
4881 mddev->ro = 2; /* read-only, but switch on first write */ 5060 mddev->ro = 2; /* read-only, but switch on first write */
4882 5061
4883 err = mddev->pers->run(mddev); 5062 err = pers->run(mddev);
4884 if (err) 5063 if (err)
4885 printk(KERN_ERR "md: pers->run() failed ...\n"); 5064 printk(KERN_ERR "md: pers->run() failed ...\n");
4886 else if (mddev->pers->size(mddev, 0, 0) < mddev->array_sectors) { 5065 else if (pers->size(mddev, 0, 0) < mddev->array_sectors) {
4887 WARN_ONCE(!mddev->external_size, "%s: default size too small," 5066 WARN_ONCE(!mddev->external_size, "%s: default size too small,"
4888 " but 'external_size' not in effect?\n", __func__); 5067 " but 'external_size' not in effect?\n", __func__);
4889 printk(KERN_ERR 5068 printk(KERN_ERR
4890 "md: invalid array_size %llu > default size %llu\n", 5069 "md: invalid array_size %llu > default size %llu\n",
4891 (unsigned long long)mddev->array_sectors / 2, 5070 (unsigned long long)mddev->array_sectors / 2,
4892 (unsigned long long)mddev->pers->size(mddev, 0, 0) / 2); 5071 (unsigned long long)pers->size(mddev, 0, 0) / 2);
4893 err = -EINVAL; 5072 err = -EINVAL;
4894 mddev->pers->stop(mddev);
4895 } 5073 }
4896 if (err == 0 && mddev->pers->sync_request && 5074 if (err == 0 && pers->sync_request &&
4897 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) { 5075 (mddev->bitmap_info.file || mddev->bitmap_info.offset)) {
4898 err = bitmap_create(mddev); 5076 err = bitmap_create(mddev);
4899 if (err) { 5077 if (err)
4900 printk(KERN_ERR "%s: failed to create bitmap (%d)\n", 5078 printk(KERN_ERR "%s: failed to create bitmap (%d)\n",
4901 mdname(mddev), err); 5079 mdname(mddev), err);
4902 mddev->pers->stop(mddev);
4903 }
4904 } 5080 }
4905 if (err) { 5081 if (err) {
4906 module_put(mddev->pers->owner); 5082 mddev_detach(mddev);
4907 mddev->pers = NULL; 5083 pers->free(mddev, mddev->private);
5084 module_put(pers->owner);
4908 bitmap_destroy(mddev); 5085 bitmap_destroy(mddev);
4909 return err; 5086 return err;
4910 } 5087 }
4911 if (mddev->pers->sync_request) { 5088 if (mddev->queue) {
5089 mddev->queue->backing_dev_info.congested_data = mddev;
5090 mddev->queue->backing_dev_info.congested_fn = md_congested;
5091 blk_queue_merge_bvec(mddev->queue, md_mergeable_bvec);
5092 }
5093 if (pers->sync_request) {
4912 if (mddev->kobj.sd && 5094 if (mddev->kobj.sd &&
4913 sysfs_create_group(&mddev->kobj, &md_redundancy_group)) 5095 sysfs_create_group(&mddev->kobj, &md_redundancy_group))
4914 printk(KERN_WARNING 5096 printk(KERN_WARNING
@@ -4927,7 +5109,10 @@ int md_run(struct mddev *mddev)
4927 mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */ 5109 mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
4928 mddev->in_sync = 1; 5110 mddev->in_sync = 1;
4929 smp_wmb(); 5111 smp_wmb();
5112 spin_lock(&mddev->lock);
5113 mddev->pers = pers;
4930 mddev->ready = 1; 5114 mddev->ready = 1;
5115 spin_unlock(&mddev->lock);
4931 rdev_for_each(rdev, mddev) 5116 rdev_for_each(rdev, mddev)
4932 if (rdev->raid_disk >= 0) 5117 if (rdev->raid_disk >= 0)
4933 if (sysfs_link_rdev(mddev, rdev)) 5118 if (sysfs_link_rdev(mddev, rdev))
@@ -5070,14 +5255,38 @@ void md_stop_writes(struct mddev *mddev)
5070} 5255}
5071EXPORT_SYMBOL_GPL(md_stop_writes); 5256EXPORT_SYMBOL_GPL(md_stop_writes);
5072 5257
5258static void mddev_detach(struct mddev *mddev)
5259{
5260 struct bitmap *bitmap = mddev->bitmap;
5261 /* wait for behind writes to complete */
5262 if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
5263 printk(KERN_INFO "md:%s: behind writes in progress - waiting to stop.\n",
5264 mdname(mddev));
5265 /* need to kick something here to make sure I/O goes? */
5266 wait_event(bitmap->behind_wait,
5267 atomic_read(&bitmap->behind_writes) == 0);
5268 }
5269 if (mddev->pers && mddev->pers->quiesce) {
5270 mddev->pers->quiesce(mddev, 1);
5271 mddev->pers->quiesce(mddev, 0);
5272 }
5273 md_unregister_thread(&mddev->thread);
5274 if (mddev->queue)
5275 blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
5276}
5277
5073static void __md_stop(struct mddev *mddev) 5278static void __md_stop(struct mddev *mddev)
5074{ 5279{
5280 struct md_personality *pers = mddev->pers;
5281 mddev_detach(mddev);
5282 spin_lock(&mddev->lock);
5075 mddev->ready = 0; 5283 mddev->ready = 0;
5076 mddev->pers->stop(mddev);
5077 if (mddev->pers->sync_request && mddev->to_remove == NULL)
5078 mddev->to_remove = &md_redundancy_group;
5079 module_put(mddev->pers->owner);
5080 mddev->pers = NULL; 5284 mddev->pers = NULL;
5285 spin_unlock(&mddev->lock);
5286 pers->free(mddev, mddev->private);
5287 if (pers->sync_request && mddev->to_remove == NULL)
5288 mddev->to_remove = &md_redundancy_group;
5289 module_put(pers->owner);
5081 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 5290 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5082} 5291}
5083 5292
@@ -5226,8 +5435,11 @@ static int do_md_stop(struct mddev *mddev, int mode,
5226 5435
5227 bitmap_destroy(mddev); 5436 bitmap_destroy(mddev);
5228 if (mddev->bitmap_info.file) { 5437 if (mddev->bitmap_info.file) {
5229 fput(mddev->bitmap_info.file); 5438 struct file *f = mddev->bitmap_info.file;
5439 spin_lock(&mddev->lock);
5230 mddev->bitmap_info.file = NULL; 5440 mddev->bitmap_info.file = NULL;
5441 spin_unlock(&mddev->lock);
5442 fput(f);
5231 } 5443 }
5232 mddev->bitmap_info.offset = 0; 5444 mddev->bitmap_info.offset = 0;
5233 5445
@@ -5436,37 +5648,31 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
5436static int get_bitmap_file(struct mddev *mddev, void __user * arg) 5648static int get_bitmap_file(struct mddev *mddev, void __user * arg)
5437{ 5649{
5438 mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */ 5650 mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */
5439 char *ptr, *buf = NULL; 5651 char *ptr;
5440 int err = -ENOMEM; 5652 int err;
5441 5653
5442 file = kmalloc(sizeof(*file), GFP_NOIO); 5654 file = kmalloc(sizeof(*file), GFP_NOIO);
5443
5444 if (!file) 5655 if (!file)
5445 goto out; 5656 return -ENOMEM;
5446 5657
5658 err = 0;
5659 spin_lock(&mddev->lock);
5447 /* bitmap disabled, zero the first byte and copy out */ 5660 /* bitmap disabled, zero the first byte and copy out */
5448 if (!mddev->bitmap || !mddev->bitmap->storage.file) { 5661 if (!mddev->bitmap_info.file)
5449 file->pathname[0] = '\0'; 5662 file->pathname[0] = '\0';
5450 goto copy_out; 5663 else if ((ptr = d_path(&mddev->bitmap_info.file->f_path,
5451 } 5664 file->pathname, sizeof(file->pathname))),
5452 5665 IS_ERR(ptr))
5453 buf = kmalloc(sizeof(file->pathname), GFP_KERNEL); 5666 err = PTR_ERR(ptr);
5454 if (!buf) 5667 else
5455 goto out; 5668 memmove(file->pathname, ptr,
5456 5669 sizeof(file->pathname)-(ptr-file->pathname));
5457 ptr = d_path(&mddev->bitmap->storage.file->f_path, 5670 spin_unlock(&mddev->lock);
5458 buf, sizeof(file->pathname));
5459 if (IS_ERR(ptr))
5460 goto out;
5461
5462 strcpy(file->pathname, ptr);
5463 5671
5464copy_out: 5672 if (err == 0 &&
5465 err = 0; 5673 copy_to_user(arg, file, sizeof(*file)))
5466 if (copy_to_user(arg, file, sizeof(*file)))
5467 err = -EFAULT; 5674 err = -EFAULT;
5468out: 5675
5469 kfree(buf);
5470 kfree(file); 5676 kfree(file);
5471 return err; 5677 return err;
5472} 5678}
@@ -5789,22 +5995,24 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
5789 5995
5790 if (fd >= 0) { 5996 if (fd >= 0) {
5791 struct inode *inode; 5997 struct inode *inode;
5792 if (mddev->bitmap) 5998 struct file *f;
5999
6000 if (mddev->bitmap || mddev->bitmap_info.file)
5793 return -EEXIST; /* cannot add when bitmap is present */ 6001 return -EEXIST; /* cannot add when bitmap is present */
5794 mddev->bitmap_info.file = fget(fd); 6002 f = fget(fd);
5795 6003
5796 if (mddev->bitmap_info.file == NULL) { 6004 if (f == NULL) {
5797 printk(KERN_ERR "%s: error: failed to get bitmap file\n", 6005 printk(KERN_ERR "%s: error: failed to get bitmap file\n",
5798 mdname(mddev)); 6006 mdname(mddev));
5799 return -EBADF; 6007 return -EBADF;
5800 } 6008 }
5801 6009
5802 inode = mddev->bitmap_info.file->f_mapping->host; 6010 inode = f->f_mapping->host;
5803 if (!S_ISREG(inode->i_mode)) { 6011 if (!S_ISREG(inode->i_mode)) {
5804 printk(KERN_ERR "%s: error: bitmap file must be a regular file\n", 6012 printk(KERN_ERR "%s: error: bitmap file must be a regular file\n",
5805 mdname(mddev)); 6013 mdname(mddev));
5806 err = -EBADF; 6014 err = -EBADF;
5807 } else if (!(mddev->bitmap_info.file->f_mode & FMODE_WRITE)) { 6015 } else if (!(f->f_mode & FMODE_WRITE)) {
5808 printk(KERN_ERR "%s: error: bitmap file must open for write\n", 6016 printk(KERN_ERR "%s: error: bitmap file must open for write\n",
5809 mdname(mddev)); 6017 mdname(mddev));
5810 err = -EBADF; 6018 err = -EBADF;
@@ -5814,10 +6022,10 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
5814 err = -EBUSY; 6022 err = -EBUSY;
5815 } 6023 }
5816 if (err) { 6024 if (err) {
5817 fput(mddev->bitmap_info.file); 6025 fput(f);
5818 mddev->bitmap_info.file = NULL;
5819 return err; 6026 return err;
5820 } 6027 }
6028 mddev->bitmap_info.file = f;
5821 mddev->bitmap_info.offset = 0; /* file overrides offset */ 6029 mddev->bitmap_info.offset = 0; /* file overrides offset */
5822 } else if (mddev->bitmap == NULL) 6030 } else if (mddev->bitmap == NULL)
5823 return -ENOENT; /* cannot remove what isn't there */ 6031 return -ENOENT; /* cannot remove what isn't there */
@@ -5836,9 +6044,13 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
5836 mddev->pers->quiesce(mddev, 0); 6044 mddev->pers->quiesce(mddev, 0);
5837 } 6045 }
5838 if (fd < 0) { 6046 if (fd < 0) {
5839 if (mddev->bitmap_info.file) 6047 struct file *f = mddev->bitmap_info.file;
5840 fput(mddev->bitmap_info.file); 6048 if (f) {
5841 mddev->bitmap_info.file = NULL; 6049 spin_lock(&mddev->lock);
6050 mddev->bitmap_info.file = NULL;
6051 spin_unlock(&mddev->lock);
6052 fput(f);
6053 }
5842 } 6054 }
5843 6055
5844 return err; 6056 return err;
@@ -6251,6 +6463,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6251 case SET_DISK_FAULTY: 6463 case SET_DISK_FAULTY:
6252 err = set_disk_faulty(mddev, new_decode_dev(arg)); 6464 err = set_disk_faulty(mddev, new_decode_dev(arg));
6253 goto out; 6465 goto out;
6466
6467 case GET_BITMAP_FILE:
6468 err = get_bitmap_file(mddev, argp);
6469 goto out;
6470
6254 } 6471 }
6255 6472
6256 if (cmd == ADD_NEW_DISK) 6473 if (cmd == ADD_NEW_DISK)
@@ -6342,10 +6559,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
6342 * Commands even a read-only array can execute: 6559 * Commands even a read-only array can execute:
6343 */ 6560 */
6344 switch (cmd) { 6561 switch (cmd) {
6345 case GET_BITMAP_FILE:
6346 err = get_bitmap_file(mddev, argp);
6347 goto unlock;
6348
6349 case RESTART_ARRAY_RW: 6562 case RESTART_ARRAY_RW:
6350 err = restart_array(mddev); 6563 err = restart_array(mddev);
6351 goto unlock; 6564 goto unlock;
@@ -6873,9 +7086,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
6873 return 0; 7086 return 0;
6874 } 7087 }
6875 7088
6876 if (mddev_lock(mddev) < 0) 7089 spin_lock(&mddev->lock);
6877 return -EINTR;
6878
6879 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) { 7090 if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
6880 seq_printf(seq, "%s : %sactive", mdname(mddev), 7091 seq_printf(seq, "%s : %sactive", mdname(mddev),
6881 mddev->pers ? "" : "in"); 7092 mddev->pers ? "" : "in");
@@ -6888,7 +7099,8 @@ static int md_seq_show(struct seq_file *seq, void *v)
6888 } 7099 }
6889 7100
6890 sectors = 0; 7101 sectors = 0;
6891 rdev_for_each(rdev, mddev) { 7102 rcu_read_lock();
7103 rdev_for_each_rcu(rdev, mddev) {
6892 char b[BDEVNAME_SIZE]; 7104 char b[BDEVNAME_SIZE];
6893 seq_printf(seq, " %s[%d]", 7105 seq_printf(seq, " %s[%d]",
6894 bdevname(rdev->bdev,b), rdev->desc_nr); 7106 bdevname(rdev->bdev,b), rdev->desc_nr);
@@ -6904,6 +7116,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
6904 seq_printf(seq, "(R)"); 7116 seq_printf(seq, "(R)");
6905 sectors += rdev->sectors; 7117 sectors += rdev->sectors;
6906 } 7118 }
7119 rcu_read_unlock();
6907 7120
6908 if (!list_empty(&mddev->disks)) { 7121 if (!list_empty(&mddev->disks)) {
6909 if (mddev->pers) 7122 if (mddev->pers)
@@ -6946,7 +7159,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
6946 7159
6947 seq_printf(seq, "\n"); 7160 seq_printf(seq, "\n");
6948 } 7161 }
6949 mddev_unlock(mddev); 7162 spin_unlock(&mddev->lock);
6950 7163
6951 return 0; 7164 return 0;
6952} 7165}
@@ -7102,7 +7315,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi)
7102 if (mddev->safemode == 1) 7315 if (mddev->safemode == 1)
7103 mddev->safemode = 0; 7316 mddev->safemode = 0;
7104 if (mddev->in_sync) { 7317 if (mddev->in_sync) {
7105 spin_lock_irq(&mddev->write_lock); 7318 spin_lock(&mddev->lock);
7106 if (mddev->in_sync) { 7319 if (mddev->in_sync) {
7107 mddev->in_sync = 0; 7320 mddev->in_sync = 0;
7108 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 7321 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
@@ -7110,7 +7323,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi)
7110 md_wakeup_thread(mddev->thread); 7323 md_wakeup_thread(mddev->thread);
7111 did_change = 1; 7324 did_change = 1;
7112 } 7325 }
7113 spin_unlock_irq(&mddev->write_lock); 7326 spin_unlock(&mddev->lock);
7114 } 7327 }
7115 if (did_change) 7328 if (did_change)
7116 sysfs_notify_dirent_safe(mddev->sysfs_state); 7329 sysfs_notify_dirent_safe(mddev->sysfs_state);
@@ -7148,7 +7361,7 @@ int md_allow_write(struct mddev *mddev)
7148 if (!mddev->pers->sync_request) 7361 if (!mddev->pers->sync_request)
7149 return 0; 7362 return 0;
7150 7363
7151 spin_lock_irq(&mddev->write_lock); 7364 spin_lock(&mddev->lock);
7152 if (mddev->in_sync) { 7365 if (mddev->in_sync) {
7153 mddev->in_sync = 0; 7366 mddev->in_sync = 0;
7154 set_bit(MD_CHANGE_CLEAN, &mddev->flags); 7367 set_bit(MD_CHANGE_CLEAN, &mddev->flags);
@@ -7156,11 +7369,11 @@ int md_allow_write(struct mddev *mddev)
7156 if (mddev->safemode_delay && 7369 if (mddev->safemode_delay &&
7157 mddev->safemode == 0) 7370 mddev->safemode == 0)
7158 mddev->safemode = 1; 7371 mddev->safemode = 1;
7159 spin_unlock_irq(&mddev->write_lock); 7372 spin_unlock(&mddev->lock);
7160 md_update_sb(mddev, 0); 7373 md_update_sb(mddev, 0);
7161 sysfs_notify_dirent_safe(mddev->sysfs_state); 7374 sysfs_notify_dirent_safe(mddev->sysfs_state);
7162 } else 7375 } else
7163 spin_unlock_irq(&mddev->write_lock); 7376 spin_unlock(&mddev->lock);
7164 7377
7165 if (test_bit(MD_CHANGE_PENDING, &mddev->flags)) 7378 if (test_bit(MD_CHANGE_PENDING, &mddev->flags))
7166 return -EAGAIN; 7379 return -EAGAIN;
@@ -7513,6 +7726,7 @@ void md_do_sync(struct md_thread *thread)
7513 skip: 7726 skip:
7514 set_bit(MD_CHANGE_DEVS, &mddev->flags); 7727 set_bit(MD_CHANGE_DEVS, &mddev->flags);
7515 7728
7729 spin_lock(&mddev->lock);
7516 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { 7730 if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
7517 /* We completed so min/max setting can be forgotten if used. */ 7731 /* We completed so min/max setting can be forgotten if used. */
7518 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) 7732 if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
@@ -7521,6 +7735,8 @@ void md_do_sync(struct md_thread *thread)
7521 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) 7735 } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
7522 mddev->resync_min = mddev->curr_resync_completed; 7736 mddev->resync_min = mddev->curr_resync_completed;
7523 mddev->curr_resync = 0; 7737 mddev->curr_resync = 0;
7738 spin_unlock(&mddev->lock);
7739
7524 wake_up(&resync_wait); 7740 wake_up(&resync_wait);
7525 set_bit(MD_RECOVERY_DONE, &mddev->recovery); 7741 set_bit(MD_RECOVERY_DONE, &mddev->recovery);
7526 md_wakeup_thread(mddev->thread); 7742 md_wakeup_thread(mddev->thread);
@@ -7688,7 +7904,7 @@ void md_check_recovery(struct mddev *mddev)
7688 7904
7689 if (!mddev->external) { 7905 if (!mddev->external) {
7690 int did_change = 0; 7906 int did_change = 0;
7691 spin_lock_irq(&mddev->write_lock); 7907 spin_lock(&mddev->lock);
7692 if (mddev->safemode && 7908 if (mddev->safemode &&
7693 !atomic_read(&mddev->writes_pending) && 7909 !atomic_read(&mddev->writes_pending) &&
7694 !mddev->in_sync && 7910 !mddev->in_sync &&
@@ -7699,7 +7915,7 @@ void md_check_recovery(struct mddev *mddev)
7699 } 7915 }
7700 if (mddev->safemode == 1) 7916 if (mddev->safemode == 1)
7701 mddev->safemode = 0; 7917 mddev->safemode = 0;
7702 spin_unlock_irq(&mddev->write_lock); 7918 spin_unlock(&mddev->lock);
7703 if (did_change) 7919 if (did_change)
7704 sysfs_notify_dirent_safe(mddev->sysfs_state); 7920 sysfs_notify_dirent_safe(mddev->sysfs_state);
7705 } 7921 }
@@ -7721,7 +7937,9 @@ void md_check_recovery(struct mddev *mddev)
7721 * any transients in the value of "sync_action". 7937 * any transients in the value of "sync_action".
7722 */ 7938 */
7723 mddev->curr_resync_completed = 0; 7939 mddev->curr_resync_completed = 0;
7940 spin_lock(&mddev->lock);
7724 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); 7941 set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
7942 spin_unlock(&mddev->lock);
7725 /* Clear some bits that don't mean anything, but 7943 /* Clear some bits that don't mean anything, but
7726 * might be left set 7944 * might be left set
7727 */ 7945 */