aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-08-13 13:59:29 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-08-13 13:59:29 -0400
commitd58d2d1adec90e7bc0c56e09b3ac0e9a5a471e68 (patch)
tree17044053cfbde45da9bf8654ab6272b92773181c
parent7334219c44826ae0ebe6f07555c6b97f978ce266 (diff)
parent4d484a4a7a5126410eed5f8dd329a33f6eeed068 (diff)
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: md: allow upper limit for resync/reshape to be set when array is read-only md/raid5: Properly remove excess drives after shrinking a raid5/6 md/raid5: make sure a reshape restarts at the correct address. md/raid5: allow new reshape modes to be restarted in the middle. md: never advance 'events' counter by more than 1. Remove deadlock potential in md_open
-rw-r--r--drivers/md/md.c32
-rw-r--r--drivers/md/md.h10
-rw-r--r--drivers/md/raid5.c34
3 files changed, 56 insertions, 20 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 5b98bea4ff9b..103f2d33fa89 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
359 else 359 else
360 new->md_minor = MINOR(unit) >> MdpMinorShift; 360 new->md_minor = MINOR(unit) >> MdpMinorShift;
361 361
362 mutex_init(&new->open_mutex);
362 mutex_init(&new->reconfig_mutex); 363 mutex_init(&new->reconfig_mutex);
363 INIT_LIST_HEAD(&new->disks); 364 INIT_LIST_HEAD(&new->disks);
364 INIT_LIST_HEAD(&new->all_mddevs); 365 INIT_LIST_HEAD(&new->all_mddevs);
@@ -1974,17 +1975,14 @@ repeat:
1974 /* otherwise we have to go forward and ... */ 1975 /* otherwise we have to go forward and ... */
1975 mddev->events ++; 1976 mddev->events ++;
1976 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ 1977 if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
1977 /* .. if the array isn't clean, insist on an odd 'events' */ 1978 /* .. if the array isn't clean, an 'even' event must also go
1978 if ((mddev->events&1)==0) { 1979 * to spares. */
1979 mddev->events++; 1980 if ((mddev->events&1)==0)
1980 nospares = 0; 1981 nospares = 0;
1981 }
1982 } else { 1982 } else {
1983 /* otherwise insist on an even 'events' (for clean states) */ 1983 /* otherwise an 'odd' event must go to spares */
1984 if ((mddev->events&1)) { 1984 if ((mddev->events&1))
1985 mddev->events++;
1986 nospares = 0; 1985 nospares = 0;
1987 }
1988 } 1986 }
1989 } 1987 }
1990 1988
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
3601 if (max < mddev->resync_min) 3599 if (max < mddev->resync_min)
3602 return -EINVAL; 3600 return -EINVAL;
3603 if (max < mddev->resync_max && 3601 if (max < mddev->resync_max &&
3602 mddev->ro == 0 &&
3604 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) 3603 test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
3605 return -EBUSY; 3604 return -EBUSY;
3606 3605
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4304 struct gendisk *disk = mddev->gendisk; 4303 struct gendisk *disk = mddev->gendisk;
4305 mdk_rdev_t *rdev; 4304 mdk_rdev_t *rdev;
4306 4305
4306 mutex_lock(&mddev->open_mutex);
4307 if (atomic_read(&mddev->openers) > is_open) { 4307 if (atomic_read(&mddev->openers) > is_open) {
4308 printk("md: %s still in use.\n",mdname(mddev)); 4308 printk("md: %s still in use.\n",mdname(mddev));
4309 return -EBUSY; 4309 err = -EBUSY;
4310 } 4310 } else if (mddev->pers) {
4311
4312 if (mddev->pers) {
4313 4311
4314 if (mddev->sync_thread) { 4312 if (mddev->sync_thread) {
4315 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4313 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4367 set_disk_ro(disk, 1); 4365 set_disk_ro(disk, 1);
4368 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4366 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4369 } 4367 }
4370 4368out:
4369 mutex_unlock(&mddev->open_mutex);
4370 if (err)
4371 return err;
4371 /* 4372 /*
4372 * Free resources if final stop 4373 * Free resources if final stop
4373 */ 4374 */
@@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
4433 blk_integrity_unregister(disk); 4434 blk_integrity_unregister(disk);
4434 md_new_event(mddev); 4435 md_new_event(mddev);
4435 sysfs_notify_dirent(mddev->sysfs_state); 4436 sysfs_notify_dirent(mddev->sysfs_state);
4436out:
4437 return err; 4437 return err;
4438} 4438}
4439 4439
@@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
5518 } 5518 }
5519 BUG_ON(mddev != bdev->bd_disk->private_data); 5519 BUG_ON(mddev != bdev->bd_disk->private_data);
5520 5520
5521 if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) 5521 if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
5522 goto out; 5522 goto out;
5523 5523
5524 err = 0; 5524 err = 0;
5525 atomic_inc(&mddev->openers); 5525 atomic_inc(&mddev->openers);
5526 mddev_unlock(mddev); 5526 mutex_unlock(&mddev->open_mutex);
5527 5527
5528 check_disk_change(bdev); 5528 check_disk_change(bdev);
5529 out: 5529 out:
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 78f03168baf9..f8fc188bc762 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
223 * so we don't loop trying */ 223 * so we don't loop trying */
224 224
225 int in_sync; /* know to not need resync */ 225 int in_sync; /* know to not need resync */
226 /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
227 * that we are never stopping an array while it is open.
228 * 'reconfig_mutex' protects all other reconfiguration.
229 * These locks are separate due to conflicting interactions
230 * with bdev->bd_mutex.
231 * Lock ordering is:
232 * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
233 * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
234 */
235 struct mutex open_mutex;
226 struct mutex reconfig_mutex; 236 struct mutex reconfig_mutex;
227 atomic_t active; /* general refcount */ 237 atomic_t active; /* general refcount */
228 atomic_t openers; /* number of active opens */ 238 atomic_t openers; /* number of active opens */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2b521ee67dfa..b8a2c5dc67ba 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) { 3785 conf->reshape_progress < raid5_size(mddev, 0, 0)) {
3786 sector_nr = raid5_size(mddev, 0, 0) 3786 sector_nr = raid5_size(mddev, 0, 0)
3787 - conf->reshape_progress; 3787 - conf->reshape_progress;
3788 } else if (mddev->delta_disks > 0 && 3788 } else if (mddev->delta_disks >= 0 &&
3789 conf->reshape_progress > 0) 3789 conf->reshape_progress > 0)
3790 sector_nr = conf->reshape_progress; 3790 sector_nr = conf->reshape_progress;
3791 sector_div(sector_nr, new_data_disks); 3791 sector_div(sector_nr, new_data_disks);
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
4509 (old_disks-max_degraded)); 4509 (old_disks-max_degraded));
4510 /* here_old is the first stripe that we might need to read 4510 /* here_old is the first stripe that we might need to read
4511 * from */ 4511 * from */
4512 if (here_new >= here_old) { 4512 if (mddev->delta_disks == 0) {
4513 /* We cannot be sure it is safe to start an in-place
4514 * reshape. It is only safe if user-space if monitoring
4515 * and taking constant backups.
4516 * mdadm always starts a situation like this in
4517 * readonly mode so it can take control before
4518 * allowing any writes. So just check for that.
4519 */
4520 if ((here_new * mddev->new_chunk_sectors !=
4521 here_old * mddev->chunk_sectors) ||
4522 mddev->ro == 0) {
4523 printk(KERN_ERR "raid5: in-place reshape must be started"
4524 " in read-only mode - aborting\n");
4525 return -EINVAL;
4526 }
4527 } else if (mddev->delta_disks < 0
4528 ? (here_new * mddev->new_chunk_sectors <=
4529 here_old * mddev->chunk_sectors)
4530 : (here_new * mddev->new_chunk_sectors >=
4531 here_old * mddev->chunk_sectors)) {
4513 /* Reading from the same stripe as writing to - bad */ 4532 /* Reading from the same stripe as writing to - bad */
4514 printk(KERN_ERR "raid5: reshape_position too early for " 4533 printk(KERN_ERR "raid5: reshape_position too early for "
4515 "auto-recovery - aborting.\n"); 4534 "auto-recovery - aborting.\n");
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
5078 mddev->degraded--; 5097 mddev->degraded--;
5079 for (d = conf->raid_disks ; 5098 for (d = conf->raid_disks ;
5080 d < conf->raid_disks - mddev->delta_disks; 5099 d < conf->raid_disks - mddev->delta_disks;
5081 d++) 5100 d++) {
5082 raid5_remove_disk(mddev, d); 5101 mdk_rdev_t *rdev = conf->disks[d].rdev;
5102 if (rdev && raid5_remove_disk(mddev, d) == 0) {
5103 char nm[20];
5104 sprintf(nm, "rd%d", rdev->raid_disk);
5105 sysfs_remove_link(&mddev->kobj, nm);
5106 rdev->raid_disk = -1;
5107 }
5108 }
5083 } 5109 }
5084 mddev->layout = conf->algorithm; 5110 mddev->layout = conf->algorithm;
5085 mddev->chunk_sectors = conf->chunk_sectors; 5111 mddev->chunk_sectors = conf->chunk_sectors;