diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-08-13 13:59:29 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-08-13 13:59:29 -0400 |
commit | d58d2d1adec90e7bc0c56e09b3ac0e9a5a471e68 (patch) | |
tree | 17044053cfbde45da9bf8654ab6272b92773181c | |
parent | 7334219c44826ae0ebe6f07555c6b97f978ce266 (diff) | |
parent | 4d484a4a7a5126410eed5f8dd329a33f6eeed068 (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
md: allow upper limit for resync/reshape to be set when array is read-only
md/raid5: Properly remove excess drives after shrinking a raid5/6
md/raid5: make sure a reshape restarts at the correct address.
md/raid5: allow new reshape modes to be restarted in the middle.
md: never advance 'events' counter by more than 1.
Remove deadlock potential in md_open
-rw-r--r-- | drivers/md/md.c | 32 | ||||
-rw-r--r-- | drivers/md/md.h | 10 | ||||
-rw-r--r-- | drivers/md/raid5.c | 34 |
3 files changed, 56 insertions, 20 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 5b98bea4ff9b..103f2d33fa89 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit) | |||
359 | else | 359 | else |
360 | new->md_minor = MINOR(unit) >> MdpMinorShift; | 360 | new->md_minor = MINOR(unit) >> MdpMinorShift; |
361 | 361 | ||
362 | mutex_init(&new->open_mutex); | ||
362 | mutex_init(&new->reconfig_mutex); | 363 | mutex_init(&new->reconfig_mutex); |
363 | INIT_LIST_HEAD(&new->disks); | 364 | INIT_LIST_HEAD(&new->disks); |
364 | INIT_LIST_HEAD(&new->all_mddevs); | 365 | INIT_LIST_HEAD(&new->all_mddevs); |
@@ -1974,17 +1975,14 @@ repeat: | |||
1974 | /* otherwise we have to go forward and ... */ | 1975 | /* otherwise we have to go forward and ... */ |
1975 | mddev->events ++; | 1976 | mddev->events ++; |
1976 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ | 1977 | if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */ |
1977 | /* .. if the array isn't clean, insist on an odd 'events' */ | 1978 | /* .. if the array isn't clean, an 'even' event must also go |
1978 | if ((mddev->events&1)==0) { | 1979 | * to spares. */ |
1979 | mddev->events++; | 1980 | if ((mddev->events&1)==0) |
1980 | nospares = 0; | 1981 | nospares = 0; |
1981 | } | ||
1982 | } else { | 1982 | } else { |
1983 | /* otherwise insist on an even 'events' (for clean states) */ | 1983 | /* otherwise an 'odd' event must go to spares */ |
1984 | if ((mddev->events&1)) { | 1984 | if ((mddev->events&1)) |
1985 | mddev->events++; | ||
1986 | nospares = 0; | 1985 | nospares = 0; |
1987 | } | ||
1988 | } | 1986 | } |
1989 | } | 1987 | } |
1990 | 1988 | ||
@@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len) | |||
3601 | if (max < mddev->resync_min) | 3599 | if (max < mddev->resync_min) |
3602 | return -EINVAL; | 3600 | return -EINVAL; |
3603 | if (max < mddev->resync_max && | 3601 | if (max < mddev->resync_max && |
3602 | mddev->ro == 0 && | ||
3604 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) | 3603 | test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) |
3605 | return -EBUSY; | 3604 | return -EBUSY; |
3606 | 3605 | ||
@@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4304 | struct gendisk *disk = mddev->gendisk; | 4303 | struct gendisk *disk = mddev->gendisk; |
4305 | mdk_rdev_t *rdev; | 4304 | mdk_rdev_t *rdev; |
4306 | 4305 | ||
4306 | mutex_lock(&mddev->open_mutex); | ||
4307 | if (atomic_read(&mddev->openers) > is_open) { | 4307 | if (atomic_read(&mddev->openers) > is_open) { |
4308 | printk("md: %s still in use.\n",mdname(mddev)); | 4308 | printk("md: %s still in use.\n",mdname(mddev)); |
4309 | return -EBUSY; | 4309 | err = -EBUSY; |
4310 | } | 4310 | } else if (mddev->pers) { |
4311 | |||
4312 | if (mddev->pers) { | ||
4313 | 4311 | ||
4314 | if (mddev->sync_thread) { | 4312 | if (mddev->sync_thread) { |
4315 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4313 | set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
@@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4367 | set_disk_ro(disk, 1); | 4365 | set_disk_ro(disk, 1); |
4368 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); | 4366 | clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); |
4369 | } | 4367 | } |
4370 | 4368 | out: | |
4369 | mutex_unlock(&mddev->open_mutex); | ||
4370 | if (err) | ||
4371 | return err; | ||
4371 | /* | 4372 | /* |
4372 | * Free resources if final stop | 4373 | * Free resources if final stop |
4373 | */ | 4374 | */ |
@@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) | |||
4433 | blk_integrity_unregister(disk); | 4434 | blk_integrity_unregister(disk); |
4434 | md_new_event(mddev); | 4435 | md_new_event(mddev); |
4435 | sysfs_notify_dirent(mddev->sysfs_state); | 4436 | sysfs_notify_dirent(mddev->sysfs_state); |
4436 | out: | ||
4437 | return err; | 4437 | return err; |
4438 | } | 4438 | } |
4439 | 4439 | ||
@@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode) | |||
5518 | } | 5518 | } |
5519 | BUG_ON(mddev != bdev->bd_disk->private_data); | 5519 | BUG_ON(mddev != bdev->bd_disk->private_data); |
5520 | 5520 | ||
5521 | if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1))) | 5521 | if ((err = mutex_lock_interruptible(&mddev->open_mutex))) |
5522 | goto out; | 5522 | goto out; |
5523 | 5523 | ||
5524 | err = 0; | 5524 | err = 0; |
5525 | atomic_inc(&mddev->openers); | 5525 | atomic_inc(&mddev->openers); |
5526 | mddev_unlock(mddev); | 5526 | mutex_unlock(&mddev->open_mutex); |
5527 | 5527 | ||
5528 | check_disk_change(bdev); | 5528 | check_disk_change(bdev); |
5529 | out: | 5529 | out: |
diff --git a/drivers/md/md.h b/drivers/md/md.h index 78f03168baf9..f8fc188bc762 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h | |||
@@ -223,6 +223,16 @@ struct mddev_s | |||
223 | * so we don't loop trying */ | 223 | * so we don't loop trying */ |
224 | 224 | ||
225 | int in_sync; /* know to not need resync */ | 225 | int in_sync; /* know to not need resync */ |
226 | /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so | ||
227 | * that we are never stopping an array while it is open. | ||
228 | * 'reconfig_mutex' protects all other reconfiguration. | ||
229 | * These locks are separate due to conflicting interactions | ||
230 | * with bdev->bd_mutex. | ||
231 | * Lock ordering is: | ||
232 | * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk | ||
233 | * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open | ||
234 | */ | ||
235 | struct mutex open_mutex; | ||
226 | struct mutex reconfig_mutex; | 236 | struct mutex reconfig_mutex; |
227 | atomic_t active; /* general refcount */ | 237 | atomic_t active; /* general refcount */ |
228 | atomic_t openers; /* number of active opens */ | 238 | atomic_t openers; /* number of active opens */ |
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2b521ee67dfa..b8a2c5dc67ba 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3785 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { | 3785 | conf->reshape_progress < raid5_size(mddev, 0, 0)) { |
3786 | sector_nr = raid5_size(mddev, 0, 0) | 3786 | sector_nr = raid5_size(mddev, 0, 0) |
3787 | - conf->reshape_progress; | 3787 | - conf->reshape_progress; |
3788 | } else if (mddev->delta_disks > 0 && | 3788 | } else if (mddev->delta_disks >= 0 && |
3789 | conf->reshape_progress > 0) | 3789 | conf->reshape_progress > 0) |
3790 | sector_nr = conf->reshape_progress; | 3790 | sector_nr = conf->reshape_progress; |
3791 | sector_div(sector_nr, new_data_disks); | 3791 | sector_div(sector_nr, new_data_disks); |
@@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev) | |||
4509 | (old_disks-max_degraded)); | 4509 | (old_disks-max_degraded)); |
4510 | /* here_old is the first stripe that we might need to read | 4510 | /* here_old is the first stripe that we might need to read |
4511 | * from */ | 4511 | * from */ |
4512 | if (here_new >= here_old) { | 4512 | if (mddev->delta_disks == 0) { |
4513 | /* We cannot be sure it is safe to start an in-place | ||
4514 | * reshape. It is only safe if user-space if monitoring | ||
4515 | * and taking constant backups. | ||
4516 | * mdadm always starts a situation like this in | ||
4517 | * readonly mode so it can take control before | ||
4518 | * allowing any writes. So just check for that. | ||
4519 | */ | ||
4520 | if ((here_new * mddev->new_chunk_sectors != | ||
4521 | here_old * mddev->chunk_sectors) || | ||
4522 | mddev->ro == 0) { | ||
4523 | printk(KERN_ERR "raid5: in-place reshape must be started" | ||
4524 | " in read-only mode - aborting\n"); | ||
4525 | return -EINVAL; | ||
4526 | } | ||
4527 | } else if (mddev->delta_disks < 0 | ||
4528 | ? (here_new * mddev->new_chunk_sectors <= | ||
4529 | here_old * mddev->chunk_sectors) | ||
4530 | : (here_new * mddev->new_chunk_sectors >= | ||
4531 | here_old * mddev->chunk_sectors)) { | ||
4513 | /* Reading from the same stripe as writing to - bad */ | 4532 | /* Reading from the same stripe as writing to - bad */ |
4514 | printk(KERN_ERR "raid5: reshape_position too early for " | 4533 | printk(KERN_ERR "raid5: reshape_position too early for " |
4515 | "auto-recovery - aborting.\n"); | 4534 | "auto-recovery - aborting.\n"); |
@@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev) | |||
5078 | mddev->degraded--; | 5097 | mddev->degraded--; |
5079 | for (d = conf->raid_disks ; | 5098 | for (d = conf->raid_disks ; |
5080 | d < conf->raid_disks - mddev->delta_disks; | 5099 | d < conf->raid_disks - mddev->delta_disks; |
5081 | d++) | 5100 | d++) { |
5082 | raid5_remove_disk(mddev, d); | 5101 | mdk_rdev_t *rdev = conf->disks[d].rdev; |
5102 | if (rdev && raid5_remove_disk(mddev, d) == 0) { | ||
5103 | char nm[20]; | ||
5104 | sprintf(nm, "rd%d", rdev->raid_disk); | ||
5105 | sysfs_remove_link(&mddev->kobj, nm); | ||
5106 | rdev->raid_disk = -1; | ||
5107 | } | ||
5108 | } | ||
5083 | } | 5109 | } |
5084 | mddev->layout = conf->algorithm; | 5110 | mddev->layout = conf->algorithm; |
5085 | mddev->chunk_sectors = conf->chunk_sectors; | 5111 | mddev->chunk_sectors = conf->chunk_sectors; |