diff options
-rw-r--r-- | drivers/md/raid5.c | 267 | ||||
-rw-r--r-- | drivers/md/raid5.h | 5 |
2 files changed, 157 insertions, 115 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index edbc80c4d346..d019a85547b4 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -4164,95 +4164,49 @@ static struct attribute_group raid5_attrs_group = { | |||
4164 | .attrs = raid5_attrs, | 4164 | .attrs = raid5_attrs, |
4165 | }; | 4165 | }; |
4166 | 4166 | ||
4167 | static int run(mddev_t *mddev) | 4167 | static raid5_conf_t *setup_conf(mddev_t *mddev) |
4168 | { | 4168 | { |
4169 | raid5_conf_t *conf; | 4169 | raid5_conf_t *conf; |
4170 | int raid_disk, memory; | 4170 | int raid_disk, memory; |
4171 | mdk_rdev_t *rdev; | 4171 | mdk_rdev_t *rdev; |
4172 | struct disk_info *disk; | 4172 | struct disk_info *disk; |
4173 | int working_disks = 0; | ||
4174 | 4173 | ||
4175 | if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { | 4174 | if (mddev->new_level != 5 |
4175 | && mddev->new_level != 4 | ||
4176 | && mddev->new_level != 6) { | ||
4176 | printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n", | 4177 | printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n", |
4177 | mdname(mddev), mddev->level); | 4178 | mdname(mddev), mddev->new_level); |
4178 | return -EIO; | 4179 | return ERR_PTR(-EIO); |
4179 | } | 4180 | } |
4180 | if ((mddev->level == 5 && !algorithm_valid_raid5(mddev->layout)) || | 4181 | if ((mddev->new_level == 5 |
4181 | (mddev->level == 6 && !algorithm_valid_raid6(mddev->layout))) { | 4182 | && !algorithm_valid_raid5(mddev->new_layout)) || |
4183 | (mddev->new_level == 6 | ||
4184 | && !algorithm_valid_raid6(mddev->new_layout))) { | ||
4182 | printk(KERN_ERR "raid5: %s: layout %d not supported\n", | 4185 | printk(KERN_ERR "raid5: %s: layout %d not supported\n", |
4183 | mdname(mddev), mddev->layout); | 4186 | mdname(mddev), mddev->new_layout); |
4184 | return -EIO; | 4187 | return ERR_PTR(-EIO); |
4185 | } | 4188 | } |
4186 | 4189 | if (mddev->new_level == 6 && mddev->raid_disks < 4) { | |
4187 | if (mddev->chunk_size < PAGE_SIZE) { | 4190 | printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", |
4188 | printk(KERN_ERR "md/raid5: chunk_size must be at least " | 4191 | mdname(mddev), mddev->raid_disks); |
4189 | "PAGE_SIZE but %d < %ld\n", | 4192 | return ERR_PTR(-EINVAL); |
4190 | mddev->chunk_size, PAGE_SIZE); | ||
4191 | return -EINVAL; | ||
4192 | } | 4193 | } |
4193 | 4194 | ||
4194 | if (mddev->reshape_position != MaxSector) { | 4195 | if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) { |
4195 | /* Check that we can continue the reshape. | 4196 | printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", |
4196 | * Currently only disks can change, it must | 4197 | mddev->new_chunk, mdname(mddev)); |
4197 | * increase, and we must be past the point where | 4198 | return ERR_PTR(-EINVAL); |
4198 | * a stripe over-writes itself | ||
4199 | */ | ||
4200 | sector_t here_new, here_old; | ||
4201 | int old_disks; | ||
4202 | int max_degraded = (mddev->level == 5 ? 1 : 2); | ||
4203 | |||
4204 | if (mddev->new_level != mddev->level || | ||
4205 | mddev->new_layout != mddev->layout || | ||
4206 | mddev->new_chunk != mddev->chunk_size) { | ||
4207 | printk(KERN_ERR "raid5: %s: unsupported reshape " | ||
4208 | "required - aborting.\n", | ||
4209 | mdname(mddev)); | ||
4210 | return -EINVAL; | ||
4211 | } | ||
4212 | if (mddev->delta_disks <= 0) { | ||
4213 | printk(KERN_ERR "raid5: %s: unsupported reshape " | ||
4214 | "(reduce disks) required - aborting.\n", | ||
4215 | mdname(mddev)); | ||
4216 | return -EINVAL; | ||
4217 | } | ||
4218 | old_disks = mddev->raid_disks - mddev->delta_disks; | ||
4219 | /* reshape_position must be on a new-stripe boundary, and one | ||
4220 | * further up in new geometry must map after here in old | ||
4221 | * geometry. | ||
4222 | */ | ||
4223 | here_new = mddev->reshape_position; | ||
4224 | if (sector_div(here_new, (mddev->chunk_size>>9)* | ||
4225 | (mddev->raid_disks - max_degraded))) { | ||
4226 | printk(KERN_ERR "raid5: reshape_position not " | ||
4227 | "on a stripe boundary\n"); | ||
4228 | return -EINVAL; | ||
4229 | } | ||
4230 | /* here_new is the stripe we will write to */ | ||
4231 | here_old = mddev->reshape_position; | ||
4232 | sector_div(here_old, (mddev->chunk_size>>9)* | ||
4233 | (old_disks-max_degraded)); | ||
4234 | /* here_old is the first stripe that we might need to read | ||
4235 | * from */ | ||
4236 | if (here_new >= here_old) { | ||
4237 | /* Reading from the same stripe as writing to - bad */ | ||
4238 | printk(KERN_ERR "raid5: reshape_position too early for " | ||
4239 | "auto-recovery - aborting.\n"); | ||
4240 | return -EINVAL; | ||
4241 | } | ||
4242 | printk(KERN_INFO "raid5: reshape will continue\n"); | ||
4243 | /* OK, we should be able to continue; */ | ||
4244 | } | 4199 | } |
4245 | 4200 | ||
4246 | 4201 | conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL); | |
4247 | mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); | 4202 | if (conf == NULL) |
4248 | if ((conf = mddev->private) == NULL) | ||
4249 | goto abort; | 4203 | goto abort; |
4250 | if (mddev->reshape_position == MaxSector) { | 4204 | |
4251 | conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks; | 4205 | conf->raid_disks = mddev->raid_disks; |
4252 | } else { | 4206 | if (mddev->reshape_position == MaxSector) |
4253 | conf->raid_disks = mddev->raid_disks; | 4207 | conf->previous_raid_disks = mddev->raid_disks; |
4208 | else | ||
4254 | conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; | 4209 | conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; |
4255 | } | ||
4256 | 4210 | ||
4257 | conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), | 4211 | conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), |
4258 | GFP_KERNEL); | 4212 | GFP_KERNEL); |
@@ -4264,13 +4218,12 @@ static int run(mddev_t *mddev) | |||
4264 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) | 4218 | if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) |
4265 | goto abort; | 4219 | goto abort; |
4266 | 4220 | ||
4267 | if (mddev->level == 6) { | 4221 | if (mddev->new_level == 6) { |
4268 | conf->spare_page = alloc_page(GFP_KERNEL); | 4222 | conf->spare_page = alloc_page(GFP_KERNEL); |
4269 | if (!conf->spare_page) | 4223 | if (!conf->spare_page) |
4270 | goto abort; | 4224 | goto abort; |
4271 | } | 4225 | } |
4272 | spin_lock_init(&conf->device_lock); | 4226 | spin_lock_init(&conf->device_lock); |
4273 | mddev->queue->queue_lock = &conf->device_lock; | ||
4274 | init_waitqueue_head(&conf->wait_for_stripe); | 4227 | init_waitqueue_head(&conf->wait_for_stripe); |
4275 | init_waitqueue_head(&conf->wait_for_overlap); | 4228 | init_waitqueue_head(&conf->wait_for_overlap); |
4276 | INIT_LIST_HEAD(&conf->handle_list); | 4229 | INIT_LIST_HEAD(&conf->handle_list); |
@@ -4299,41 +4252,136 @@ static int run(mddev_t *mddev) | |||
4299 | printk(KERN_INFO "raid5: device %s operational as raid" | 4252 | printk(KERN_INFO "raid5: device %s operational as raid" |
4300 | " disk %d\n", bdevname(rdev->bdev,b), | 4253 | " disk %d\n", bdevname(rdev->bdev,b), |
4301 | raid_disk); | 4254 | raid_disk); |
4302 | working_disks++; | ||
4303 | } else | 4255 | } else |
4304 | /* Cannot rely on bitmap to complete recovery */ | 4256 | /* Cannot rely on bitmap to complete recovery */ |
4305 | conf->fullsync = 1; | 4257 | conf->fullsync = 1; |
4306 | } | 4258 | } |
4307 | 4259 | ||
4308 | /* | 4260 | conf->chunk_size = mddev->new_chunk; |
4309 | * 0 for a fully functional array, 1 or 2 for a degraded array. | 4261 | conf->level = mddev->new_level; |
4310 | */ | ||
4311 | mddev->degraded = conf->raid_disks - working_disks; | ||
4312 | conf->mddev = mddev; | ||
4313 | conf->chunk_size = mddev->chunk_size; | ||
4314 | conf->level = mddev->level; | ||
4315 | if (conf->level == 6) | 4262 | if (conf->level == 6) |
4316 | conf->max_degraded = 2; | 4263 | conf->max_degraded = 2; |
4317 | else | 4264 | else |
4318 | conf->max_degraded = 1; | 4265 | conf->max_degraded = 1; |
4319 | conf->algorithm = mddev->layout; | 4266 | conf->algorithm = mddev->new_layout; |
4320 | conf->max_nr_stripes = NR_STRIPES; | 4267 | conf->max_nr_stripes = NR_STRIPES; |
4321 | conf->expand_progress = mddev->reshape_position; | 4268 | conf->expand_progress = mddev->reshape_position; |
4322 | 4269 | ||
4323 | /* device size must be a multiple of chunk size */ | 4270 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + |
4324 | mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); | 4271 | conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; |
4325 | mddev->resync_max_sectors = mddev->dev_sectors; | 4272 | if (grow_stripes(conf, conf->max_nr_stripes)) { |
4273 | printk(KERN_ERR | ||
4274 | "raid5: couldn't allocate %dkB for buffers\n", memory); | ||
4275 | goto abort; | ||
4276 | } else | ||
4277 | printk(KERN_INFO "raid5: allocated %dkB for %s\n", | ||
4278 | memory, mdname(mddev)); | ||
4326 | 4279 | ||
4327 | if (conf->level == 6 && conf->raid_disks < 4) { | 4280 | conf->thread = md_register_thread(raid5d, mddev, "%s_raid5"); |
4328 | printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", | 4281 | if (!conf->thread) { |
4329 | mdname(mddev), conf->raid_disks); | 4282 | printk(KERN_ERR |
4283 | "raid5: couldn't allocate thread for %s\n", | ||
4284 | mdname(mddev)); | ||
4330 | goto abort; | 4285 | goto abort; |
4331 | } | 4286 | } |
4332 | if (!conf->chunk_size || conf->chunk_size % 4) { | 4287 | |
4333 | printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", | 4288 | return conf; |
4334 | conf->chunk_size, mdname(mddev)); | 4289 | |
4335 | goto abort; | 4290 | abort: |
4291 | if (conf) { | ||
4292 | shrink_stripes(conf); | ||
4293 | safe_put_page(conf->spare_page); | ||
4294 | kfree(conf->disks); | ||
4295 | kfree(conf->stripe_hashtbl); | ||
4296 | kfree(conf); | ||
4297 | return ERR_PTR(-EIO); | ||
4298 | } else | ||
4299 | return ERR_PTR(-ENOMEM); | ||
4300 | } | ||
4301 | |||
4302 | static int run(mddev_t *mddev) | ||
4303 | { | ||
4304 | raid5_conf_t *conf; | ||
4305 | int working_disks = 0; | ||
4306 | mdk_rdev_t *rdev; | ||
4307 | |||
4308 | if (mddev->reshape_position != MaxSector) { | ||
4309 | /* Check that we can continue the reshape. | ||
4310 | * Currently only disks can change, it must | ||
4311 | * increase, and we must be past the point where | ||
4312 | * a stripe over-writes itself | ||
4313 | */ | ||
4314 | sector_t here_new, here_old; | ||
4315 | int old_disks; | ||
4316 | int max_degraded = (mddev->level == 5 ? 1 : 2); | ||
4317 | |||
4318 | if (mddev->new_level != mddev->level || | ||
4319 | mddev->new_layout != mddev->layout || | ||
4320 | mddev->new_chunk != mddev->chunk_size) { | ||
4321 | printk(KERN_ERR "raid5: %s: unsupported reshape " | ||
4322 | "required - aborting.\n", | ||
4323 | mdname(mddev)); | ||
4324 | return -EINVAL; | ||
4325 | } | ||
4326 | if (mddev->delta_disks <= 0) { | ||
4327 | printk(KERN_ERR "raid5: %s: unsupported reshape " | ||
4328 | "(reduce disks) required - aborting.\n", | ||
4329 | mdname(mddev)); | ||
4330 | return -EINVAL; | ||
4331 | } | ||
4332 | old_disks = mddev->raid_disks - mddev->delta_disks; | ||
4333 | /* reshape_position must be on a new-stripe boundary, and one | ||
4334 | * further up in new geometry must map after here in old | ||
4335 | * geometry. | ||
4336 | */ | ||
4337 | here_new = mddev->reshape_position; | ||
4338 | if (sector_div(here_new, (mddev->chunk_size>>9)* | ||
4339 | (mddev->raid_disks - max_degraded))) { | ||
4340 | printk(KERN_ERR "raid5: reshape_position not " | ||
4341 | "on a stripe boundary\n"); | ||
4342 | return -EINVAL; | ||
4343 | } | ||
4344 | /* here_new is the stripe we will write to */ | ||
4345 | here_old = mddev->reshape_position; | ||
4346 | sector_div(here_old, (mddev->chunk_size>>9)* | ||
4347 | (old_disks-max_degraded)); | ||
4348 | /* here_old is the first stripe that we might need to read | ||
4349 | * from */ | ||
4350 | if (here_new >= here_old) { | ||
4351 | /* Reading from the same stripe as writing to - bad */ | ||
4352 | printk(KERN_ERR "raid5: reshape_position too early for " | ||
4353 | "auto-recovery - aborting.\n"); | ||
4354 | return -EINVAL; | ||
4355 | } | ||
4356 | printk(KERN_INFO "raid5: reshape will continue\n"); | ||
4357 | /* OK, we should be able to continue; */ | ||
4358 | } else { | ||
4359 | BUG_ON(mddev->level != mddev->new_level); | ||
4360 | BUG_ON(mddev->layout != mddev->new_layout); | ||
4361 | BUG_ON(mddev->chunk_size != mddev->new_chunk); | ||
4362 | BUG_ON(mddev->delta_disks != 0); | ||
4336 | } | 4363 | } |
4364 | conf = setup_conf(mddev); | ||
4365 | |||
4366 | if (conf == NULL) | ||
4367 | return -EIO; | ||
4368 | if (IS_ERR(conf)) | ||
4369 | return PTR_ERR(conf); | ||
4370 | |||
4371 | mddev->thread = conf->thread; | ||
4372 | conf->thread = NULL; | ||
4373 | mddev->private = conf; | ||
4374 | |||
4375 | /* | ||
4376 | * 0 for a fully functional array, 1 or 2 for a degraded array. | ||
4377 | */ | ||
4378 | list_for_each_entry(rdev, &mddev->disks, same_set) | ||
4379 | if (rdev->raid_disk >= 0 && | ||
4380 | test_bit(In_sync, &rdev->flags)) | ||
4381 | working_disks++; | ||
4382 | |||
4383 | mddev->degraded = conf->raid_disks - working_disks; | ||
4384 | |||
4337 | if (mddev->degraded > conf->max_degraded) { | 4385 | if (mddev->degraded > conf->max_degraded) { |
4338 | printk(KERN_ERR "raid5: not enough operational devices for %s" | 4386 | printk(KERN_ERR "raid5: not enough operational devices for %s" |
4339 | " (%d/%d failed)\n", | 4387 | " (%d/%d failed)\n", |
@@ -4341,6 +4389,10 @@ static int run(mddev_t *mddev) | |||
4341 | goto abort; | 4389 | goto abort; |
4342 | } | 4390 | } |
4343 | 4391 | ||
4392 | /* device size must be a multiple of chunk size */ | ||
4393 | mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); | ||
4394 | mddev->resync_max_sectors = mddev->dev_sectors; | ||
4395 | |||
4344 | if (mddev->degraded > 0 && | 4396 | if (mddev->degraded > 0 && |
4345 | mddev->recovery_cp != MaxSector) { | 4397 | mddev->recovery_cp != MaxSector) { |
4346 | if (mddev->ok_start_degraded) | 4398 | if (mddev->ok_start_degraded) |
@@ -4356,27 +4408,6 @@ static int run(mddev_t *mddev) | |||
4356 | } | 4408 | } |
4357 | } | 4409 | } |
4358 | 4410 | ||
4359 | { | ||
4360 | mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5"); | ||
4361 | if (!mddev->thread) { | ||
4362 | printk(KERN_ERR | ||
4363 | "raid5: couldn't allocate thread for %s\n", | ||
4364 | mdname(mddev)); | ||
4365 | goto abort; | ||
4366 | } | ||
4367 | } | ||
4368 | memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + | ||
4369 | conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; | ||
4370 | if (grow_stripes(conf, conf->max_nr_stripes)) { | ||
4371 | printk(KERN_ERR | ||
4372 | "raid5: couldn't allocate %dkB for buffers\n", memory); | ||
4373 | shrink_stripes(conf); | ||
4374 | md_unregister_thread(mddev->thread); | ||
4375 | goto abort; | ||
4376 | } else | ||
4377 | printk(KERN_INFO "raid5: allocated %dkB for %s\n", | ||
4378 | memory, mdname(mddev)); | ||
4379 | |||
4380 | if (mddev->degraded == 0) | 4411 | if (mddev->degraded == 0) |
4381 | printk("raid5: raid level %d set %s active with %d out of %d" | 4412 | printk("raid5: raid level %d set %s active with %d out of %d" |
4382 | " devices, algorithm %d\n", conf->level, mdname(mddev), | 4413 | " devices, algorithm %d\n", conf->level, mdname(mddev), |
@@ -4419,6 +4450,8 @@ static int run(mddev_t *mddev) | |||
4419 | "raid5: failed to create sysfs attributes for %s\n", | 4450 | "raid5: failed to create sysfs attributes for %s\n", |
4420 | mdname(mddev)); | 4451 | mdname(mddev)); |
4421 | 4452 | ||
4453 | mddev->queue->queue_lock = &conf->device_lock; | ||
4454 | |||
4422 | mddev->queue->unplug_fn = raid5_unplug_device; | 4455 | mddev->queue->unplug_fn = raid5_unplug_device; |
4423 | mddev->queue->backing_dev_info.congested_data = mddev; | 4456 | mddev->queue->backing_dev_info.congested_data = mddev; |
4424 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | 4457 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; |
@@ -4430,7 +4463,11 @@ static int run(mddev_t *mddev) | |||
4430 | 4463 | ||
4431 | return 0; | 4464 | return 0; |
4432 | abort: | 4465 | abort: |
4466 | if (mddev->thread) | ||
4467 | md_unregister_thread(mddev->thread); | ||
4468 | mddev->thread = NULL; | ||
4433 | if (conf) { | 4469 | if (conf) { |
4470 | shrink_stripes(conf); | ||
4434 | print_raid5_conf(conf); | 4471 | print_raid5_conf(conf); |
4435 | safe_put_page(conf->spare_page); | 4472 | safe_put_page(conf->spare_page); |
4436 | kfree(conf->disks); | 4473 | kfree(conf->disks); |
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 84456b1af204..c172371481c7 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h | |||
@@ -386,6 +386,11 @@ struct raid5_private_data { | |||
386 | int pool_size; /* number of disks in stripeheads in pool */ | 386 | int pool_size; /* number of disks in stripeheads in pool */ |
387 | spinlock_t device_lock; | 387 | spinlock_t device_lock; |
388 | struct disk_info *disks; | 388 | struct disk_info *disks; |
389 | |||
390 | /* When taking over an array from a different personality, we store | ||
391 | * the new thread here until we fully activate the array. | ||
392 | */ | ||
393 | struct mdk_thread_s *thread; | ||
389 | }; | 394 | }; |
390 | 395 | ||
391 | typedef struct raid5_private_data raid5_conf_t; | 396 | typedef struct raid5_private_data raid5_conf_t; |