aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author NeilBrown <neilb@suse.de> 2009-03-30 23:39:39 -0400
committer NeilBrown <neilb@suse.de> 2009-03-30 23:39:39 -0400
commit 91adb56473febeeb3ef657bb5147ddd355465700 (patch)
tree 9ed414dd2bf999d91a536449c01273ee23ddbcfa
parent 34817e8c3948ea20316dfa8fd8947d6d0ee82ba9 (diff)
md/raid5: refactor raid5 "run"
.. so that the code to create the private data structures is separate. This will help with future code to change the level of an active array.

Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r-- drivers/md/raid5.c 267
-rw-r--r-- drivers/md/raid5.h 5
2 files changed, 157 insertions, 115 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index edbc80c4d346..d019a85547b4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -4164,95 +4164,49 @@ static struct attribute_group raid5_attrs_group = {
4164 .attrs = raid5_attrs, 4164 .attrs = raid5_attrs,
4165}; 4165};
4166 4166
4167static int run(mddev_t *mddev) 4167static raid5_conf_t *setup_conf(mddev_t *mddev)
4168{ 4168{
4169 raid5_conf_t *conf; 4169 raid5_conf_t *conf;
4170 int raid_disk, memory; 4170 int raid_disk, memory;
4171 mdk_rdev_t *rdev; 4171 mdk_rdev_t *rdev;
4172 struct disk_info *disk; 4172 struct disk_info *disk;
4173 int working_disks = 0;
4174 4173
4175 if (mddev->level != 5 && mddev->level != 4 && mddev->level != 6) { 4174 if (mddev->new_level != 5
4175 && mddev->new_level != 4
4176 && mddev->new_level != 6) {
4176 printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n", 4177 printk(KERN_ERR "raid5: %s: raid level not set to 4/5/6 (%d)\n",
4177 mdname(mddev), mddev->level); 4178 mdname(mddev), mddev->new_level);
4178 return -EIO; 4179 return ERR_PTR(-EIO);
4179 } 4180 }
4180 if ((mddev->level == 5 && !algorithm_valid_raid5(mddev->layout)) || 4181 if ((mddev->new_level == 5
4181 (mddev->level == 6 && !algorithm_valid_raid6(mddev->layout))) { 4182 && !algorithm_valid_raid5(mddev->new_layout)) ||
4183 (mddev->new_level == 6
4184 && !algorithm_valid_raid6(mddev->new_layout))) {
4182 printk(KERN_ERR "raid5: %s: layout %d not supported\n", 4185 printk(KERN_ERR "raid5: %s: layout %d not supported\n",
4183 mdname(mddev), mddev->layout); 4186 mdname(mddev), mddev->new_layout);
4184 return -EIO; 4187 return ERR_PTR(-EIO);
4185 } 4188 }
4186 4189 if (mddev->new_level == 6 && mddev->raid_disks < 4) {
4187 if (mddev->chunk_size < PAGE_SIZE) { 4190 printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n",
4188 printk(KERN_ERR "md/raid5: chunk_size must be at least " 4191 mdname(mddev), mddev->raid_disks);
4189 "PAGE_SIZE but %d < %ld\n", 4192 return ERR_PTR(-EINVAL);
4190 mddev->chunk_size, PAGE_SIZE);
4191 return -EINVAL;
4192 } 4193 }
4193 4194
4194 if (mddev->reshape_position != MaxSector) { 4195 if (!mddev->new_chunk || mddev->new_chunk % PAGE_SIZE) {
4195 /* Check that we can continue the reshape. 4196 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n",
4196 * Currently only disks can change, it must 4197 mddev->new_chunk, mdname(mddev));
4197 * increase, and we must be past the point where 4198 return ERR_PTR(-EINVAL);
4198 * a stripe over-writes itself
4199 */
4200 sector_t here_new, here_old;
4201 int old_disks;
4202 int max_degraded = (mddev->level == 5 ? 1 : 2);
4203
4204 if (mddev->new_level != mddev->level ||
4205 mddev->new_layout != mddev->layout ||
4206 mddev->new_chunk != mddev->chunk_size) {
4207 printk(KERN_ERR "raid5: %s: unsupported reshape "
4208 "required - aborting.\n",
4209 mdname(mddev));
4210 return -EINVAL;
4211 }
4212 if (mddev->delta_disks <= 0) {
4213 printk(KERN_ERR "raid5: %s: unsupported reshape "
4214 "(reduce disks) required - aborting.\n",
4215 mdname(mddev));
4216 return -EINVAL;
4217 }
4218 old_disks = mddev->raid_disks - mddev->delta_disks;
4219 /* reshape_position must be on a new-stripe boundary, and one
4220 * further up in new geometry must map after here in old
4221 * geometry.
4222 */
4223 here_new = mddev->reshape_position;
4224 if (sector_div(here_new, (mddev->chunk_size>>9)*
4225 (mddev->raid_disks - max_degraded))) {
4226 printk(KERN_ERR "raid5: reshape_position not "
4227 "on a stripe boundary\n");
4228 return -EINVAL;
4229 }
4230 /* here_new is the stripe we will write to */
4231 here_old = mddev->reshape_position;
4232 sector_div(here_old, (mddev->chunk_size>>9)*
4233 (old_disks-max_degraded));
4234 /* here_old is the first stripe that we might need to read
4235 * from */
4236 if (here_new >= here_old) {
4237 /* Reading from the same stripe as writing to - bad */
4238 printk(KERN_ERR "raid5: reshape_position too early for "
4239 "auto-recovery - aborting.\n");
4240 return -EINVAL;
4241 }
4242 printk(KERN_INFO "raid5: reshape will continue\n");
4243 /* OK, we should be able to continue; */
4244 } 4199 }
4245 4200
4246 4201 conf = kzalloc(sizeof(raid5_conf_t), GFP_KERNEL);
4247 mddev->private = kzalloc(sizeof (raid5_conf_t), GFP_KERNEL); 4202 if (conf == NULL)
4248 if ((conf = mddev->private) == NULL)
4249 goto abort; 4203 goto abort;
4250 if (mddev->reshape_position == MaxSector) { 4204
4251 conf->previous_raid_disks = conf->raid_disks = mddev->raid_disks; 4205 conf->raid_disks = mddev->raid_disks;
4252 } else { 4206 if (mddev->reshape_position == MaxSector)
4253 conf->raid_disks = mddev->raid_disks; 4207 conf->previous_raid_disks = mddev->raid_disks;
4208 else
4254 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks; 4209 conf->previous_raid_disks = mddev->raid_disks - mddev->delta_disks;
4255 }
4256 4210
4257 conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info), 4211 conf->disks = kzalloc(conf->raid_disks * sizeof(struct disk_info),
4258 GFP_KERNEL); 4212 GFP_KERNEL);
@@ -4264,13 +4218,12 @@ static int run(mddev_t *mddev)
4264 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) 4218 if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
4265 goto abort; 4219 goto abort;
4266 4220
4267 if (mddev->level == 6) { 4221 if (mddev->new_level == 6) {
4268 conf->spare_page = alloc_page(GFP_KERNEL); 4222 conf->spare_page = alloc_page(GFP_KERNEL);
4269 if (!conf->spare_page) 4223 if (!conf->spare_page)
4270 goto abort; 4224 goto abort;
4271 } 4225 }
4272 spin_lock_init(&conf->device_lock); 4226 spin_lock_init(&conf->device_lock);
4273 mddev->queue->queue_lock = &conf->device_lock;
4274 init_waitqueue_head(&conf->wait_for_stripe); 4227 init_waitqueue_head(&conf->wait_for_stripe);
4275 init_waitqueue_head(&conf->wait_for_overlap); 4228 init_waitqueue_head(&conf->wait_for_overlap);
4276 INIT_LIST_HEAD(&conf->handle_list); 4229 INIT_LIST_HEAD(&conf->handle_list);
@@ -4299,41 +4252,136 @@ static int run(mddev_t *mddev)
4299 printk(KERN_INFO "raid5: device %s operational as raid" 4252 printk(KERN_INFO "raid5: device %s operational as raid"
4300 " disk %d\n", bdevname(rdev->bdev,b), 4253 " disk %d\n", bdevname(rdev->bdev,b),
4301 raid_disk); 4254 raid_disk);
4302 working_disks++;
4303 } else 4255 } else
4304 /* Cannot rely on bitmap to complete recovery */ 4256 /* Cannot rely on bitmap to complete recovery */
4305 conf->fullsync = 1; 4257 conf->fullsync = 1;
4306 } 4258 }
4307 4259
4308 /* 4260 conf->chunk_size = mddev->new_chunk;
4309 * 0 for a fully functional array, 1 or 2 for a degraded array. 4261 conf->level = mddev->new_level;
4310 */
4311 mddev->degraded = conf->raid_disks - working_disks;
4312 conf->mddev = mddev;
4313 conf->chunk_size = mddev->chunk_size;
4314 conf->level = mddev->level;
4315 if (conf->level == 6) 4262 if (conf->level == 6)
4316 conf->max_degraded = 2; 4263 conf->max_degraded = 2;
4317 else 4264 else
4318 conf->max_degraded = 1; 4265 conf->max_degraded = 1;
4319 conf->algorithm = mddev->layout; 4266 conf->algorithm = mddev->new_layout;
4320 conf->max_nr_stripes = NR_STRIPES; 4267 conf->max_nr_stripes = NR_STRIPES;
4321 conf->expand_progress = mddev->reshape_position; 4268 conf->expand_progress = mddev->reshape_position;
4322 4269
4323 /* device size must be a multiple of chunk size */ 4270 memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
4324 mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1); 4271 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
4325 mddev->resync_max_sectors = mddev->dev_sectors; 4272 if (grow_stripes(conf, conf->max_nr_stripes)) {
4273 printk(KERN_ERR
4274 "raid5: couldn't allocate %dkB for buffers\n", memory);
4275 goto abort;
4276 } else
4277 printk(KERN_INFO "raid5: allocated %dkB for %s\n",
4278 memory, mdname(mddev));
4326 4279
4327 if (conf->level == 6 && conf->raid_disks < 4) { 4280 conf->thread = md_register_thread(raid5d, mddev, "%s_raid5");
4328 printk(KERN_ERR "raid6: not enough configured devices for %s (%d, minimum 4)\n", 4281 if (!conf->thread) {
4329 mdname(mddev), conf->raid_disks); 4282 printk(KERN_ERR
4283 "raid5: couldn't allocate thread for %s\n",
4284 mdname(mddev));
4330 goto abort; 4285 goto abort;
4331 } 4286 }
4332 if (!conf->chunk_size || conf->chunk_size % 4) { 4287
4333 printk(KERN_ERR "raid5: invalid chunk size %d for %s\n", 4288 return conf;
4334 conf->chunk_size, mdname(mddev)); 4289
4335 goto abort; 4290 abort:
4291 if (conf) {
4292 shrink_stripes(conf);
4293 safe_put_page(conf->spare_page);
4294 kfree(conf->disks);
4295 kfree(conf->stripe_hashtbl);
4296 kfree(conf);
4297 return ERR_PTR(-EIO);
4298 } else
4299 return ERR_PTR(-ENOMEM);
4300}
4301
4302static int run(mddev_t *mddev)
4303{
4304 raid5_conf_t *conf;
4305 int working_disks = 0;
4306 mdk_rdev_t *rdev;
4307
4308 if (mddev->reshape_position != MaxSector) {
4309 /* Check that we can continue the reshape.
4310 * Currently only disks can change, it must
4311 * increase, and we must be past the point where
4312 * a stripe over-writes itself
4313 */
4314 sector_t here_new, here_old;
4315 int old_disks;
4316 int max_degraded = (mddev->level == 5 ? 1 : 2);
4317
4318 if (mddev->new_level != mddev->level ||
4319 mddev->new_layout != mddev->layout ||
4320 mddev->new_chunk != mddev->chunk_size) {
4321 printk(KERN_ERR "raid5: %s: unsupported reshape "
4322 "required - aborting.\n",
4323 mdname(mddev));
4324 return -EINVAL;
4325 }
4326 if (mddev->delta_disks <= 0) {
4327 printk(KERN_ERR "raid5: %s: unsupported reshape "
4328 "(reduce disks) required - aborting.\n",
4329 mdname(mddev));
4330 return -EINVAL;
4331 }
4332 old_disks = mddev->raid_disks - mddev->delta_disks;
4333 /* reshape_position must be on a new-stripe boundary, and one
4334 * further up in new geometry must map after here in old
4335 * geometry.
4336 */
4337 here_new = mddev->reshape_position;
4338 if (sector_div(here_new, (mddev->chunk_size>>9)*
4339 (mddev->raid_disks - max_degraded))) {
4340 printk(KERN_ERR "raid5: reshape_position not "
4341 "on a stripe boundary\n");
4342 return -EINVAL;
4343 }
4344 /* here_new is the stripe we will write to */
4345 here_old = mddev->reshape_position;
4346 sector_div(here_old, (mddev->chunk_size>>9)*
4347 (old_disks-max_degraded));
4348 /* here_old is the first stripe that we might need to read
4349 * from */
4350 if (here_new >= here_old) {
4351 /* Reading from the same stripe as writing to - bad */
4352 printk(KERN_ERR "raid5: reshape_position too early for "
4353 "auto-recovery - aborting.\n");
4354 return -EINVAL;
4355 }
4356 printk(KERN_INFO "raid5: reshape will continue\n");
4357 /* OK, we should be able to continue; */
4358 } else {
4359 BUG_ON(mddev->level != mddev->new_level);
4360 BUG_ON(mddev->layout != mddev->new_layout);
4361 BUG_ON(mddev->chunk_size != mddev->new_chunk);
4362 BUG_ON(mddev->delta_disks != 0);
4336 } 4363 }
4364 conf = setup_conf(mddev);
4365
4366 if (conf == NULL)
4367 return -EIO;
4368 if (IS_ERR(conf))
4369 return PTR_ERR(conf);
4370
4371 mddev->thread = conf->thread;
4372 conf->thread = NULL;
4373 mddev->private = conf;
4374
4375 /*
4376 * 0 for a fully functional array, 1 or 2 for a degraded array.
4377 */
4378 list_for_each_entry(rdev, &mddev->disks, same_set)
4379 if (rdev->raid_disk >= 0 &&
4380 test_bit(In_sync, &rdev->flags))
4381 working_disks++;
4382
4383 mddev->degraded = conf->raid_disks - working_disks;
4384
4337 if (mddev->degraded > conf->max_degraded) { 4385 if (mddev->degraded > conf->max_degraded) {
4338 printk(KERN_ERR "raid5: not enough operational devices for %s" 4386 printk(KERN_ERR "raid5: not enough operational devices for %s"
4339 " (%d/%d failed)\n", 4387 " (%d/%d failed)\n",
@@ -4341,6 +4389,10 @@ static int run(mddev_t *mddev)
4341 goto abort; 4389 goto abort;
4342 } 4390 }
4343 4391
4392 /* device size must be a multiple of chunk size */
4393 mddev->dev_sectors &= ~(mddev->chunk_size / 512 - 1);
4394 mddev->resync_max_sectors = mddev->dev_sectors;
4395
4344 if (mddev->degraded > 0 && 4396 if (mddev->degraded > 0 &&
4345 mddev->recovery_cp != MaxSector) { 4397 mddev->recovery_cp != MaxSector) {
4346 if (mddev->ok_start_degraded) 4398 if (mddev->ok_start_degraded)
@@ -4356,27 +4408,6 @@ static int run(mddev_t *mddev)
4356 } 4408 }
4357 } 4409 }
4358 4410
4359 {
4360 mddev->thread = md_register_thread(raid5d, mddev, "%s_raid5");
4361 if (!mddev->thread) {
4362 printk(KERN_ERR
4363 "raid5: couldn't allocate thread for %s\n",
4364 mdname(mddev));
4365 goto abort;
4366 }
4367 }
4368 memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
4369 conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
4370 if (grow_stripes(conf, conf->max_nr_stripes)) {
4371 printk(KERN_ERR
4372 "raid5: couldn't allocate %dkB for buffers\n", memory);
4373 shrink_stripes(conf);
4374 md_unregister_thread(mddev->thread);
4375 goto abort;
4376 } else
4377 printk(KERN_INFO "raid5: allocated %dkB for %s\n",
4378 memory, mdname(mddev));
4379
4380 if (mddev->degraded == 0) 4411 if (mddev->degraded == 0)
4381 printk("raid5: raid level %d set %s active with %d out of %d" 4412 printk("raid5: raid level %d set %s active with %d out of %d"
4382 " devices, algorithm %d\n", conf->level, mdname(mddev), 4413 " devices, algorithm %d\n", conf->level, mdname(mddev),
@@ -4419,6 +4450,8 @@ static int run(mddev_t *mddev)
4419 "raid5: failed to create sysfs attributes for %s\n", 4450 "raid5: failed to create sysfs attributes for %s\n",
4420 mdname(mddev)); 4451 mdname(mddev));
4421 4452
4453 mddev->queue->queue_lock = &conf->device_lock;
4454
4422 mddev->queue->unplug_fn = raid5_unplug_device; 4455 mddev->queue->unplug_fn = raid5_unplug_device;
4423 mddev->queue->backing_dev_info.congested_data = mddev; 4456 mddev->queue->backing_dev_info.congested_data = mddev;
4424 mddev->queue->backing_dev_info.congested_fn = raid5_congested; 4457 mddev->queue->backing_dev_info.congested_fn = raid5_congested;
@@ -4430,7 +4463,11 @@ static int run(mddev_t *mddev)
4430 4463
4431 return 0; 4464 return 0;
4432abort: 4465abort:
4466 if (mddev->thread)
4467 md_unregister_thread(mddev->thread);
4468 mddev->thread = NULL;
4433 if (conf) { 4469 if (conf) {
4470 shrink_stripes(conf);
4434 print_raid5_conf(conf); 4471 print_raid5_conf(conf);
4435 safe_put_page(conf->spare_page); 4472 safe_put_page(conf->spare_page);
4436 kfree(conf->disks); 4473 kfree(conf->disks);
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index 84456b1af204..c172371481c7 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -386,6 +386,11 @@ struct raid5_private_data {
386 int pool_size; /* number of disks in stripeheads in pool */ 386 int pool_size; /* number of disks in stripeheads in pool */
387 spinlock_t device_lock; 387 spinlock_t device_lock;
388 struct disk_info *disks; 388 struct disk_info *disks;
389
390 /* When taking over an array from a different personality, we store
391 * the new thread here until we fully activate the array.
392 */
393 struct mdk_thread_s *thread;
389}; 394};
390 395
391typedef struct raid5_private_data raid5_conf_t; 396typedef struct raid5_private_data raid5_conf_t;