diff options
-rw-r--r-- | drivers/md/md.c | 2 | ||||
-rw-r--r-- | drivers/md/raid5.c | 131 | ||||
-rw-r--r-- | drivers/md/raid6main.c | 4 | ||||
-rw-r--r-- | include/linux/raid/raid5.h | 9 |
4 files changed, 137 insertions, 9 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index a3ecaf8ed30a..c7b7656f9aa5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -2775,7 +2775,6 @@ static void autorun_array(mddev_t *mddev) | |||
2775 | */ | 2775 | */ |
2776 | static void autorun_devices(int part) | 2776 | static void autorun_devices(int part) |
2777 | { | 2777 | { |
2778 | struct list_head candidates; | ||
2779 | struct list_head *tmp; | 2778 | struct list_head *tmp; |
2780 | mdk_rdev_t *rdev0, *rdev; | 2779 | mdk_rdev_t *rdev0, *rdev; |
2781 | mddev_t *mddev; | 2780 | mddev_t *mddev; |
@@ -2784,6 +2783,7 @@ static void autorun_devices(int part) | |||
2784 | printk(KERN_INFO "md: autorun ...\n"); | 2783 | printk(KERN_INFO "md: autorun ...\n"); |
2785 | while (!list_empty(&pending_raid_disks)) { | 2784 | while (!list_empty(&pending_raid_disks)) { |
2786 | dev_t dev; | 2785 | dev_t dev; |
2786 | LIST_HEAD(candidates); | ||
2787 | rdev0 = list_entry(pending_raid_disks.next, | 2787 | rdev0 = list_entry(pending_raid_disks.next, |
2788 | mdk_rdev_t, same_set); | 2788 | mdk_rdev_t, same_set); |
2789 | 2789 | ||
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 03f31379cebb..6c20b44509d8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -313,20 +313,143 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
313 | kmem_cache_t *sc; | 313 | kmem_cache_t *sc; |
314 | int devs = conf->raid_disks; | 314 | int devs = conf->raid_disks; |
315 | 315 | ||
316 | sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev)); | 316 | sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev)); |
317 | 317 | sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev)); | |
318 | sc = kmem_cache_create(conf->cache_name, | 318 | conf->active_name = 0; |
319 | sc = kmem_cache_create(conf->cache_name[conf->active_name], | ||
319 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), | 320 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), |
320 | 0, 0, NULL, NULL); | 321 | 0, 0, NULL, NULL); |
321 | if (!sc) | 322 | if (!sc) |
322 | return 1; | 323 | return 1; |
323 | conf->slab_cache = sc; | 324 | conf->slab_cache = sc; |
325 | conf->pool_size = devs; | ||
324 | while (num--) { | 326 | while (num--) { |
325 | if (!grow_one_stripe(conf)) | 327 | if (!grow_one_stripe(conf)) |
326 | return 1; | 328 | return 1; |
327 | } | 329 | } |
328 | return 0; | 330 | return 0; |
329 | } | 331 | } |
332 | static int resize_stripes(raid5_conf_t *conf, int newsize) | ||
333 | { | ||
334 | /* Make all the stripes able to hold 'newsize' devices. | ||
335 | * New slots in each stripe get 'page' set to a new page. | ||
336 | * | ||
337 | * This happens in stages: | ||
338 | * 1/ create a new kmem_cache and allocate the required number of | ||
339 | * stripe_heads. | ||
340 | * 2/ gather all the old stripe_heads and transfer the pages across | ||
341 | * to the new stripe_heads. This will have the side effect of | ||
342 | * freezing the array as once all stripe_heads have been collected, | ||
343 | * no IO will be possible. Old stripe heads are freed once their | ||
344 | * pages have been transferred over, and the old kmem_cache is | ||
345 | * freed when all stripes are done. | ||
346 | * 3/ reallocate conf->disks to be suitably bigger. If this fails, | ||
347 | * we simply return a failure status - no need to clean anything up. | ||
348 | * 4/ allocate new pages for the new slots in the new stripe_heads. | ||
349 | * If this fails, we don't bother trying to shrink the | ||
350 | * stripe_heads down again, we just leave them as they are. | ||
351 | * As each stripe_head is processed the new one is released into | ||
352 | * active service. | ||
353 | * | ||
354 | * Once step2 is started, we cannot afford to wait for a write, | ||
355 | * so we use GFP_NOIO allocations. | ||
356 | */ | ||
357 | struct stripe_head *osh, *nsh; | ||
358 | LIST_HEAD(newstripes); | ||
359 | struct disk_info *ndisks; | ||
360 | int err = 0; | ||
361 | kmem_cache_t *sc; | ||
362 | int i; | ||
363 | |||
364 | if (newsize <= conf->pool_size) | ||
365 | return 0; /* never bother to shrink */ | ||
366 | |||
367 | /* Step 1 */ | ||
368 | sc = kmem_cache_create(conf->cache_name[1-conf->active_name], | ||
369 | sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), | ||
370 | 0, 0, NULL, NULL); | ||
371 | if (!sc) | ||
372 | return -ENOMEM; | ||
373 | |||
374 | for (i = conf->max_nr_stripes; i; i--) { | ||
375 | nsh = kmem_cache_alloc(sc, GFP_KERNEL); | ||
376 | if (!nsh) | ||
377 | break; | ||
378 | |||
379 | memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev)); | ||
380 | |||
381 | nsh->raid_conf = conf; | ||
382 | spin_lock_init(&nsh->lock); | ||
383 | |||
384 | list_add(&nsh->lru, &newstripes); | ||
385 | } | ||
386 | if (i) { | ||
387 | /* didn't get enough, give up */ | ||
388 | while (!list_empty(&newstripes)) { | ||
389 | nsh = list_entry(newstripes.next, struct stripe_head, lru); | ||
390 | list_del(&nsh->lru); | ||
391 | kmem_cache_free(sc, nsh); | ||
392 | } | ||
393 | kmem_cache_destroy(sc); | ||
394 | return -ENOMEM; | ||
395 | } | ||
396 | /* Step 2 - Must use GFP_NOIO now. | ||
397 | * OK, we have enough stripes, start collecting inactive | ||
398 | * stripes and copying them over | ||
399 | */ | ||
400 | list_for_each_entry(nsh, &newstripes, lru) { | ||
401 | spin_lock_irq(&conf->device_lock); | ||
402 | wait_event_lock_irq(conf->wait_for_stripe, | ||
403 | !list_empty(&conf->inactive_list), | ||
404 | conf->device_lock, | ||
405 | unplug_slaves(conf->mddev); | ||
406 | ); | ||
407 | osh = get_free_stripe(conf); | ||
408 | spin_unlock_irq(&conf->device_lock); | ||
409 | atomic_set(&nsh->count, 1); | ||
410 | for(i=0; i<conf->pool_size; i++) | ||
411 | nsh->dev[i].page = osh->dev[i].page; | ||
412 | for( ; i<newsize; i++) | ||
413 | nsh->dev[i].page = NULL; | ||
414 | kmem_cache_free(conf->slab_cache, osh); | ||
415 | } | ||
416 | kmem_cache_destroy(conf->slab_cache); | ||
417 | |||
418 | /* Step 3. | ||
419 | * At this point, we are holding all the stripes so the array | ||
420 | * is completely stalled, so now is a good time to resize | ||
421 | * conf->disks. | ||
422 | */ | ||
423 | ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); | ||
424 | if (ndisks) { | ||
425 | for (i=0; i<conf->raid_disks; i++) | ||
426 | ndisks[i] = conf->disks[i]; | ||
427 | kfree(conf->disks); | ||
428 | conf->disks = ndisks; | ||
429 | } else | ||
430 | err = -ENOMEM; | ||
431 | |||
432 | /* Step 4, return new stripes to service */ | ||
433 | while(!list_empty(&newstripes)) { | ||
434 | nsh = list_entry(newstripes.next, struct stripe_head, lru); | ||
435 | list_del_init(&nsh->lru); | ||
436 | for (i=conf->raid_disks; i < newsize; i++) | ||
437 | if (nsh->dev[i].page == NULL) { | ||
438 | struct page *p = alloc_page(GFP_NOIO); | ||
439 | nsh->dev[i].page = p; | ||
440 | if (!p) | ||
441 | err = -ENOMEM; | ||
442 | } | ||
443 | release_stripe(nsh); | ||
444 | } | ||
445 | /* critical section passed, GFP_NOIO no longer needed */ | ||
446 | |||
447 | conf->slab_cache = sc; | ||
448 | conf->active_name = 1-conf->active_name; | ||
449 | conf->pool_size = newsize; | ||
450 | return err; | ||
451 | } | ||
452 | |||
330 | 453 | ||
331 | static int drop_one_stripe(raid5_conf_t *conf) | 454 | static int drop_one_stripe(raid5_conf_t *conf) |
332 | { | 455 | { |
@@ -339,7 +462,7 @@ static int drop_one_stripe(raid5_conf_t *conf) | |||
339 | return 0; | 462 | return 0; |
340 | if (atomic_read(&sh->count)) | 463 | if (atomic_read(&sh->count)) |
341 | BUG(); | 464 | BUG(); |
342 | shrink_buffers(sh, conf->raid_disks); | 465 | shrink_buffers(sh, conf->pool_size); |
343 | kmem_cache_free(conf->slab_cache, sh); | 466 | kmem_cache_free(conf->slab_cache, sh); |
344 | atomic_dec(&conf->active_stripes); | 467 | atomic_dec(&conf->active_stripes); |
345 | return 1; | 468 | return 1; |
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index c7632f6cc487..6df4930fddec 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c | |||
@@ -331,9 +331,9 @@ static int grow_stripes(raid6_conf_t *conf, int num) | |||
331 | kmem_cache_t *sc; | 331 | kmem_cache_t *sc; |
332 | int devs = conf->raid_disks; | 332 | int devs = conf->raid_disks; |
333 | 333 | ||
334 | sprintf(conf->cache_name, "raid6/%s", mdname(conf->mddev)); | 334 | sprintf(conf->cache_name[0], "raid6/%s", mdname(conf->mddev)); |
335 | 335 | ||
336 | sc = kmem_cache_create(conf->cache_name, | 336 | sc = kmem_cache_create(conf->cache_name[0], |
337 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), | 337 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), |
338 | 0, 0, NULL, NULL); | 338 | 0, 0, NULL, NULL); |
339 | if (!sc) | 339 | if (!sc) |
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 94dbdd406f12..b7b2653af7bb 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h | |||
@@ -216,7 +216,11 @@ struct raid5_private_data { | |||
216 | struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ | 216 | struct list_head bitmap_list; /* stripes delaying awaiting bitmap update */ |
217 | atomic_t preread_active_stripes; /* stripes with scheduled io */ | 217 | atomic_t preread_active_stripes; /* stripes with scheduled io */ |
218 | 218 | ||
219 | char cache_name[20]; | 219 | /* unfortunately we need two cache names as we temporarily have |
220 | * two caches. | ||
221 | */ | ||
222 | int active_name; | ||
223 | char cache_name[2][20]; | ||
220 | kmem_cache_t *slab_cache; /* for allocating stripes */ | 224 | kmem_cache_t *slab_cache; /* for allocating stripes */ |
221 | 225 | ||
222 | int seq_flush, seq_write; | 226 | int seq_flush, seq_write; |
@@ -238,7 +242,8 @@ struct raid5_private_data { | |||
238 | wait_queue_head_t wait_for_overlap; | 242 | wait_queue_head_t wait_for_overlap; |
239 | int inactive_blocked; /* release of inactive stripes blocked, | 243 | int inactive_blocked; /* release of inactive stripes blocked, |
240 | * waiting for 25% to be free | 244 | * waiting for 25% to be free |
241 | */ | 245 | */ |
246 | int pool_size; /* number of disks in stripeheads in pool */ | ||
242 | spinlock_t device_lock; | 247 | spinlock_t device_lock; |
243 | struct disk_info *disks; | 248 | struct disk_info *disks; |
244 | }; | 249 | }; |