diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 131 |
1 files changed, 127 insertions, 4 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 03f31379cebb..6c20b44509d8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
@@ -313,20 +313,143 @@ static int grow_stripes(raid5_conf_t *conf, int num) | |||
313 | kmem_cache_t *sc; | 313 | kmem_cache_t *sc; |
314 | int devs = conf->raid_disks; | 314 | int devs = conf->raid_disks; |
315 | 315 | ||
316 | sprintf(conf->cache_name, "raid5/%s", mdname(conf->mddev)); | 316 | sprintf(conf->cache_name[0], "raid5/%s", mdname(conf->mddev)); |
317 | 317 | sprintf(conf->cache_name[1], "raid5/%s-alt", mdname(conf->mddev)); | |
318 | sc = kmem_cache_create(conf->cache_name, | 318 | conf->active_name = 0; |
319 | sc = kmem_cache_create(conf->cache_name[conf->active_name], | ||
319 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), | 320 | sizeof(struct stripe_head)+(devs-1)*sizeof(struct r5dev), |
320 | 0, 0, NULL, NULL); | 321 | 0, 0, NULL, NULL); |
321 | if (!sc) | 322 | if (!sc) |
322 | return 1; | 323 | return 1; |
323 | conf->slab_cache = sc; | 324 | conf->slab_cache = sc; |
325 | conf->pool_size = devs; | ||
324 | while (num--) { | 326 | while (num--) { |
325 | if (!grow_one_stripe(conf)) | 327 | if (!grow_one_stripe(conf)) |
326 | return 1; | 328 | return 1; |
327 | } | 329 | } |
328 | return 0; | 330 | return 0; |
329 | } | 331 | } |
332 | static int resize_stripes(raid5_conf_t *conf, int newsize) | ||
333 | { | ||
334 | /* Make all the stripes able to hold 'newsize' devices. | ||
335 | * New slots in each stripe get 'page' set to a new page. | ||
336 | * | ||
337 | * This happens in stages: | ||
338 | * 1/ create a new kmem_cache and allocate the required number of | ||
339 | * stripe_heads. | ||
340 | * 2/ gather all the old stripe_heads and transfer the pages across | ||
341 | * to the new stripe_heads. This will have the side effect of | ||
342 | * freezing the array as once all stripe_heads have been collected, | ||
343 | * no IO will be possible. Old stripe heads are freed once their | ||
344 | * pages have been transferred over, and the old kmem_cache is | ||
345 | * freed when all stripes are done. | ||
346 | * 3/ reallocate conf->disks to be suitably bigger. If this fails, | ||
347 | * we simply return a failure status - no need to clean anything up. | ||
348 | * 4/ allocate new pages for the new slots in the new stripe_heads. | ||
349 | * If this fails, we don't bother trying to shrink the | ||
350 | * stripe_heads down again, we just leave them as they are. | ||
351 | * As each stripe_head is processed the new one is released into | ||
352 | * active service. | ||
353 | * | ||
354 | * Once step2 is started, we cannot afford to wait for a write, | ||
355 | * so we use GFP_NOIO allocations. | ||
356 | */ | ||
357 | struct stripe_head *osh, *nsh; | ||
358 | LIST_HEAD(newstripes); | ||
359 | struct disk_info *ndisks; | ||
360 | int err = 0; | ||
361 | kmem_cache_t *sc; | ||
362 | int i; | ||
363 | |||
364 | if (newsize <= conf->pool_size) | ||
365 | return 0; /* never bother to shrink */ | ||
366 | |||
367 | /* Step 1 */ | ||
368 | sc = kmem_cache_create(conf->cache_name[1-conf->active_name], | ||
369 | sizeof(struct stripe_head)+(newsize-1)*sizeof(struct r5dev), | ||
370 | 0, 0, NULL, NULL); | ||
371 | if (!sc) | ||
372 | return -ENOMEM; | ||
373 | |||
374 | for (i = conf->max_nr_stripes; i; i--) { | ||
375 | nsh = kmem_cache_alloc(sc, GFP_KERNEL); | ||
376 | if (!nsh) | ||
377 | break; | ||
378 | |||
379 | memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev)); | ||
380 | |||
381 | nsh->raid_conf = conf; | ||
382 | spin_lock_init(&nsh->lock); | ||
383 | |||
384 | list_add(&nsh->lru, &newstripes); | ||
385 | } | ||
386 | if (i) { | ||
387 | /* didn't get enough, give up */ | ||
388 | while (!list_empty(&newstripes)) { | ||
389 | nsh = list_entry(newstripes.next, struct stripe_head, lru); | ||
390 | list_del(&nsh->lru); | ||
391 | kmem_cache_free(sc, nsh); | ||
392 | } | ||
393 | kmem_cache_destroy(sc); | ||
394 | return -ENOMEM; | ||
395 | } | ||
396 | /* Step 2 - Must use GFP_NOIO now. | ||
397 | * OK, we have enough stripes, start collecting inactive | ||
398 | * stripes and copying them over | ||
399 | */ | ||
400 | list_for_each_entry(nsh, &newstripes, lru) { | ||
401 | spin_lock_irq(&conf->device_lock); | ||
402 | wait_event_lock_irq(conf->wait_for_stripe, | ||
403 | !list_empty(&conf->inactive_list), | ||
404 | conf->device_lock, | ||
405 | unplug_slaves(conf->mddev); | ||
406 | ); | ||
407 | osh = get_free_stripe(conf); | ||
408 | spin_unlock_irq(&conf->device_lock); | ||
409 | atomic_set(&nsh->count, 1); | ||
410 | for(i=0; i<conf->pool_size; i++) | ||
411 | nsh->dev[i].page = osh->dev[i].page; | ||
412 | for( ; i<newsize; i++) | ||
413 | nsh->dev[i].page = NULL; | ||
414 | kmem_cache_free(conf->slab_cache, osh); | ||
415 | } | ||
416 | kmem_cache_destroy(conf->slab_cache); | ||
417 | |||
418 | /* Step 3. | ||
419 | * At this point, we are holding all the stripes so the array | ||
420 | * is completely stalled, so now is a good time to resize | ||
421 | * conf->disks. | ||
422 | */ | ||
423 | ndisks = kzalloc(newsize * sizeof(struct disk_info), GFP_NOIO); | ||
424 | if (ndisks) { | ||
425 | for (i=0; i<conf->raid_disks; i++) | ||
426 | ndisks[i] = conf->disks[i]; | ||
427 | kfree(conf->disks); | ||
428 | conf->disks = ndisks; | ||
429 | } else | ||
430 | err = -ENOMEM; | ||
431 | |||
432 | /* Step 4, return new stripes to service */ | ||
433 | while(!list_empty(&newstripes)) { | ||
434 | nsh = list_entry(newstripes.next, struct stripe_head, lru); | ||
435 | list_del_init(&nsh->lru); | ||
436 | for (i=conf->raid_disks; i < newsize; i++) | ||
437 | if (nsh->dev[i].page == NULL) { | ||
438 | struct page *p = alloc_page(GFP_NOIO); | ||
439 | nsh->dev[i].page = p; | ||
440 | if (!p) | ||
441 | err = -ENOMEM; | ||
442 | } | ||
443 | release_stripe(nsh); | ||
444 | } | ||
445 | /* critical section passed, GFP_NOIO no longer needed */ | ||
446 | |||
447 | conf->slab_cache = sc; | ||
448 | conf->active_name = 1-conf->active_name; | ||
449 | conf->pool_size = newsize; | ||
450 | return err; | ||
451 | } | ||
452 | |||
330 | 453 | ||
331 | static int drop_one_stripe(raid5_conf_t *conf) | 454 | static int drop_one_stripe(raid5_conf_t *conf) |
332 | { | 455 | { |
@@ -339,7 +462,7 @@ static int drop_one_stripe(raid5_conf_t *conf) | |||
339 | return 0; | 462 | return 0; |
340 | if (atomic_read(&sh->count)) | 463 | if (atomic_read(&sh->count)) |
341 | BUG(); | 464 | BUG(); |
342 | shrink_buffers(sh, conf->raid_disks); | 465 | shrink_buffers(sh, conf->pool_size); |
343 | kmem_cache_free(conf->slab_cache, sh); | 466 | kmem_cache_free(conf->slab_cache, sh); |
344 | atomic_dec(&conf->active_stripes); | 467 | atomic_dec(&conf->active_stripes); |
345 | return 1; | 468 | return 1; |