author    NeilBrown <neilb@suse.de>    2015-02-25 20:47:56 -0500
committer NeilBrown <neilb@suse.de>    2015-04-21 18:00:43 -0400
commit    edbe83ab4c27ea6669eb57adb5ed7eaec1118ceb
tree      0bfa3622e7c297cd7fc2b42a56bc5006ff87bfdc /drivers/md
parent    5423399a84ee1d92d29d763029ed40e4905cf50f
md/raid5: allow the stripe_cache to grow and shrink.
The default setting of 256 stripe_heads is probably much too small for
many configurations.  So it is best to make it auto-configure.

Shrinking the cache under memory pressure is easy.  The only
interesting part here is that we put a fairly high cost ('seeks') on
shrinking the cache, as the cost is greater than just having to read
more data: it reduces parallelism.

Growing the cache on demand needs to be done carefully.  If we allow
fast growth, that can upset memory balance as lots of dirty memory can
quickly turn into lots of memory queued in the stripe_cache.  It is
important for the raid5 block device to appear congested to allow
write-throttling to work.

So we only add stripes slowly.  We set a flag when an allocation fails
because all stripes are in use, allocate at a convenient time when that
flag is set, and don't allow it to be set again until at least one
stripe_head has been released for re-use.  This means that a spurt of
requests will only cause one stripe_head to be allocated, but a steady
stream of requests will slowly increase the cache size - until memory
pressure puts it back again.  It could take hours to reach a steady
state.

The value written to, and displayed in, stripe_cache_size is used as a
minimum.  The cache can grow above this and shrink back down to it.
The actual size is not directly visible, though it can be deduced to
some extent by watching stripe_cache_active.

Signed-off-by: NeilBrown <neilb@suse.de>
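[Annotation] To make the throttle concrete, here is a minimal userspace C
sketch of the two-flag policy described above.  The flag names mirror
R5_ALLOC_MORE and R5_DID_ALLOC from this patch, but everything else
(on_cache_miss, on_stripe_release, daemon_pass) is a hypothetical harness
standing in for get_active_stripe(), release_stripe_list() and the raid5d
loop; the real code additionally requires conf->released_stripes to be
empty before setting the flag.

/*
 * Sketch only: models the throttle policy, not the kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

static bool alloc_more;       /* R5_ALLOC_MORE: a request found no free stripe */
static bool did_alloc;        /* R5_DID_ALLOC: grew once, wait for a release */
static int cache_size = 256;  /* current number of stripe_heads */

/* A request failed to find a free stripe_head. */
static void on_cache_miss(void)
{
	if (!did_alloc)               /* only one growth step per release */
		alloc_more = true;
}

/* A stripe_head was released for re-use. */
static void on_stripe_release(void)
{
	did_alloc = false;            /* permit the next growth step */
}

/* One pass of the daemon loop. */
static void daemon_pass(void)
{
	if (alloc_more) {
		alloc_more = false;
		cache_size++;         /* grow_one_stripe(): one stripe per pass */
		did_alloc = true;     /* set even if a real allocation failed */
	}
}

int main(void)
{
	int i;

	/* A spurt of misses buys exactly one new stripe_head... */
	for (i = 0; i < 100; i++)
		on_cache_miss();
	daemon_pass();
	printf("after burst: %d\n", cache_size);          /* 257 */

	/* ...while a steady miss/release rhythm grows the cache slowly. */
	for (i = 0; i < 10; i++) {
		on_stripe_release();
		on_cache_miss();
		daemon_pass();
	}
	printf("after steady stream: %d\n", cache_size);  /* 267 */
	return 0;
}

Compiled and run, this prints 257 after the burst and 267 after the
steady stream: a spurt of failed lookups buys exactly one new
stripe_head, while a sustained miss/release rhythm grows the cache by
one stripe per daemon pass.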
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/raid5.c	68
-rw-r--r--	drivers/md/raid5.h	11
2 files changed, 71 insertions(+), 8 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b7cd32e7f29e..9716319cc477 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -672,8 +672,13 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 			    *(conf->hash_locks + hash));
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
-			if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state))
+			if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
 				sh = get_free_stripe(conf, hash);
+				if (!sh && llist_empty(&conf->released_stripes) &&
+				    !test_bit(R5_DID_ALLOC, &conf->cache_state))
+					set_bit(R5_ALLOC_MORE,
+						&conf->cache_state);
+			}
 			if (noblock && sh == NULL)
 				break;
 			if (!sh) {
@@ -5761,6 +5766,8 @@ static void raid5d(struct md_thread *thread)
 		int batch_size, released;
 
 		released = release_stripe_list(conf, conf->temp_inactive_list);
+		if (released)
+			clear_bit(R5_DID_ALLOC, &conf->cache_state);
 
 		if (
 		    !list_empty(&conf->bitmap_list)) {
@@ -5799,6 +5806,13 @@ static void raid5d(struct md_thread *thread)
 	pr_debug("%d stripes handled\n", handled);
 
 	spin_unlock_irq(&conf->device_lock);
+	if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
+		grow_one_stripe(conf, __GFP_NOWARN);
+		/* Set flag even if allocation failed.  This helps
+		 * slow down allocation requests when mem is short
+		 */
+		set_bit(R5_DID_ALLOC, &conf->cache_state);
+	}
 
 	async_tx_issue_pending_all();
 	blk_finish_plug(&plug);
@@ -5814,7 +5828,7 @@ raid5_show_stripe_cache_size(struct mddev *mddev, char *page)
 	spin_lock(&mddev->lock);
 	conf = mddev->private;
 	if (conf)
-		ret = sprintf(page, "%d\n", conf->max_nr_stripes);
+		ret = sprintf(page, "%d\n", conf->min_nr_stripes);
 	spin_unlock(&mddev->lock);
 	return ret;
 }
@@ -5828,10 +5842,12 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 	if (size <= 16 || size > 32768)
 		return -EINVAL;
 
+	conf->min_nr_stripes = size;
 	while (size < conf->max_nr_stripes &&
 	       drop_one_stripe(conf))
 		;
 
+
 	err = md_allow_write(mddev);
 	if (err)
 		return err;
@@ -5947,7 +5963,7 @@ raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
 	conf = mddev->private;
 	if (!conf)
 		err = -ENODEV;
-	else if (new > conf->max_nr_stripes)
+	else if (new > conf->min_nr_stripes)
 		err = -EINVAL;
 	else
 		conf->bypass_threshold = new;
@@ -6228,6 +6244,8 @@ static void raid5_free_percpu(struct r5conf *conf)
 
 static void free_conf(struct r5conf *conf)
 {
+	if (conf->shrinker.seeks)
+		unregister_shrinker(&conf->shrinker);
 	free_thread_groups(conf);
 	shrink_stripes(conf);
 	raid5_free_percpu(conf);
@@ -6295,6 +6313,30 @@ static int raid5_alloc_percpu(struct r5conf *conf)
 	return err;
 }
 
+static unsigned long raid5_cache_scan(struct shrinker *shrink,
+				      struct shrink_control *sc)
+{
+	struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+	int ret = 0;
+	while (ret < sc->nr_to_scan) {
+		if (drop_one_stripe(conf) == 0)
+			return SHRINK_STOP;
+		ret++;
+	}
+	return ret;
+}
+
+static unsigned long raid5_cache_count(struct shrinker *shrink,
+				       struct shrink_control *sc)
+{
+	struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
+
+	if (conf->max_nr_stripes < conf->min_nr_stripes)
+		/* unlikely, but not impossible */
+		return 0;
+	return conf->max_nr_stripes - conf->min_nr_stripes;
+}
+
 static struct r5conf *setup_conf(struct mddev *mddev)
 {
 	struct r5conf *conf;
@@ -6445,10 +6487,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 		conf->prev_algo = mddev->layout;
 	}
 
-	memory = NR_STRIPES * (sizeof(struct stripe_head) +
+	conf->min_nr_stripes = NR_STRIPES;
+	memory = conf->min_nr_stripes * (sizeof(struct stripe_head) +
 		 max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
 	atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS);
-	if (grow_stripes(conf, NR_STRIPES)) {
+	if (grow_stripes(conf, conf->min_nr_stripes)) {
 		printk(KERN_ERR
 			"md/raid:%s: couldn't allocate %dkB for buffers\n",
 			mdname(mddev), memory);
@@ -6456,6 +6499,17 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 	} else
 		printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
 		       mdname(mddev), memory);
+	/*
+	 * Losing a stripe head costs more than the time to refill it,
+	 * it reduces the queue depth and so can hurt throughput.
+	 * So set it rather large, scaled by number of devices.
+	 */
+	conf->shrinker.seeks = DEFAULT_SEEKS * conf->raid_disks * 4;
+	conf->shrinker.scan_objects = raid5_cache_scan;
+	conf->shrinker.count_objects = raid5_cache_count;
+	conf->shrinker.batch = 128;
+	conf->shrinker.flags = 0;
+	register_shrinker(&conf->shrinker);
 
 	sprintf(pers_name, "raid%d", mddev->new_level);
 	conf->thread = md_register_thread(raid5d, mddev, pers_name);
@@ -7097,9 +7151,9 @@ static int check_stripe_cache(struct mddev *mddev)
 	 */
 	struct r5conf *conf = mddev->private;
 	if (((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4
-	    > conf->max_nr_stripes ||
+	    > conf->min_nr_stripes ||
 	    ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4
-	    > conf->max_nr_stripes) {
+	    > conf->min_nr_stripes) {
 		printk(KERN_WARNING "md/raid:%s: reshape: not enough stripes.  Needed %lu\n",
 		       mdname(mddev),
 		       ((max(mddev->chunk_sectors, mddev->new_chunk_sectors) << 9)
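[Annotation] A side note on the check_stripe_cache() hunk above: the
reshape test now compares against conf->min_nr_stripes, since that is the
only size the cache is guaranteed never to shrink below.  The same
arithmetic, rendered as a hypothetical standalone program (assuming
STRIPE_SIZE equals PAGE_SIZE, 4096 bytes, as on common configurations of
this tree):

#include <stdio.h>

#define STRIPE_SIZE 4096UL	/* assumption: PAGE_SIZE on this config */

int main(void)
{
	unsigned long chunk_sectors = 1024;	/* a 512KiB chunk, in 512-byte sectors */

	/* check_stripe_cache() wants four chunks' worth of stripe_heads. */
	unsigned long needed = ((chunk_sectors << 9) / STRIPE_SIZE) * 4;

	printf("reshape needs at least %lu stripe_heads\n", needed);	/* 512 */
	return 0;
}

For a 512KiB chunk this comes to 512 stripe_heads, above the default
minimum of 256, which is exactly the situation where stripe_cache_size
must be raised before a reshape can start.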
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index ebe4e24bc14d..7dc0dd86074b 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -433,6 +433,7 @@ struct r5conf {
 	int			max_degraded;
 	int			raid_disks;
 	int			max_nr_stripes;
+	int			min_nr_stripes;
 
 	/* reshape_progress is the leading edge of a 'reshape'
 	 * It has value MaxSector when no reshape is happening
@@ -513,7 +514,15 @@ struct r5conf {
 #define R5_INACTIVE_BLOCKED	1	/* release of inactive stripes blocked,
 					 * waiting for 25% to be free
 					 */
-
+#define R5_ALLOC_MORE		2	/* It might help to allocate another
+					 * stripe.
+					 */
+#define R5_DID_ALLOC		4	/* A stripe was allocated, don't allocate
+					 * more until at least one has been
+					 * released.  This avoids flooding
+					 * the cache.
+					 */
+	struct shrinker		shrinker;
 	int			pool_size; /* number of disks in stripeheads in pool */
 	spinlock_t		device_lock;
 	struct disk_info	*disks;