author		Artur Paszkiewicz <artur.paszkiewicz@intel.com>	2017-04-04 07:13:57 -0400
committer	Shaohua Li <shli@fb.com>	2017-04-10 15:00:49 -0400
commit		845b9e229fe0716ab6b4d94b4364c99069667b59
tree		8c0ac896475505669b3f8665d053d2660d997e29 /drivers/md/raid5.c
parent		94568f64af50bb37c418b200449698cfe7e1da5f
raid5-ppl: use resize_stripes() when enabling or disabling ppl
Use resize_stripes() instead of raid5_reset_stripe_cache() to allocate
or free sh->ppl_page at runtime for all stripes in the stripe cache.
raid5_reset_stripe_cache() required suspending the mddev and could
deadlock because of GFP_KERNEL allocations: with the array suspended,
memory reclaim entered from such an allocation may wait on writeback to
that same array, which can never complete.
Move the 'newsize' check to check_reshape() to allow reallocating the
stripes with the same number of disks. Allocate sh->ppl_page in
alloc_stripe() instead of grow_buffers(). Pass 'struct r5conf *conf' as
a parameter to alloc_stripe() because it is needed to check whether to
allocate ppl_page. Add free_stripe() and use it to free stripes rather
than calling kmem_cache_free() directly; free_stripe() also frees
sh->ppl_page.
Set MD_HAS_PPL at the end of ppl_init_log() instead of explicitly
setting it in advance and add another parameter to log_init() to allow
calling ppl_init_log() without the bit set. Don't try to calculate
partial parity or add a stripe to the log if its ppl_page is not set.
Enabling ppl can now be performed without suspending the mddev, because
the log won't be used until new stripes are allocated with ppl_page.
Calling mddev_suspend/resume is still necessary when disabling ppl,
because we want all stripes to finish before stopping the log, but
resize_stripes() can be called after mddev_resume() when ppl is no
longer active.
Suggested-by: NeilBrown <neilb@suse.com>
Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@intel.com>
Signed-off-by: Shaohua Li <shli@fb.com>
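
In outline, the new enable/disable sequences look like this (condensed
from the raid5_change_consistency_policy() hunk below; declarations,
the RAID-level check, and unrelated branches omitted):

	/* Enabling ppl: no suspend needed; the log is unused until
	 * stripes carrying ppl_page exist. */
	err = log_init(conf, NULL, true);	/* sets MD_HAS_PPL on success */
	if (!err) {
		err = resize_stripes(conf, conf->pool_size);	/* same size: reallocate with ppl_page */
		if (err)
			log_exit(conf);		/* roll back if reallocation fails */
	}

	/* Disabling ppl: suspend so all in-flight stripes finish before
	 * the log is stopped; stripes are reallocated after resume. */
	mddev_suspend(mddev);
	log_exit(conf);
	mddev_resume(mddev);
	err = resize_stripes(conf, conf->pool_size);	/* drop ppl_page */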
Diffstat (limited to 'drivers/md/raid5.c')
 drivers/md/raid5.c | 88 +++++++++++++++++++-------------------------
 1 file changed, 38 insertions(+), 50 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 09d94ad5e52b..e04d7b11bc87 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -471,11 +471,6 @@ static void shrink_buffers(struct stripe_head *sh)
 		sh->dev[i].page = NULL;
 		put_page(p);
 	}
-
-	if (sh->ppl_page) {
-		put_page(sh->ppl_page);
-		sh->ppl_page = NULL;
-	}
 }
 
 static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
@@ -493,12 +488,6 @@ static int grow_buffers(struct stripe_head *sh, gfp_t gfp)
 		sh->dev[i].orig_page = page;
 	}
 
-	if (raid5_has_ppl(sh->raid_conf)) {
-		sh->ppl_page = alloc_page(gfp);
-		if (!sh->ppl_page)
-			return 1;
-	}
-
 	return 0;
 }
 
@@ -2132,8 +2121,15 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
 	put_cpu();
 }
 
+static void free_stripe(struct kmem_cache *sc, struct stripe_head *sh)
+{
+	if (sh->ppl_page)
+		__free_page(sh->ppl_page);
+	kmem_cache_free(sc, sh);
+}
+
 static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
-	int disks)
+	int disks, struct r5conf *conf)
 {
 	struct stripe_head *sh;
 	int i;
@@ -2147,6 +2143,7 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 		INIT_LIST_HEAD(&sh->r5c);
 		INIT_LIST_HEAD(&sh->log_list);
 		atomic_set(&sh->count, 1);
+		sh->raid_conf = conf;
 		sh->log_start = MaxSector;
 		for (i = 0; i < disks; i++) {
 			struct r5dev *dev = &sh->dev[i];
@@ -2154,6 +2151,14 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
 			bio_init(&dev->req, &dev->vec, 1);
 			bio_init(&dev->rreq, &dev->rvec, 1);
 		}
+
+		if (raid5_has_ppl(conf)) {
+			sh->ppl_page = alloc_page(gfp);
+			if (!sh->ppl_page) {
+				free_stripe(sc, sh);
+				sh = NULL;
+			}
+		}
 	}
 	return sh;
 }
@@ -2161,15 +2166,13 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
 {
 	struct stripe_head *sh;
 
-	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
+	sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size, conf);
 	if (!sh)
 		return 0;
 
-	sh->raid_conf = conf;
-
 	if (grow_buffers(sh, gfp)) {
 		shrink_buffers(sh);
-		kmem_cache_free(conf->slab_cache, sh);
+		free_stripe(conf->slab_cache, sh);
 		return 0;
 	}
 	sh->hash_lock_index =
@@ -2314,9 +2317,6 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	int i;
 	int hash, cnt;
 
-	if (newsize <= conf->pool_size)
-		return 0; /* never bother to shrink */
-
 	err = md_allow_write(conf->mddev);
 	if (err)
 		return err;
@@ -2332,11 +2332,10 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	mutex_lock(&conf->cache_size_mutex);
 
 	for (i = conf->max_nr_stripes; i; i--) {
-		nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
+		nsh = alloc_stripe(sc, GFP_KERNEL, newsize, conf);
 		if (!nsh)
 			break;
 
-		nsh->raid_conf = conf;
 		list_add(&nsh->lru, &newstripes);
 	}
 	if (i) {
@@ -2344,7 +2343,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 		while (!list_empty(&newstripes)) {
 			nsh = list_entry(newstripes.next, struct stripe_head, lru);
 			list_del(&nsh->lru);
-			kmem_cache_free(sc, nsh);
+			free_stripe(sc, nsh);
 		}
 		kmem_cache_destroy(sc);
 		mutex_unlock(&conf->cache_size_mutex);
@@ -2370,7 +2369,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 			nsh->dev[i].orig_page = osh->dev[i].page;
 		}
 		nsh->hash_lock_index = hash;
-		kmem_cache_free(conf->slab_cache, osh);
+		free_stripe(conf->slab_cache, osh);
 		cnt++;
 		if (cnt >= conf->max_nr_stripes / NR_STRIPE_HASH_LOCKS +
 		    !!((conf->max_nr_stripes % NR_STRIPE_HASH_LOCKS) > hash)) {
@@ -2447,7 +2446,7 @@ static int drop_one_stripe(struct r5conf *conf)
 		return 0;
 	BUG_ON(atomic_read(&sh->count));
 	shrink_buffers(sh);
-	kmem_cache_free(conf->slab_cache, sh);
+	free_stripe(conf->slab_cache, sh);
 	atomic_dec(&conf->active_stripes);
 	conf->max_nr_stripes--;
 	return 1;
@@ -3170,7 +3169,7 @@ schedule_reconstruction(struct stripe_head *sh, struct stripe_head_state *s,
 		s->locked++;
 	}
 
-	if (raid5_has_ppl(sh->raid_conf) &&
+	if (raid5_has_ppl(sh->raid_conf) && sh->ppl_page &&
 	    test_bit(STRIPE_OP_BIODRAIN, &s->ops_request) &&
 	    !test_bit(STRIPE_FULL_WRITE, &sh->state) &&
 	    test_bit(R5_Insync, &sh->dev[pd_idx].flags))
@@ -7427,7 +7426,7 @@ static int raid5_run(struct mddev *mddev)
 		blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
 	}
 
-	if (log_init(conf, journal_dev))
+	if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
 		goto abort;
 
 	return 0;
@@ -7636,7 +7635,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * The array is in readonly mode if journal is missing, so no
 		 * write requests running. We should be safe
 		 */
-		log_init(conf, rdev);
+		log_init(conf, rdev, false);
 		return 0;
 	}
 	if (mddev->recovery_disabled == conf->recovery_disabled)
@@ -7786,6 +7785,9 @@ static int check_reshape(struct mddev *mddev)
 			      mddev->chunk_sectors)
 		    ) < 0)
 			return -ENOMEM;
+
+	if (conf->previous_raid_disks + mddev->delta_disks <= conf->pool_size)
+		return 0; /* never bother to shrink */
 	return resize_stripes(conf, (conf->previous_raid_disks
 				     + mddev->delta_disks));
 }
@@ -8276,20 +8278,6 @@ static void *raid6_takeover(struct mddev *mddev)
 	return setup_conf(mddev);
 }
 
-static void raid5_reset_stripe_cache(struct mddev *mddev)
-{
-	struct r5conf *conf = mddev->private;
-
-	mutex_lock(&conf->cache_size_mutex);
-	while (conf->max_nr_stripes &&
-	       drop_one_stripe(conf))
-		;
-	while (conf->min_nr_stripes > conf->max_nr_stripes &&
-	       grow_one_stripe(conf, GFP_KERNEL))
-		;
-	mutex_unlock(&conf->cache_size_mutex);
-}
-
 static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 {
 	struct r5conf *conf;
@@ -8304,23 +8292,23 @@ static int raid5_change_consistency_policy(struct mddev *mddev, const char *buf)
 		return -ENODEV;
 	}
 
-	if (strncmp(buf, "ppl", 3) == 0 && !raid5_has_ppl(conf)) {
+	if (strncmp(buf, "ppl", 3) == 0) {
 		/* ppl only works with RAID 5 */
-		if (conf->level == 5) {
-			mddev_suspend(mddev);
-			set_bit(MD_HAS_PPL, &mddev->flags);
-			err = log_init(conf, NULL);
-			if (!err)
-				raid5_reset_stripe_cache(mddev);
-			mddev_resume(mddev);
+		if (!raid5_has_ppl(conf) && conf->level == 5) {
+			err = log_init(conf, NULL, true);
+			if (!err) {
+				err = resize_stripes(conf, conf->pool_size);
+				if (err)
+					log_exit(conf);
+			}
 		} else
 			err = -EINVAL;
 	} else if (strncmp(buf, "resync", 6) == 0) {
 		if (raid5_has_ppl(conf)) {
 			mddev_suspend(mddev);
 			log_exit(conf);
-			raid5_reset_stripe_cache(mddev);
 			mddev_resume(mddev);
+			err = resize_stripes(conf, conf->pool_size);
 		} else if (test_bit(MD_HAS_JOURNAL, &conf->mddev->flags) &&
 			   r5l_log_disk_error(conf)) {
 			bool journal_dev_exists = false;
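
For context: raid5_change_consistency_policy() is raid5's backend for
md's "consistency_policy" sysfs attribute, so after this change writing
"ppl" to /sys/block/mdX/md/consistency_policy (mdX being whichever
array is targeted) takes effect without suspending the array, while
switching back to "resync" still suspends it briefly before the stripes
are reallocated.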