Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--	drivers/md/raid5.c | 84
1 file changed, 60 insertions(+), 24 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b6793d2e051f..f757023fc458 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -344,7 +344,8 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 					 int hash)
 {
 	int size;
-	bool do_wakeup = false;
+	unsigned long do_wakeup = 0;
+	int i = 0;
 	unsigned long flags;
 
 	if (hash == NR_STRIPE_HASH_LOCKS) {
@@ -365,15 +366,21 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 			    !list_empty(list))
 				atomic_dec(&conf->empty_inactive_list_nr);
 			list_splice_tail_init(list, conf->inactive_list + hash);
-			do_wakeup = true;
+			do_wakeup |= 1 << hash;
 			spin_unlock_irqrestore(conf->hash_locks + hash, flags);
 		}
 		size--;
 		hash--;
 	}
 
+	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
+		if (do_wakeup & (1 << i))
+			wake_up(&conf->wait_for_stripe[i]);
+	}
+
 	if (do_wakeup) {
-		wake_up(&conf->wait_for_stripe);
+		if (atomic_read(&conf->active_stripes) == 0)
+			wake_up(&conf->wait_for_quiescent);
 		if (conf->retry_read_aligned)
 			md_wakeup_thread(conf->mddev->thread);
 	}
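The hunk above turns the single do_wakeup flag into a per-hash bitmask, so only the wait queues whose inactive list actually received stripes are woken, rather than every waiter in the array. A minimal sketch of that pattern (hypothetical names, not the raid5 code; it assumes the bucket count fits in an unsigned long, which holds here since NR_STRIPE_HASH_LOCKS is 8):

#include <linux/wait.h>

#define NR_BUCKETS 8	/* stands in for NR_STRIPE_HASH_LOCKS */

static wait_queue_head_t bucket_wq[NR_BUCKETS];	/* one queue per hash bucket */

/* 'refilled' has bit i set when bucket i received new entries, the same
 * way the hunk above records "do_wakeup |= 1 << hash".
 */
static void wake_refilled_buckets(unsigned long refilled)
{
	int i;

	for (i = 0; i < NR_BUCKETS; i++)
		if (refilled & (1UL << i))
			wake_up(&bucket_wq[i]);
}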
@@ -667,15 +674,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	spin_lock_irq(conf->hash_locks + hash);
 
 	do {
-		wait_event_lock_irq(conf->wait_for_stripe,
+		wait_event_lock_irq(conf->wait_for_quiescent,
 				    conf->quiesce == 0 || noquiesce,
 				    *(conf->hash_locks + hash));
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
 			if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
 				sh = get_free_stripe(conf, hash);
-				if (!sh && llist_empty(&conf->released_stripes) &&
-				    !test_bit(R5_DID_ALLOC, &conf->cache_state))
+				if (!sh && !test_bit(R5_DID_ALLOC,
+						     &conf->cache_state))
 					set_bit(R5_ALLOC_MORE,
 						&conf->cache_state);
 			}
@@ -684,14 +691,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 			if (!sh) {
 				set_bit(R5_INACTIVE_BLOCKED,
 					&conf->cache_state);
-				wait_event_lock_irq(
-					conf->wait_for_stripe,
+				wait_event_exclusive_cmd(
+					conf->wait_for_stripe[hash],
 					!list_empty(conf->inactive_list + hash) &&
 					(atomic_read(&conf->active_stripes)
 					 < (conf->max_nr_stripes * 3 / 4)
 					 || !test_bit(R5_INACTIVE_BLOCKED,
 						      &conf->cache_state)),
-					*(conf->hash_locks + hash));
+					spin_unlock_irq(conf->hash_locks + hash),
+					spin_lock_irq(conf->hash_locks + hash));
 				clear_bit(R5_INACTIVE_BLOCKED,
 					  &conf->cache_state);
 			} else {
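wait_event_exclusive_cmd() does two things that matter here: the waiter is queued exclusively on the per-hash queue, so a wake_up() on wait_for_stripe[hash] wakes at most one sleeper instead of every thread waiting for a free stripe, and the two trailing arguments are executed around the sleep, which is how the hash spinlock is dropped before schedule() and retaken afterwards. A rough open-coded equivalent of that wait (a sketch of the idea, not the exact macro expansion):

	DEFINE_WAIT(w);

	for (;;) {
		/* exclusive wait: wake_up() wakes only the first such waiter */
		prepare_to_wait_exclusive(&conf->wait_for_stripe[hash], &w,
					  TASK_UNINTERRUPTIBLE);
		if (!list_empty(conf->inactive_list + hash) &&
		    (atomic_read(&conf->active_stripes) <
		     conf->max_nr_stripes * 3 / 4 ||
		     !test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)))
			break;
		spin_unlock_irq(conf->hash_locks + hash);	/* cmd1 */
		schedule();
		spin_lock_irq(conf->hash_locks + hash);		/* cmd2 */
	}
	finish_wait(&conf->wait_for_stripe[hash], &w);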
@@ -716,6 +724,9 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 		}
 	} while (sh == NULL);
 
+	if (!list_empty(conf->inactive_list + hash))
+		wake_up(&conf->wait_for_stripe[hash]);
+
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
@@ -2151,6 +2162,9 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	if (!sc)
 		return -ENOMEM;
 
+	/* Need to ensure auto-resizing doesn't interfere */
+	mutex_lock(&conf->cache_size_mutex);
+
 	for (i = conf->max_nr_stripes; i; i--) {
 		nsh = alloc_stripe(sc, GFP_KERNEL);
 		if (!nsh)
@@ -2167,6 +2181,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 			kmem_cache_free(sc, nsh);
 		}
 		kmem_cache_destroy(sc);
+		mutex_unlock(&conf->cache_size_mutex);
 		return -ENOMEM;
 	}
 	/* Step 2 - Must use GFP_NOIO now.
@@ -2177,7 +2192,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	cnt = 0;
 	list_for_each_entry(nsh, &newstripes, lru) {
 		lock_device_hash_lock(conf, hash);
-		wait_event_cmd(conf->wait_for_stripe,
+		wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
 				    !list_empty(conf->inactive_list + hash),
 				    unlock_device_hash_lock(conf, hash),
 				    lock_device_hash_lock(conf, hash));
@@ -2213,6 +2228,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	} else
 		err = -ENOMEM;
 
+	mutex_unlock(&conf->cache_size_mutex);
 	/* Step 4, return new stripes to service */
 	while(!list_empty(&newstripes)) {
 		nsh = list_entry(newstripes.next, struct stripe_head, lru);
@@ -2240,7 +2256,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 static int drop_one_stripe(struct r5conf *conf)
 {
 	struct stripe_head *sh;
-	int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
+	int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;
 
 	spin_lock_irq(conf->hash_locks + hash);
 	sh = get_free_stripe(conf, hash);
@@ -4050,8 +4066,10 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 				 &first_bad, &bad_sectors))
 			set_bit(R5_ReadRepl, &dev->flags);
 		else {
-			if (rdev)
+			if (rdev && !test_bit(Faulty, &rdev->flags))
 				set_bit(R5_NeedReplace, &dev->flags);
+			else
+				clear_bit(R5_NeedReplace, &dev->flags);
 			rdev = rcu_dereference(conf->disks[i].rdev);
 			clear_bit(R5_ReadRepl, &dev->flags);
 		}
@@ -4760,7 +4778,7 @@ static void raid5_align_endio(struct bio *bi, int error)
 					 raid_bi, 0);
 		bio_endio(raid_bi, 0);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
-			wake_up(&conf->wait_for_stripe);
+			wake_up(&conf->wait_for_quiescent);
 		return;
 	}
 
@@ -4855,7 +4873,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		align_bi->bi_iter.bi_sector += rdev->data_offset;
 
 		spin_lock_irq(&conf->device_lock);
-		wait_event_lock_irq(conf->wait_for_stripe,
+		wait_event_lock_irq(conf->wait_for_quiescent,
 				    conf->quiesce == 0,
 				    conf->device_lock);
 		atomic_inc(&conf->active_aligned_reads);
@@ -5699,7 +5717,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 		bio_endio(raid_bio, 0);
 	}
 	if (atomic_dec_and_test(&conf->active_aligned_reads))
-		wake_up(&conf->wait_for_stripe);
+		wake_up(&conf->wait_for_quiescent);
 	return handled;
 }
 
@@ -5846,12 +5864,14 @@ static void raid5d(struct md_thread *thread)
 	pr_debug("%d stripes handled\n", handled);
 
 	spin_unlock_irq(&conf->device_lock);
-	if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
+	if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
+	    mutex_trylock(&conf->cache_size_mutex)) {
 		grow_one_stripe(conf, __GFP_NOWARN);
 		/* Set flag even if allocation failed. This helps
 		 * slow down allocation requests when mem is short
 		 */
 		set_bit(R5_DID_ALLOC, &conf->cache_state);
+		mutex_unlock(&conf->cache_size_mutex);
 	}
 
 	async_tx_issue_pending_all();
@@ -5883,18 +5903,22 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 		return -EINVAL;
 
 	conf->min_nr_stripes = size;
+	mutex_lock(&conf->cache_size_mutex);
 	while (size < conf->max_nr_stripes &&
 	       drop_one_stripe(conf))
 		;
+	mutex_unlock(&conf->cache_size_mutex);
 
 
 	err = md_allow_write(mddev);
 	if (err)
 		return err;
 
+	mutex_lock(&conf->cache_size_mutex);
 	while (size > conf->max_nr_stripes)
 		if (!grow_one_stripe(conf, GFP_KERNEL))
 			break;
+	mutex_unlock(&conf->cache_size_mutex);
 
 	return 0;
 }
@@ -6360,11 +6384,19 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink,
 				      struct shrink_control *sc)
 {
 	struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
-	int ret = 0;
-	while (ret < sc->nr_to_scan) {
-		if (drop_one_stripe(conf) == 0)
-			return SHRINK_STOP;
-		ret++;
+	unsigned long ret = SHRINK_STOP;
+
+	if (mutex_trylock(&conf->cache_size_mutex)) {
+		ret= 0;
+		while (ret < sc->nr_to_scan &&
+		       conf->max_nr_stripes > conf->min_nr_stripes) {
+			if (drop_one_stripe(conf) == 0) {
+				ret = SHRINK_STOP;
+				break;
+			}
+			ret++;
+		}
+		mutex_unlock(&conf->cache_size_mutex);
 	}
 	return ret;
 }
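In the shrinker hunk above, raid5_cache_scan() now only does work when it can take cache_size_mutex without blocking, stops shrinking at min_nr_stripes, and returns SHRINK_STOP when it cannot make progress (mutex busy, or drop_one_stripe() found nothing to free), which tells the VM not to keep calling back. A stripped-down sketch of that trylock-guarded scan callback, with hypothetical structure and helper names rather than the raid5 symbols:

#include <linux/mutex.h>
#include <linux/shrinker.h>

struct my_cache {
	struct shrinker shrinker;
	struct mutex resize_mutex;	/* stands in for cache_size_mutex */
	int nr_objs;
	int min_objs;
};

static bool free_one_object(struct my_cache *c);	/* hypothetical helper */

static unsigned long my_cache_scan(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	struct my_cache *c = container_of(shrink, struct my_cache, shrinker);
	unsigned long freed = SHRINK_STOP;

	if (mutex_trylock(&c->resize_mutex)) {
		freed = 0;
		while (freed < sc->nr_to_scan && c->nr_objs > c->min_objs) {
			if (!free_one_object(c)) {
				freed = SHRINK_STOP;	/* nothing left to drop */
				break;
			}
			freed++;
		}
		mutex_unlock(&c->resize_mutex);
	}
	return freed;
}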
@@ -6433,7 +6465,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 		goto abort;
 	spin_lock_init(&conf->device_lock);
 	seqcount_init(&conf->gen_lock);
-	init_waitqueue_head(&conf->wait_for_stripe);
+	mutex_init(&conf->cache_size_mutex);
+	init_waitqueue_head(&conf->wait_for_quiescent);
+	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
+		init_waitqueue_head(&conf->wait_for_stripe[i]);
+	}
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
 	INIT_LIST_HEAD(&conf->hold_list);
@@ -7466,7 +7502,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
 		 * active stripes can drain
 		 */
 		conf->quiesce = 2;
-		wait_event_cmd(conf->wait_for_stripe,
+		wait_event_cmd(conf->wait_for_quiescent,
 				    atomic_read(&conf->active_stripes) == 0 &&
 				    atomic_read(&conf->active_aligned_reads) == 0,
 				    unlock_all_device_hash_locks_irq(conf),
@@ -7480,7 +7516,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
 	case 0: /* re-enable writes */
 		lock_all_device_hash_locks_irq(conf);
 		conf->quiesce = 0;
-		wake_up(&conf->wait_for_stripe);
+		wake_up(&conf->wait_for_quiescent);
 		wake_up(&conf->wait_for_overlap);
 		unlock_all_device_hash_locks_irq(conf);
 		break;