Diffstat (limited to 'drivers/md/raid5.c')
 -rw-r--r--  drivers/md/raid5.c  84
 1 file changed, 60 insertions(+), 24 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b6793d2e051f..f757023fc458 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -344,7 +344,8 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 					 int hash)
 {
 	int size;
-	bool do_wakeup = false;
+	unsigned long do_wakeup = 0;
+	int i = 0;
 	unsigned long flags;
 
 	if (hash == NR_STRIPE_HASH_LOCKS) {
@@ -365,15 +366,21 @@ static void release_inactive_stripe_list(struct r5conf *conf,
 			    !list_empty(list))
 				atomic_dec(&conf->empty_inactive_list_nr);
 			list_splice_tail_init(list, conf->inactive_list + hash);
-			do_wakeup = true;
+			do_wakeup |= 1 << hash;
 			spin_unlock_irqrestore(conf->hash_locks + hash, flags);
 		}
 		size--;
 		hash--;
 	}
 
+	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
+		if (do_wakeup & (1 << i))
+			wake_up(&conf->wait_for_stripe[i]);
+	}
+
 	if (do_wakeup) {
-		wake_up(&conf->wait_for_stripe);
+		if (atomic_read(&conf->active_stripes) == 0)
+			wake_up(&conf->wait_for_quiescent);
 		if (conf->retry_read_aligned)
 			md_wakeup_thread(conf->mddev->thread);
 	}
@@ -667,15 +674,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 	spin_lock_irq(conf->hash_locks + hash);
 
 	do {
-		wait_event_lock_irq(conf->wait_for_stripe,
+		wait_event_lock_irq(conf->wait_for_quiescent,
 				    conf->quiesce == 0 || noquiesce,
 				    *(conf->hash_locks + hash));
 		sh = __find_stripe(conf, sector, conf->generation - previous);
 		if (!sh) {
 			if (!test_bit(R5_INACTIVE_BLOCKED, &conf->cache_state)) {
 				sh = get_free_stripe(conf, hash);
-				if (!sh && llist_empty(&conf->released_stripes) &&
-				    !test_bit(R5_DID_ALLOC, &conf->cache_state))
+				if (!sh && !test_bit(R5_DID_ALLOC,
+						     &conf->cache_state))
 					set_bit(R5_ALLOC_MORE,
 						&conf->cache_state);
 			}
@@ -684,14 +691,15 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 			if (!sh) {
 				set_bit(R5_INACTIVE_BLOCKED,
 					&conf->cache_state);
-				wait_event_lock_irq(
-					conf->wait_for_stripe,
+				wait_event_exclusive_cmd(
+					conf->wait_for_stripe[hash],
 					!list_empty(conf->inactive_list + hash) &&
 					(atomic_read(&conf->active_stripes)
 					 < (conf->max_nr_stripes * 3 / 4)
 					 || !test_bit(R5_INACTIVE_BLOCKED,
 						      &conf->cache_state)),
-					*(conf->hash_locks + hash));
+					spin_unlock_irq(conf->hash_locks + hash),
+					spin_lock_irq(conf->hash_locks + hash));
 				clear_bit(R5_INACTIVE_BLOCKED,
 					  &conf->cache_state);
 			} else {
@@ -716,6 +724,9 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
 		}
 	} while (sh == NULL);
 
+	if (!list_empty(conf->inactive_list + hash))
+		wake_up(&conf->wait_for_stripe[hash]);
+
 	spin_unlock_irq(conf->hash_locks + hash);
 	return sh;
 }
@@ -2151,6 +2162,9 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	if (!sc)
 		return -ENOMEM;
 
+	/* Need to ensure auto-resizing doesn't interfere */
+	mutex_lock(&conf->cache_size_mutex);
+
 	for (i = conf->max_nr_stripes; i; i--) {
 		nsh = alloc_stripe(sc, GFP_KERNEL);
 		if (!nsh)
@@ -2167,6 +2181,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 			kmem_cache_free(sc, nsh);
 		}
 		kmem_cache_destroy(sc);
+		mutex_unlock(&conf->cache_size_mutex);
 		return -ENOMEM;
 	}
 	/* Step 2 - Must use GFP_NOIO now.
@@ -2177,7 +2192,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	cnt = 0;
 	list_for_each_entry(nsh, &newstripes, lru) {
 		lock_device_hash_lock(conf, hash);
-		wait_event_cmd(conf->wait_for_stripe,
+		wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
 				    !list_empty(conf->inactive_list + hash),
 				    unlock_device_hash_lock(conf, hash),
 				    lock_device_hash_lock(conf, hash));
@@ -2213,6 +2228,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 	} else
 		err = -ENOMEM;
 
+	mutex_unlock(&conf->cache_size_mutex);
 	/* Step 4, return new stripes to service */
 	while(!list_empty(&newstripes)) {
 		nsh = list_entry(newstripes.next, struct stripe_head, lru);
@@ -2240,7 +2256,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
 static int drop_one_stripe(struct r5conf *conf)
 {
 	struct stripe_head *sh;
-	int hash = (conf->max_nr_stripes - 1) % NR_STRIPE_HASH_LOCKS;
+	int hash = (conf->max_nr_stripes - 1) & STRIPE_HASH_LOCKS_MASK;
 
 	spin_lock_irq(conf->hash_locks + hash);
 	sh = get_free_stripe(conf, hash);
@@ -4050,8 +4066,10 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
 				 &first_bad, &bad_sectors))
 			set_bit(R5_ReadRepl, &dev->flags);
 		else {
-			if (rdev)
+			if (rdev && !test_bit(Faulty, &rdev->flags))
 				set_bit(R5_NeedReplace, &dev->flags);
+			else
+				clear_bit(R5_NeedReplace, &dev->flags);
 			rdev = rcu_dereference(conf->disks[i].rdev);
 			clear_bit(R5_ReadRepl, &dev->flags);
 		}
@@ -4760,7 +4778,7 @@ static void raid5_align_endio(struct bio *bi, int error)
 					 raid_bi, 0);
 		bio_endio(raid_bi, 0);
 		if (atomic_dec_and_test(&conf->active_aligned_reads))
-			wake_up(&conf->wait_for_stripe);
+			wake_up(&conf->wait_for_quiescent);
 		return;
 	}
 
@@ -4855,7 +4873,7 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
 		align_bi->bi_iter.bi_sector += rdev->data_offset;
 
 		spin_lock_irq(&conf->device_lock);
-		wait_event_lock_irq(conf->wait_for_stripe,
+		wait_event_lock_irq(conf->wait_for_quiescent,
 				    conf->quiesce == 0,
 				    conf->device_lock);
 		atomic_inc(&conf->active_aligned_reads);
@@ -5699,7 +5717,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
 		bio_endio(raid_bio, 0);
 	}
 	if (atomic_dec_and_test(&conf->active_aligned_reads))
-		wake_up(&conf->wait_for_stripe);
+		wake_up(&conf->wait_for_quiescent);
 	return handled;
 }
 
@@ -5846,12 +5864,14 @@ static void raid5d(struct md_thread *thread)
 	pr_debug("%d stripes handled\n", handled);
 
 	spin_unlock_irq(&conf->device_lock);
-	if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
+	if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
+	    mutex_trylock(&conf->cache_size_mutex)) {
 		grow_one_stripe(conf, __GFP_NOWARN);
 		/* Set flag even if allocation failed. This helps
 		 * slow down allocation requests when mem is short
 		 */
 		set_bit(R5_DID_ALLOC, &conf->cache_state);
+		mutex_unlock(&conf->cache_size_mutex);
 	}
 
 	async_tx_issue_pending_all();
@@ -5883,18 +5903,22 @@ raid5_set_cache_size(struct mddev *mddev, int size)
 		return -EINVAL;
 
 	conf->min_nr_stripes = size;
+	mutex_lock(&conf->cache_size_mutex);
 	while (size < conf->max_nr_stripes &&
 	       drop_one_stripe(conf))
 		;
+	mutex_unlock(&conf->cache_size_mutex);
 
 
 	err = md_allow_write(mddev);
 	if (err)
 		return err;
 
+	mutex_lock(&conf->cache_size_mutex);
 	while (size > conf->max_nr_stripes)
 		if (!grow_one_stripe(conf, GFP_KERNEL))
 			break;
+	mutex_unlock(&conf->cache_size_mutex);
 
 	return 0;
 }
@@ -6360,11 +6384,19 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink,
 				      struct shrink_control *sc)
 {
 	struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
-	int ret = 0;
-	while (ret < sc->nr_to_scan) {
-		if (drop_one_stripe(conf) == 0)
-			return SHRINK_STOP;
-		ret++;
+	unsigned long ret = SHRINK_STOP;
+
+	if (mutex_trylock(&conf->cache_size_mutex)) {
+		ret= 0;
+		while (ret < sc->nr_to_scan &&
+		       conf->max_nr_stripes > conf->min_nr_stripes) {
+			if (drop_one_stripe(conf) == 0) {
+				ret = SHRINK_STOP;
+				break;
+			}
+			ret++;
+		}
+		mutex_unlock(&conf->cache_size_mutex);
 	}
 	return ret;
 }
@@ -6433,7 +6465,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
 		goto abort;
 	spin_lock_init(&conf->device_lock);
 	seqcount_init(&conf->gen_lock);
-	init_waitqueue_head(&conf->wait_for_stripe);
+	mutex_init(&conf->cache_size_mutex);
+	init_waitqueue_head(&conf->wait_for_quiescent);
+	for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
+		init_waitqueue_head(&conf->wait_for_stripe[i]);
+	}
 	init_waitqueue_head(&conf->wait_for_overlap);
 	INIT_LIST_HEAD(&conf->handle_list);
 	INIT_LIST_HEAD(&conf->hold_list);
@@ -7466,7 +7502,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
 		 * active stripes can drain
 		 */
 		conf->quiesce = 2;
-		wait_event_cmd(conf->wait_for_stripe,
+		wait_event_cmd(conf->wait_for_quiescent,
 			       atomic_read(&conf->active_stripes) == 0 &&
 			       atomic_read(&conf->active_aligned_reads) == 0,
 			       unlock_all_device_hash_locks_irq(conf),
@@ -7480,7 +7516,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
 	case 0: /* re-enable writes */
 		lock_all_device_hash_locks_irq(conf);
 		conf->quiesce = 0;
-		wake_up(&conf->wait_for_stripe);
+		wake_up(&conf->wait_for_quiescent);
 		wake_up(&conf->wait_for_overlap);
 		unlock_all_device_hash_locks_irq(conf);
 		break;