aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2013-11-27 18:55:27 -0500
committerNeilBrown <neilb@suse.de>2013-11-27 19:00:15 -0500
commit6d183de4077191d1201283a9035ce57a9b05254d (patch)
tree2fcae1c346ab7cdb7c9cd3c45f78596d3876017f /drivers/md
parent142d44c310819e1965ca70b4d55d7679f5797e25 (diff)
md/raid5: fix newly-broken locking in get_active_stripe.
commit 566c09c53455d7c4f1 raid5: relieve lock contention in get_active_stripe() modified the locking in get_active_stripe() reducing the range protected by the (highly contended) device_lock. Unfortunately it reduced the range too much opening up some races. One race can occur if get_priority_stripe runs between the test on sh->count and device_lock being taken. This will mean that sh->lru is not empty while get_active_stripe thinks ->count is zero resulting in a 'BUG' firing. Another race happens if __release_stripe is called immediately after sh->count is tested and found to be non-zero. If STRIPE_HANDLE is not set, get_active_stripe should increment ->active_stripes when it increments ->count from 0, but as it didn't think it was 0, it doesn't. Extending device_lock to cover the test on sh->count close these races. While we are here, fix the two BUG tests: -If count is zero, then lru really must not be empty, or we've lock the stripe_head somehow - no other tests are relevant. -STRIPE_ON_RELEASE_LIST is completely independent of ->lru so testing it is pointless. Reported-and-tested-by: Brassow Jonathan <jbrassow@redhat.com> Reviewed-by: Shaohua Li <shli@kernel.org> Fixes: 566c09c53455d7c4f1 Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/raid5.c11
1 files changed, 4 insertions, 7 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 676d8b7c5117..cc055da02e2a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -678,26 +678,23 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
678 } else 678 } else
679 init_stripe(sh, sector, previous); 679 init_stripe(sh, sector, previous);
680 } else { 680 } else {
681 spin_lock(&conf->device_lock);
681 if (atomic_read(&sh->count)) { 682 if (atomic_read(&sh->count)) {
682 BUG_ON(!list_empty(&sh->lru) 683 BUG_ON(!list_empty(&sh->lru)
683 && !test_bit(STRIPE_EXPANDING, &sh->state) 684 && !test_bit(STRIPE_EXPANDING, &sh->state)
684 && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state) 685 && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)
685 && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state)); 686 );
686 } else { 687 } else {
687 spin_lock(&conf->device_lock);
688 if (!test_bit(STRIPE_HANDLE, &sh->state)) 688 if (!test_bit(STRIPE_HANDLE, &sh->state))
689 atomic_inc(&conf->active_stripes); 689 atomic_inc(&conf->active_stripes);
690 if (list_empty(&sh->lru) && 690 BUG_ON(list_empty(&sh->lru));
691 !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) &&
692 !test_bit(STRIPE_EXPANDING, &sh->state))
693 BUG();
694 list_del_init(&sh->lru); 691 list_del_init(&sh->lru);
695 if (sh->group) { 692 if (sh->group) {
696 sh->group->stripes_cnt--; 693 sh->group->stripes_cnt--;
697 sh->group = NULL; 694 sh->group = NULL;
698 } 695 }
699 spin_unlock(&conf->device_lock);
700 } 696 }
697 spin_unlock(&conf->device_lock);
701 } 698 }
702 } while (sh == NULL); 699 } while (sh == NULL);
703 700