diff options
| author | NeilBrown <neilb@suse.de> | 2013-11-27 18:55:27 -0500 |
|---|---|---|
| committer | NeilBrown <neilb@suse.de> | 2013-11-27 19:00:15 -0500 |
| commit | 6d183de4077191d1201283a9035ce57a9b05254d (patch) | |
| tree | 2fcae1c346ab7cdb7c9cd3c45f78596d3876017f | |
| parent | 142d44c310819e1965ca70b4d55d7679f5797e25 (diff) | |
md/raid5: fix newly-broken locking in get_active_stripe.
commit 566c09c53455d7c4f1 raid5: relieve lock contention in get_active_stripe()
modified the locking in get_active_stripe() reducing the range
protected by the (highly contended) device_lock.
Unfortunately it reduced the range too much opening up some races.
One race can occur if get_priority_stripe runs between the
test on sh->count and device_lock being taken.
This will mean that sh->lru is not empty while get_active_stripe
thinks ->count is zero resulting in a 'BUG' firing.
Another race happens if __release_stripe is called immediately
after sh->count is tested and found to be non-zero. If STRIPE_HANDLE
is not set, get_active_stripe should increment ->active_stripes
when it increments ->count from 0, but as it didn't think it was 0,
it doesn't.
Extending device_lock to cover the test on sh->count close these
races.
While we are here, fix the two BUG tests:
-If count is zero, then lru really must not be empty, or we've
lock the stripe_head somehow - no other tests are relevant.
-STRIPE_ON_RELEASE_LIST is completely independent of ->lru so
testing it is pointless.
Reported-and-tested-by: Brassow Jonathan <jbrassow@redhat.com>
Reviewed-by: Shaohua Li <shli@kernel.org>
Fixes: 566c09c53455d7c4f1
Signed-off-by: NeilBrown <neilb@suse.de>
| -rw-r--r-- | drivers/md/raid5.c | 11 |
1 files changed, 4 insertions, 7 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 676d8b7c5117..cc055da02e2a 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c | |||
| @@ -678,26 +678,23 @@ get_active_stripe(struct r5conf *conf, sector_t sector, | |||
| 678 | } else | 678 | } else |
| 679 | init_stripe(sh, sector, previous); | 679 | init_stripe(sh, sector, previous); |
| 680 | } else { | 680 | } else { |
| 681 | spin_lock(&conf->device_lock); | ||
| 681 | if (atomic_read(&sh->count)) { | 682 | if (atomic_read(&sh->count)) { |
| 682 | BUG_ON(!list_empty(&sh->lru) | 683 | BUG_ON(!list_empty(&sh->lru) |
| 683 | && !test_bit(STRIPE_EXPANDING, &sh->state) | 684 | && !test_bit(STRIPE_EXPANDING, &sh->state) |
| 684 | && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state) | 685 | && !test_bit(STRIPE_ON_UNPLUG_LIST, &sh->state) |
| 685 | && !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state)); | 686 | ); |
| 686 | } else { | 687 | } else { |
| 687 | spin_lock(&conf->device_lock); | ||
| 688 | if (!test_bit(STRIPE_HANDLE, &sh->state)) | 688 | if (!test_bit(STRIPE_HANDLE, &sh->state)) |
| 689 | atomic_inc(&conf->active_stripes); | 689 | atomic_inc(&conf->active_stripes); |
| 690 | if (list_empty(&sh->lru) && | 690 | BUG_ON(list_empty(&sh->lru)); |
| 691 | !test_bit(STRIPE_ON_RELEASE_LIST, &sh->state) && | ||
| 692 | !test_bit(STRIPE_EXPANDING, &sh->state)) | ||
| 693 | BUG(); | ||
| 694 | list_del_init(&sh->lru); | 691 | list_del_init(&sh->lru); |
| 695 | if (sh->group) { | 692 | if (sh->group) { |
| 696 | sh->group->stripes_cnt--; | 693 | sh->group->stripes_cnt--; |
| 697 | sh->group = NULL; | 694 | sh->group = NULL; |
| 698 | } | 695 | } |
| 699 | spin_unlock(&conf->device_lock); | ||
| 700 | } | 696 | } |
| 697 | spin_unlock(&conf->device_lock); | ||
| 701 | } | 698 | } |
| 702 | } while (sh == NULL); | 699 | } while (sh == NULL); |
| 703 | 700 | ||
