diff options
author | xuejiufei <xuejiufei@huawei.com> | 2016-01-14 18:17:38 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-01-14 19:00:49 -0500 |
commit | bef5502de074b6f6fa647b94b73155d675694420 (patch) | |
tree | 3c6c516e6cd7923dfc079e3cbaa27f83b2dc554c /fs/ocfs2/dlm | |
parent | 1cce4df04f37d3f7b969e85528fa54f918a06f03 (diff) |
ocfs2/dlm: ignore cleaning the migration mle that is inuse
We have found that the migration source will trigger a BUG because the
refcount of the mle is already zero before the put when the target goes
down during migration. The situation is as follows:
dlm_migrate_lockres
dlm_add_migration_mle
dlm_mark_lockres_migrating
dlm_get_mle_inuse
<<<<<< Now the refcount of the mle is 2.
dlm_send_one_lockres and wait for the target to become the
new master.
<<<<<< o2hb detects that the target is down and cleans the migration
mle. Now the refcount is 1.
dlm_migrate_lockres is woken, and puts the mle twice when it finds the target
has gone down, which triggers the BUG with the following message:
"ERROR: bad mle: ".
Signed-off-by: Jiufei Xue <xuejiufei@huawei.com>
Reviewed-by: Joseph Qi <joseph.qi@huawei.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/dlm')
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 26 |
1 files changed, 15 insertions, 11 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 6f0748122117..8b9d483e94a6 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -2519,6 +2519,11 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
2519 | spin_lock(&dlm->master_lock); | 2519 | spin_lock(&dlm->master_lock); |
2520 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, | 2520 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, |
2521 | namelen, target, dlm->node_num); | 2521 | namelen, target, dlm->node_num); |
2522 | /* get an extra reference on the mle. | ||
2523 | * otherwise the assert_master from the new | ||
2524 | * master will destroy this. | ||
2525 | */ | ||
2526 | dlm_get_mle_inuse(mle); | ||
2522 | spin_unlock(&dlm->master_lock); | 2527 | spin_unlock(&dlm->master_lock); |
2523 | spin_unlock(&dlm->spinlock); | 2528 | spin_unlock(&dlm->spinlock); |
2524 | 2529 | ||
@@ -2554,6 +2559,7 @@ fail: | |||
2554 | if (mle_added) { | 2559 | if (mle_added) { |
2555 | dlm_mle_detach_hb_events(dlm, mle); | 2560 | dlm_mle_detach_hb_events(dlm, mle); |
2556 | dlm_put_mle(mle); | 2561 | dlm_put_mle(mle); |
2562 | dlm_put_mle_inuse(mle); | ||
2557 | } else if (mle) { | 2563 | } else if (mle) { |
2558 | kmem_cache_free(dlm_mle_cache, mle); | 2564 | kmem_cache_free(dlm_mle_cache, mle); |
2559 | mle = NULL; | 2565 | mle = NULL; |
@@ -2571,17 +2577,6 @@ fail: | |||
2571 | * ensure that all assert_master work is flushed. */ | 2577 | * ensure that all assert_master work is flushed. */ |
2572 | flush_workqueue(dlm->dlm_worker); | 2578 | flush_workqueue(dlm->dlm_worker); |
2573 | 2579 | ||
2574 | /* get an extra reference on the mle. | ||
2575 | * otherwise the assert_master from the new | ||
2576 | * master will destroy this. | ||
2577 | * also, make sure that all callers of dlm_get_mle | ||
2578 | * take both dlm->spinlock and dlm->master_lock */ | ||
2579 | spin_lock(&dlm->spinlock); | ||
2580 | spin_lock(&dlm->master_lock); | ||
2581 | dlm_get_mle_inuse(mle); | ||
2582 | spin_unlock(&dlm->master_lock); | ||
2583 | spin_unlock(&dlm->spinlock); | ||
2584 | |||
2585 | /* notify new node and send all lock state */ | 2580 | /* notify new node and send all lock state */ |
2586 | /* call send_one_lockres with migration flag. | 2581 | /* call send_one_lockres with migration flag. |
2587 | * this serves as notice to the target node that a | 2582 | * this serves as notice to the target node that a |
@@ -3312,6 +3307,15 @@ top: | |||
3312 | mle->new_master != dead_node) | 3307 | mle->new_master != dead_node) |
3313 | continue; | 3308 | continue; |
3314 | 3309 | ||
3310 | if (mle->new_master == dead_node && mle->inuse) { | ||
3311 | mlog(ML_NOTICE, "%s: target %u died during " | ||
3312 | "migration from %u, the MLE is " | ||
3313 | "still keep used, ignore it!\n", | ||
3314 | dlm->name, dead_node, | ||
3315 | mle->master); | ||
3316 | continue; | ||
3317 | } | ||
3318 | |||
3315 | /* If we have reached this point, this mle needs to be | 3319 | /* If we have reached this point, this mle needs to be |
3316 | * removed from the list and freed. */ | 3320 | * removed from the list and freed. */ |
3317 | dlm_clean_migration_mle(dlm, mle); | 3321 | dlm_clean_migration_mle(dlm, mle); |