diff options
author | Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com> | 2019-01-31 23:42:11 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2019-01-31 23:42:11 -0500 |
commit | 53cf978457325d8fb2cdecd7981b31a8229e446e (patch) | |
tree | f090389c633b762bdcfe55aafa796fc46dc06cbb /fs/jbd2/checkpoint.c | |
parent | 8fdd60f2ae3682caf2a7258626abc21eb4711892 (diff) |
jbd2: fix deadlock while checkpoint thread waits commit thread to finish
This issue was found when I tried to put checkpoint work in a separate thread,
the deadlock below happened:
Thread1 | Thread2
__jbd2_log_wait_for_space |
jbd2_log_do_checkpoint (hold j_checkpoint_mutex)|
if (jh->b_transaction != NULL) |
... |
jbd2_log_start_commit(journal, tid); |jbd2_update_log_tail
| will lock j_checkpoint_mutex,
| but will be blocked here.
|
jbd2_log_wait_commit(journal, tid); |
wait_event(journal->j_wait_done_commit, |
!tid_gt(tid, journal->j_commit_sequence)); |
... |wake_up(j_wait_done_commit)
} |
then deadlock occurs, Thread1 will never be waken up.
To fix this issue, drop j_checkpoint_mutex in jbd2_log_do_checkpoint()
when we are going to wait for transaction commit.
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/checkpoint.c')
-rw-r--r-- | fs/jbd2/checkpoint.c | 17 |
1 files changed, 15 insertions, 2 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 26f8d7e46462..02e0b79753e7 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -113,7 +113,7 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
113 | nblocks = jbd2_space_needed(journal); | 113 | nblocks = jbd2_space_needed(journal); |
114 | while (jbd2_log_space_left(journal) < nblocks) { | 114 | while (jbd2_log_space_left(journal) < nblocks) { |
115 | write_unlock(&journal->j_state_lock); | 115 | write_unlock(&journal->j_state_lock); |
116 | mutex_lock(&journal->j_checkpoint_mutex); | 116 | mutex_lock_io(&journal->j_checkpoint_mutex); |
117 | 117 | ||
118 | /* | 118 | /* |
119 | * Test again, another process may have checkpointed while we | 119 | * Test again, another process may have checkpointed while we |
@@ -276,9 +276,22 @@ restart: | |||
276 | "JBD2: %s: Waiting for Godot: block %llu\n", | 276 | "JBD2: %s: Waiting for Godot: block %llu\n", |
277 | journal->j_devname, (unsigned long long) bh->b_blocknr); | 277 | journal->j_devname, (unsigned long long) bh->b_blocknr); |
278 | 278 | ||
279 | if (batch_count) | ||
280 | __flush_batch(journal, &batch_count); | ||
279 | jbd2_log_start_commit(journal, tid); | 281 | jbd2_log_start_commit(journal, tid); |
282 | /* | ||
283 | * jbd2_journal_commit_transaction() may want | ||
284 | * to take the checkpoint_mutex if JBD2_FLUSHED | ||
285 | * is set, jbd2_update_log_tail() called by | ||
286 | * jbd2_journal_commit_transaction() may also take | ||
287 | * checkpoint_mutex. So we need to temporarily | ||
288 | * drop it. | ||
289 | */ | ||
290 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
280 | jbd2_log_wait_commit(journal, tid); | 291 | jbd2_log_wait_commit(journal, tid); |
281 | goto retry; | 292 | mutex_lock_io(&journal->j_checkpoint_mutex); |
293 | spin_lock(&journal->j_list_lock); | ||
294 | goto restart; | ||
282 | } | 295 | } |
283 | if (!buffer_dirty(bh)) { | 296 | if (!buffer_dirty(bh)) { |
284 | if (unlikely(buffer_write_io_error(bh)) && !result) | 297 | if (unlikely(buffer_write_io_error(bh)) && !result) |