summaryrefslogtreecommitdiffstats
path: root/fs/jbd2/checkpoint.c
diff options
context:
space:
mode:
authorXiaoguang Wang <xiaoguang.wang@linux.alibaba.com>2019-01-31 23:42:11 -0500
committerTheodore Ts'o <tytso@mit.edu>2019-01-31 23:42:11 -0500
commit53cf978457325d8fb2cdecd7981b31a8229e446e (patch)
treef090389c633b762bdcfe55aafa796fc46dc06cbb /fs/jbd2/checkpoint.c
parent8fdd60f2ae3682caf2a7258626abc21eb4711892 (diff)
jbd2: fix deadlock while checkpoint thread waits commit thread to finish
This issue was found when I tried to put checkpoint work in a separate thread, the deadlock below happened: Thread1 | Thread2 __jbd2_log_wait_for_space | jbd2_log_do_checkpoint (hold j_checkpoint_mutex)| if (jh->b_transaction != NULL) | ... | jbd2_log_start_commit(journal, tid); |jbd2_update_log_tail | will lock j_checkpoint_mutex, | but will be blocked here. | jbd2_log_wait_commit(journal, tid); | wait_event(journal->j_wait_done_commit, | !tid_gt(tid, journal->j_commit_sequence)); | ... |wake_up(j_wait_done_commit) } | then deadlock occurs, Thread1 will never be waken up. To fix this issue, drop j_checkpoint_mutex in jbd2_log_do_checkpoint() when we are going to wait for transaction commit. Reviewed-by: Jan Kara <jack@suse.cz> Signed-off-by: Xiaoguang Wang <xiaoguang.wang@linux.alibaba.com> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/jbd2/checkpoint.c')
-rw-r--r--fs/jbd2/checkpoint.c17
1 files changed, 15 insertions, 2 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 26f8d7e46462..02e0b79753e7 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -113,7 +113,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
113 nblocks = jbd2_space_needed(journal); 113 nblocks = jbd2_space_needed(journal);
114 while (jbd2_log_space_left(journal) < nblocks) { 114 while (jbd2_log_space_left(journal) < nblocks) {
115 write_unlock(&journal->j_state_lock); 115 write_unlock(&journal->j_state_lock);
116 mutex_lock(&journal->j_checkpoint_mutex); 116 mutex_lock_io(&journal->j_checkpoint_mutex);
117 117
118 /* 118 /*
119 * Test again, another process may have checkpointed while we 119 * Test again, another process may have checkpointed while we
@@ -276,9 +276,22 @@ restart:
276 "JBD2: %s: Waiting for Godot: block %llu\n", 276 "JBD2: %s: Waiting for Godot: block %llu\n",
277 journal->j_devname, (unsigned long long) bh->b_blocknr); 277 journal->j_devname, (unsigned long long) bh->b_blocknr);
278 278
279 if (batch_count)
280 __flush_batch(journal, &batch_count);
279 jbd2_log_start_commit(journal, tid); 281 jbd2_log_start_commit(journal, tid);
282 /*
283 * jbd2_journal_commit_transaction() may want
284 * to take the checkpoint_mutex if JBD2_FLUSHED
285 * is set, jbd2_update_log_tail() called by
286 * jbd2_journal_commit_transaction() may also take
287 * checkpoint_mutex. So we need to temporarily
288 * drop it.
289 */
290 mutex_unlock(&journal->j_checkpoint_mutex);
280 jbd2_log_wait_commit(journal, tid); 291 jbd2_log_wait_commit(journal, tid);
281 goto retry; 292 mutex_lock_io(&journal->j_checkpoint_mutex);
293 spin_lock(&journal->j_list_lock);
294 goto restart;
282 } 295 }
283 if (!buffer_dirty(bh)) { 296 if (!buffer_dirty(bh)) {
284 if (unlikely(buffer_write_io_error(bh)) && !result) 297 if (unlikely(buffer_write_io_error(bh)) && !result)