aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jbd2
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2016-10-28 12:58:12 -0400
committer: Ingo Molnar <mingo@kernel.org> 2017-01-14 05:30:06 -0500
commit: 6fa7aa50b2c48400bbd045daf3a2498882eb0596 (patch)
tree: c4b5c8a06b1afe29d19dc73298b91388ee55f49d /fs/jbd2
parent: 1460cb65a10f6c7a6e3a1c76513338861a0a43b6 (diff)
fs/jbd2, locking/mutex, sched/wait: Use mutex_lock_io() for journal->j_checkpoint_mutex
When an ext4 fs is bogged down by a lot of metadata IOs (in the reported case, it was deletion of millions of files, but any massive amount of journal writes would do), after the journal is filled up, tasks which try to access the filesystem and aren't currently performing the journal writes end up waiting in __jbd2_log_wait_for_space() for journal->j_checkpoint_mutex.

Because those mutex sleeps aren't marked as iowait, this condition can lead to misleadingly low iowait and /proc/stat:procs_blocked. While iowait propagation is far from strict, this condition can be triggered fairly easily and annotating these sleeps correctly helps initial diagnosis quite a bit.

Use the new mutex_lock_io() for journal->j_checkpoint_mutex so that these sleeps are properly marked as iowait.

Reported-by: Mingbo Wan <mingbo@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jan Kara <jack@suse.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kernel-team@fb.com
Link: http://lkml.kernel.org/r/1477673892-28940-5-git-send-email-tj@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- fs/jbd2/commit.c | 2
-rw-r--r-- fs/jbd2/journal.c | 12
2 files changed, 7 insertions, 7 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8c514367ba5a..b6b194ec1b4f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -393,7 +393,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
393 /* Do we need to erase the effects of a prior jbd2_journal_flush? */ 393 /* Do we need to erase the effects of a prior jbd2_journal_flush? */
394 if (journal->j_flags & JBD2_FLUSHED) { 394 if (journal->j_flags & JBD2_FLUSHED) {
395 jbd_debug(3, "super block updated\n"); 395 jbd_debug(3, "super block updated\n");
396 mutex_lock(&journal->j_checkpoint_mutex); 396 mutex_lock_io(&journal->j_checkpoint_mutex);
397 /* 397 /*
398 * We hold j_checkpoint_mutex so tail cannot change under us. 398 * We hold j_checkpoint_mutex so tail cannot change under us.
399 * We don't need any special data guarantees for writing sb 399 * We don't need any special data guarantees for writing sb
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index a097048ed1a3..d8a5d0a08f07 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -944,7 +944,7 @@ out:
944 */ 944 */
945void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block) 945void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
946{ 946{
947 mutex_lock(&journal->j_checkpoint_mutex); 947 mutex_lock_io(&journal->j_checkpoint_mutex);
948 if (tid_gt(tid, journal->j_tail_sequence)) 948 if (tid_gt(tid, journal->j_tail_sequence))
949 __jbd2_update_log_tail(journal, tid, block); 949 __jbd2_update_log_tail(journal, tid, block);
950 mutex_unlock(&journal->j_checkpoint_mutex); 950 mutex_unlock(&journal->j_checkpoint_mutex);
@@ -1304,7 +1304,7 @@ static int journal_reset(journal_t *journal)
1304 journal->j_flags |= JBD2_FLUSHED; 1304 journal->j_flags |= JBD2_FLUSHED;
1305 } else { 1305 } else {
1306 /* Lock here to make assertions happy... */ 1306 /* Lock here to make assertions happy... */
1307 mutex_lock(&journal->j_checkpoint_mutex); 1307 mutex_lock_io(&journal->j_checkpoint_mutex);
1308 /* 1308 /*
1309 * Update log tail information. We use REQ_FUA since new 1309 * Update log tail information. We use REQ_FUA since new
1310 * transaction will start reusing journal space and so we 1310 * transaction will start reusing journal space and so we
@@ -1691,7 +1691,7 @@ int jbd2_journal_destroy(journal_t *journal)
1691 spin_lock(&journal->j_list_lock); 1691 spin_lock(&journal->j_list_lock);
1692 while (journal->j_checkpoint_transactions != NULL) { 1692 while (journal->j_checkpoint_transactions != NULL) {
1693 spin_unlock(&journal->j_list_lock); 1693 spin_unlock(&journal->j_list_lock);
1694 mutex_lock(&journal->j_checkpoint_mutex); 1694 mutex_lock_io(&journal->j_checkpoint_mutex);
1695 err = jbd2_log_do_checkpoint(journal); 1695 err = jbd2_log_do_checkpoint(journal);
1696 mutex_unlock(&journal->j_checkpoint_mutex); 1696 mutex_unlock(&journal->j_checkpoint_mutex);
1697 /* 1697 /*
@@ -1713,7 +1713,7 @@ int jbd2_journal_destroy(journal_t *journal)
1713 1713
1714 if (journal->j_sb_buffer) { 1714 if (journal->j_sb_buffer) {
1715 if (!is_journal_aborted(journal)) { 1715 if (!is_journal_aborted(journal)) {
1716 mutex_lock(&journal->j_checkpoint_mutex); 1716 mutex_lock_io(&journal->j_checkpoint_mutex);
1717 1717
1718 write_lock(&journal->j_state_lock); 1718 write_lock(&journal->j_state_lock);
1719 journal->j_tail_sequence = 1719 journal->j_tail_sequence =
@@ -1955,7 +1955,7 @@ int jbd2_journal_flush(journal_t *journal)
1955 spin_lock(&journal->j_list_lock); 1955 spin_lock(&journal->j_list_lock);
1956 while (!err && journal->j_checkpoint_transactions != NULL) { 1956 while (!err && journal->j_checkpoint_transactions != NULL) {
1957 spin_unlock(&journal->j_list_lock); 1957 spin_unlock(&journal->j_list_lock);
1958 mutex_lock(&journal->j_checkpoint_mutex); 1958 mutex_lock_io(&journal->j_checkpoint_mutex);
1959 err = jbd2_log_do_checkpoint(journal); 1959 err = jbd2_log_do_checkpoint(journal);
1960 mutex_unlock(&journal->j_checkpoint_mutex); 1960 mutex_unlock(&journal->j_checkpoint_mutex);
1961 spin_lock(&journal->j_list_lock); 1961 spin_lock(&journal->j_list_lock);
@@ -1965,7 +1965,7 @@ int jbd2_journal_flush(journal_t *journal)
1965 if (is_journal_aborted(journal)) 1965 if (is_journal_aborted(journal))
1966 return -EIO; 1966 return -EIO;
1967 1967
1968 mutex_lock(&journal->j_checkpoint_mutex); 1968 mutex_lock_io(&journal->j_checkpoint_mutex);
1969 if (!err) { 1969 if (!err) {
1970 err = jbd2_cleanup_journal_tail(journal); 1970 err = jbd2_cleanup_journal_tail(journal);
1971 if (err < 0) { 1971 if (err < 0) {