aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorBrian King <brking@linux.vnet.ibm.com>2010-10-27 21:25:12 -0400
committerTheodore Ts'o <tytso@mit.edu>2010-10-27 21:25:12 -0400
commit39e3ac2599a5f9aba499b5f8af809108e70a6163 (patch)
tree16c9d790029c36f217b1689b42869c8739e8ac5e /fs
parent58590b06d79f7ce5ab64ff3b6d537180fa50dc84 (diff)
jbd2: Fix I/O hang in jbd2_journal_release_jbd_inode
This fixes a hang seen in jbd2_journal_release_jbd_inode on a lot of Power 6 systems running with ext4. When we get in the hung state, all I/O to the disk in question gets blocked where we stay indefinitely. Looking at the task list, I can see we are stuck in jbd2_journal_release_jbd_inode waiting on a wake up. I added some debug code to detect this scenario and dump additional data if we were stuck in jbd2_journal_release_jbd_inode for longer than 30 minutes. When it hit, I was able to see that i_flags was 0, suggesting we missed the wake up. This patch changes i_flags to be an unsigned long, uses bit operators to access it, and adds barriers around the accesses. Prior to applying this patch, we were regularly hitting this hang on numerous systems in our test environment. After applying the patch, the hangs no longer occur. Signed-off-by: Brian King <brking@linux.vnet.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/jbd2/commit.c12
-rw-r--r--fs/jbd2/journal.c4
2 files changed, 11 insertions, 5 deletions
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7c068c189d80..6494c81e3b0a 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -26,7 +26,9 @@
26#include <linux/backing-dev.h> 26#include <linux/backing-dev.h>
27#include <linux/bio.h> 27#include <linux/bio.h>
28#include <linux/blkdev.h> 28#include <linux/blkdev.h>
29#include <linux/bitops.h>
29#include <trace/events/jbd2.h> 30#include <trace/events/jbd2.h>
31#include <asm/system.h>
30 32
31/* 33/*
32 * Default IO end handler for temporary BJ_IO buffer_heads. 34 * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -236,7 +238,7 @@ static int journal_submit_data_buffers(journal_t *journal,
236 spin_lock(&journal->j_list_lock); 238 spin_lock(&journal->j_list_lock);
237 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 239 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
238 mapping = jinode->i_vfs_inode->i_mapping; 240 mapping = jinode->i_vfs_inode->i_mapping;
239 jinode->i_flags |= JI_COMMIT_RUNNING; 241 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
240 spin_unlock(&journal->j_list_lock); 242 spin_unlock(&journal->j_list_lock);
241 /* 243 /*
242 * submit the inode data buffers. We use writepage 244 * submit the inode data buffers. We use writepage
@@ -251,7 +253,8 @@ static int journal_submit_data_buffers(journal_t *journal,
251 spin_lock(&journal->j_list_lock); 253 spin_lock(&journal->j_list_lock);
252 J_ASSERT(jinode->i_transaction == commit_transaction); 254 J_ASSERT(jinode->i_transaction == commit_transaction);
253 commit_transaction->t_flushed_data_blocks = 1; 255 commit_transaction->t_flushed_data_blocks = 1;
254 jinode->i_flags &= ~JI_COMMIT_RUNNING; 256 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
257 smp_mb__after_clear_bit();
255 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 258 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
256 } 259 }
257 spin_unlock(&journal->j_list_lock); 260 spin_unlock(&journal->j_list_lock);
@@ -272,7 +275,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
272 /* For locking, see the comment in journal_submit_data_buffers() */ 275 /* For locking, see the comment in journal_submit_data_buffers() */
273 spin_lock(&journal->j_list_lock); 276 spin_lock(&journal->j_list_lock);
274 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) { 277 list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
275 jinode->i_flags |= JI_COMMIT_RUNNING; 278 set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
276 spin_unlock(&journal->j_list_lock); 279 spin_unlock(&journal->j_list_lock);
277 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); 280 err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
278 if (err) { 281 if (err) {
@@ -288,7 +291,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
288 ret = err; 291 ret = err;
289 } 292 }
290 spin_lock(&journal->j_list_lock); 293 spin_lock(&journal->j_list_lock);
291 jinode->i_flags &= ~JI_COMMIT_RUNNING; 294 clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
295 smp_mb__after_clear_bit();
292 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); 296 wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
293 } 297 }
294 298
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 0e8014ea6b94..75e1b5a0bc2d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -42,12 +42,14 @@
42#include <linux/log2.h> 42#include <linux/log2.h>
43#include <linux/vmalloc.h> 43#include <linux/vmalloc.h>
44#include <linux/backing-dev.h> 44#include <linux/backing-dev.h>
45#include <linux/bitops.h>
45 46
46#define CREATE_TRACE_POINTS 47#define CREATE_TRACE_POINTS
47#include <trace/events/jbd2.h> 48#include <trace/events/jbd2.h>
48 49
49#include <asm/uaccess.h> 50#include <asm/uaccess.h>
50#include <asm/page.h> 51#include <asm/page.h>
52#include <asm/system.h>
51 53
52EXPORT_SYMBOL(jbd2_journal_extend); 54EXPORT_SYMBOL(jbd2_journal_extend);
53EXPORT_SYMBOL(jbd2_journal_stop); 55EXPORT_SYMBOL(jbd2_journal_stop);
@@ -2206,7 +2208,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
2206restart: 2208restart:
2207 spin_lock(&journal->j_list_lock); 2209 spin_lock(&journal->j_list_lock);
2208 /* Is commit writing out inode - we have to wait */ 2210 /* Is commit writing out inode - we have to wait */
2209 if (jinode->i_flags & JI_COMMIT_RUNNING) { 2211 if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
2210 wait_queue_head_t *wq; 2212 wait_queue_head_t *wq;
2211 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING); 2213 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
2212 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING); 2214 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);