aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJiaying Zhang <jiayingz@google.com>2011-08-13 12:17:13 -0400
committerTheodore Ts'o <tytso@mit.edu>2011-08-13 12:17:13 -0400
commit2581fdc810889fdea97689cb62481201d579c796 (patch)
tree55d7a2244a5efa184e290446c0ee3f61b71bf264
parent441c850857148935babe000fc2ba1455fe54a6a9 (diff)
ext4: call ext4_ioend_wait and ext4_flush_completed_IO in ext4_evict_inode
Flush inode's i_completed_io_list before calling ext4_io_wait to prevent the following deadlock scenario: A page fault happens while some process is writing inode A. During page fault, shrink_icache_memory is called that in turn evicts another inode B. Inode B has some pending io_end work so it calls ext4_ioend_wait() that waits for inode B's i_ioend_count to become zero. However, inode B's ioend work was queued behind some of inode A's ioend work on the same cpu's ext4-dio-unwritten workqueue. As the ext4-dio-unwritten thread on that cpu is processing inode A's ioend work, it tries to grab inode A's i_mutex lock. Since the i_mutex lock of inode A is still hold before the page fault happened, we enter a deadlock. Also moves ext4_flush_completed_IO and ext4_ioend_wait from ext4_destroy_inode() to ext4_evict_inode(). During inode deleteion, ext4_evict_inode() is called before ext4_destroy_inode() and in ext4_evict_inode(), we may call ext4_truncate() without holding i_mutex lock. As a result, there is a race between flush_completed_IO that is called from ext4_ext_truncate() and ext4_end_io_work, which may cause corruption on an io_end structure. This change moves ext4_flush_completed_IO and ext4_ioend_wait from ext4_destroy_inode() to ext4_evict_inode() to resolve the race between ext4_truncate() and ext4_end_io_work during inode deletion. Signed-off-by: Jiaying Zhang <jiayingz@google.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: stable@kernel.org
-rw-r--r--fs/ext4/inode.c6
-rw-r--r--fs/ext4/super.c1
2 files changed, 6 insertions, 1 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ad3a7ca21069..7dd698107822 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -120,6 +120,12 @@ void ext4_evict_inode(struct inode *inode)
120 int err; 120 int err;
121 121
122 trace_ext4_evict_inode(inode); 122 trace_ext4_evict_inode(inode);
123
124 mutex_lock(&inode->i_mutex);
125 ext4_flush_completed_IO(inode);
126 mutex_unlock(&inode->i_mutex);
127 ext4_ioend_wait(inode);
128
123 if (inode->i_nlink) { 129 if (inode->i_nlink) {
124 /* 130 /*
125 * When journalling data dirty buffers are tracked only in the 131 * When journalling data dirty buffers are tracked only in the
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 4687fea0c00f..44d0c8db2239 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -919,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head)
919 919
920static void ext4_destroy_inode(struct inode *inode) 920static void ext4_destroy_inode(struct inode *inode)
921{ 921{
922 ext4_ioend_wait(inode);
923 if (!list_empty(&(EXT4_I(inode)->i_orphan))) { 922 if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
924 ext4_msg(inode->i_sb, KERN_ERR, 923 ext4_msg(inode->i_sb, KERN_ERR,
925 "Inode %lu (%p): orphan list check failed!", 924 "Inode %lu (%p): orphan list check failed!",