aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2013-03-20 09:39:42 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-03-20 09:39:42 -0400
commit1ada47d9468fe3907f7f9e00179168f5e2f90803 (patch)
tree0f3e3a17af30c8fafac2efc5833c5743bcfb7007
parent0e401101db49959f5783f6ee9e676124b5a183ac (diff)
ext4: fix ext4_evict_inode() racing against workqueue processing code
Commit 84c17543ab56 (ext4: move work from io_end to inode) triggered a regression when running xfstest #270 when the file system is mounted with dioread_nolock. The problem is that after ext4_evict_inode() calls ext4_ioend_wait(), this guarantees that last io_end structure has been freed, but it does not guarantee that the workqueue structure, which was moved into the inode by commit 84c17543ab56, is actually finished. Once ext4_flush_completed_IO() calls ext4_free_io_end() on CPU #1, this will allow ext4_ioend_wait() to return on CPU #2, at which point the evict_inode() codepath can race against the workqueue code on CPU #1 accessing EXT4_I(inode)->i_unwritten_work to find the next item of work to do. Fix this by calling cancel_work_sync() in ext4_ioend_wait(), which will be renamed ext4_ioend_shutdown(), since it is only used by ext4_evict_inode(). Also, move the call to ext4_ioend_shutdown() until after truncate_inode_pages() and filemap_write_and_wait() are called, to make sure all dirty pages have been written back and flushed from the page cache first. BUG: unable to handle kernel NULL pointer dereference at (null) IP: [<c01dda6a>] cwq_activate_delayed_work+0x3b/0x7e *pdpt = 0000000030bc3001 *pde = 0000000000000000 Oops: 0000 [#1] SMP DEBUG_PAGEALLOC Modules linked in: Pid: 6, comm: kworker/u:0 Not tainted 3.8.0-rc3-00013-g84c1754-dirty #91 Bochs Bochs EIP: 0060:[<c01dda6a>] EFLAGS: 00010046 CPU: 0 EIP is at cwq_activate_delayed_work+0x3b/0x7e EAX: 00000000 EBX: 00000000 ECX: f505fe54 EDX: 00000000 ESI: ed5b697c EDI: 00000006 EBP: f64b7e8c ESP: f64b7e84 DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 CR0: 8005003b CR2: 00000000 CR3: 30bc2000 CR4: 000006f0 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff0ff0 DR7: 00000400 Process kworker/u:0 (pid: 6, ti=f64b6000 task=f64b4160 task.ti=f64b6000) Stack: f505fe00 00000006 f64b7e9c c01de3d7 f6435540 00000003 f64b7efc c01def1d f6435540 00000002 00000000 0000008a c16d0808 c040a10b c16d07d8 c16d08b0 f505fe00 c16d0780 00000000 00000000 ee153df4 c1ce4a30 c17d0e30 00000000 Call Trace: [<c01de3d7>] cwq_dec_nr_in_flight+0x71/0xfb [<c01def1d>] process_one_work+0x5d8/0x637 [<c040a10b>] ? ext4_end_bio+0x300/0x300 [<c01e3105>] worker_thread+0x249/0x3ef [<c01ea317>] kthread+0xd8/0xeb [<c01e2ebc>] ? manage_workers+0x4bb/0x4bb [<c023a370>] ? trace_hardirqs_on+0x27/0x37 [<c0f1b4b7>] ret_from_kernel_thread+0x1b/0x28 [<c01ea23f>] ? __init_kthread_worker+0x71/0x71 Code: 01 83 15 ac ff 6c c1 00 31 db 89 c6 8b 00 a8 04 74 12 89 c3 30 db 83 05 b0 ff 6c c1 01 83 15 b4 ff 6c c1 00 89 f0 e8 42 ff ff ff <8b> 13 89 f0 83 05 b8 ff 6c c1 6c c1 00 31 c9 83 EIP: [<c01dda6a>] cwq_activate_delayed_work+0x3b/0x7e SS:ESP 0068:f64b7e84 CR2: 0000000000000000 ---[ end trace a1923229da53d8a4 ]--- Signed-off-by: "Theodore Ts'o" <tytso@mit.edu> Cc: Jan Kara <jack@suse.cz>
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/page-io.c12
3 files changed, 14 insertions, 4 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 167ff564bbfa..3b83cd604796 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2617extern int __init ext4_init_pageio(void); 2617extern int __init ext4_init_pageio(void);
2618extern void ext4_add_complete_io(ext4_io_end_t *io_end); 2618extern void ext4_add_complete_io(ext4_io_end_t *io_end);
2619extern void ext4_exit_pageio(void); 2619extern void ext4_exit_pageio(void);
2620extern void ext4_ioend_wait(struct inode *); 2620extern void ext4_ioend_shutdown(struct inode *);
2621extern void ext4_free_io_end(ext4_io_end_t *io); 2621extern void ext4_free_io_end(ext4_io_end_t *io);
2622extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2622extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2623extern void ext4_end_io_work(struct work_struct *work); 2623extern void ext4_end_io_work(struct work_struct *work);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 65bbc9339aca..ea5f24ffa60c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode)
185 185
186 trace_ext4_evict_inode(inode); 186 trace_ext4_evict_inode(inode);
187 187
188 ext4_ioend_wait(inode);
189
190 if (inode->i_nlink) { 188 if (inode->i_nlink) {
191 /* 189 /*
192 * When journalling data dirty buffers are tracked only in the 190 * When journalling data dirty buffers are tracked only in the
@@ -216,6 +214,7 @@ void ext4_evict_inode(struct inode *inode)
216 filemap_write_and_wait(&inode->i_data); 214 filemap_write_and_wait(&inode->i_data);
217 } 215 }
218 truncate_inode_pages(&inode->i_data, 0); 216 truncate_inode_pages(&inode->i_data, 0);
217 ext4_ioend_shutdown(inode);
219 goto no_delete; 218 goto no_delete;
220 } 219 }
221 220
@@ -225,6 +224,7 @@ void ext4_evict_inode(struct inode *inode)
225 if (ext4_should_order_data(inode)) 224 if (ext4_should_order_data(inode))
226 ext4_begin_ordered_truncate(inode, 0); 225 ext4_begin_ordered_truncate(inode, 0);
227 truncate_inode_pages(&inode->i_data, 0); 226 truncate_inode_pages(&inode->i_data, 0);
227 ext4_ioend_shutdown(inode);
228 228
229 if (is_bad_inode(inode)) 229 if (is_bad_inode(inode))
230 goto no_delete; 230 goto no_delete;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 809b31003ecc..047a6de04a0a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -50,11 +50,21 @@ void ext4_exit_pageio(void)
50 kmem_cache_destroy(io_page_cachep); 50 kmem_cache_destroy(io_page_cachep);
51} 51}
52 52
53void ext4_ioend_wait(struct inode *inode) 53/*
54 * This function is called by ext4_evict_inode() to make sure there is
55 * no more pending I/O completion work left to do.
56 */
57void ext4_ioend_shutdown(struct inode *inode)
54{ 58{
55 wait_queue_head_t *wq = ext4_ioend_wq(inode); 59 wait_queue_head_t *wq = ext4_ioend_wq(inode);
56 60
57 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); 61 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
62 /*
63 * We need to make sure the work structure is finished being
64 * used before we let the inode get destroyed.
65 */
66 if (work_pending(&EXT4_I(inode)->i_unwritten_work))
67 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
58} 68}
59 69
60static void put_io_page(struct ext4_io_page *io_page) 70static void put_io_page(struct ext4_io_page *io_page)