diff options
author | Theodore Ts'o <tytso@mit.edu> | 2013-03-20 09:39:42 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-03-20 09:39:42 -0400 |
commit | 1ada47d9468fe3907f7f9e00179168f5e2f90803 (patch) | |
tree | 0f3e3a17af30c8fafac2efc5833c5743bcfb7007 | |
parent | 0e401101db49959f5783f6ee9e676124b5a183ac (diff) |
ext4: fix ext4_evict_inode() racing against workqueue processing code
Commit 84c17543ab56 (ext4: move work from io_end to inode) triggered a
regression when running xfstest #270 when the file system is mounted
with dioread_nolock.
The problem is that after ext4_evict_inode() calls ext4_ioend_wait(),
this guarantees that last io_end structure has been freed, but it does
not guarantee that the workqueue structure, which was moved into the
inode by commit 84c17543ab56, is actually finished. Once
ext4_flush_completed_IO() calls ext4_free_io_end() on CPU #1, this
will allow ext4_ioend_wait() to return on CPU #2, at which point the
evict_inode() codepath can race against the workqueue code on CPU #1
accessing EXT4_I(inode)->i_unwritten_work to find the next item of
work to do.
Fix this by calling cancel_work_sync() in ext4_ioend_wait(), which
will be renamed ext4_ioend_shutdown(), since it is only used by
ext4_evict_inode(). Also, move the call to ext4_ioend_shutdown()
until after truncate_inode_pages() and filemap_write_and_wait() are
called, to make sure all dirty pages have been written back and
flushed from the page cache first.
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<c01dda6a>] cwq_activate_delayed_work+0x3b/0x7e
*pdpt = 0000000030bc3001 *pde = 0000000000000000
Oops: 0000 [#1] SMP DEBUG_PAGEALLOC
Modules linked in:
Pid: 6, comm: kworker/u:0 Not tainted 3.8.0-rc3-00013-g84c1754-dirty #91 Bochs Bochs
EIP: 0060:[<c01dda6a>] EFLAGS: 00010046 CPU: 0
EIP is at cwq_activate_delayed_work+0x3b/0x7e
EAX: 00000000 EBX: 00000000 ECX: f505fe54 EDX: 00000000
ESI: ed5b697c EDI: 00000006 EBP: f64b7e8c ESP: f64b7e84
DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068
CR0: 8005003b CR2: 00000000 CR3: 30bc2000 CR4: 000006f0
DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000
DR6: ffff0ff0 DR7: 00000400
Process kworker/u:0 (pid: 6, ti=f64b6000 task=f64b4160 task.ti=f64b6000)
Stack:
f505fe00 00000006 f64b7e9c c01de3d7 f6435540 00000003 f64b7efc c01def1d
f6435540 00000002 00000000 0000008a c16d0808 c040a10b c16d07d8 c16d08b0
f505fe00 c16d0780 00000000 00000000 ee153df4 c1ce4a30 c17d0e30 00000000
Call Trace:
[<c01de3d7>] cwq_dec_nr_in_flight+0x71/0xfb
[<c01def1d>] process_one_work+0x5d8/0x637
[<c040a10b>] ? ext4_end_bio+0x300/0x300
[<c01e3105>] worker_thread+0x249/0x3ef
[<c01ea317>] kthread+0xd8/0xeb
[<c01e2ebc>] ? manage_workers+0x4bb/0x4bb
[<c023a370>] ? trace_hardirqs_on+0x27/0x37
[<c0f1b4b7>] ret_from_kernel_thread+0x1b/0x28
[<c01ea23f>] ? __init_kthread_worker+0x71/0x71
Code: 01 83 15 ac ff 6c c1 00 31 db 89 c6 8b 00 a8 04 74 12 89 c3 30 db 83 05 b0 ff 6c c1 01 83 15 b4 ff 6c c1 00 89 f0 e8 42 ff ff ff <8b> 13 89 f0 83 05 b8 ff 6c c1
6c c1 00 31 c9 83
EIP: [<c01dda6a>] cwq_activate_delayed_work+0x3b/0x7e SS:ESP 0068:f64b7e84
CR2: 0000000000000000
---[ end trace a1923229da53d8a4 ]---
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: Jan Kara <jack@suse.cz>
-rw-r--r-- | fs/ext4/ext4.h | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 4 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 12 |
3 files changed, 14 insertions, 4 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 167ff564bbfa..3b83cd604796 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
2617 | extern int __init ext4_init_pageio(void); | 2617 | extern int __init ext4_init_pageio(void); |
2618 | extern void ext4_add_complete_io(ext4_io_end_t *io_end); | 2618 | extern void ext4_add_complete_io(ext4_io_end_t *io_end); |
2619 | extern void ext4_exit_pageio(void); | 2619 | extern void ext4_exit_pageio(void); |
2620 | extern void ext4_ioend_wait(struct inode *); | 2620 | extern void ext4_ioend_shutdown(struct inode *); |
2621 | extern void ext4_free_io_end(ext4_io_end_t *io); | 2621 | extern void ext4_free_io_end(ext4_io_end_t *io); |
2622 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | 2622 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); |
2623 | extern void ext4_end_io_work(struct work_struct *work); | 2623 | extern void ext4_end_io_work(struct work_struct *work); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 65bbc9339aca..ea5f24ffa60c 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode) | |||
185 | 185 | ||
186 | trace_ext4_evict_inode(inode); | 186 | trace_ext4_evict_inode(inode); |
187 | 187 | ||
188 | ext4_ioend_wait(inode); | ||
189 | |||
190 | if (inode->i_nlink) { | 188 | if (inode->i_nlink) { |
191 | /* | 189 | /* |
192 | * When journalling data dirty buffers are tracked only in the | 190 | * When journalling data dirty buffers are tracked only in the |
@@ -216,6 +214,7 @@ void ext4_evict_inode(struct inode *inode) | |||
216 | filemap_write_and_wait(&inode->i_data); | 214 | filemap_write_and_wait(&inode->i_data); |
217 | } | 215 | } |
218 | truncate_inode_pages(&inode->i_data, 0); | 216 | truncate_inode_pages(&inode->i_data, 0); |
217 | ext4_ioend_shutdown(inode); | ||
219 | goto no_delete; | 218 | goto no_delete; |
220 | } | 219 | } |
221 | 220 | ||
@@ -225,6 +224,7 @@ void ext4_evict_inode(struct inode *inode) | |||
225 | if (ext4_should_order_data(inode)) | 224 | if (ext4_should_order_data(inode)) |
226 | ext4_begin_ordered_truncate(inode, 0); | 225 | ext4_begin_ordered_truncate(inode, 0); |
227 | truncate_inode_pages(&inode->i_data, 0); | 226 | truncate_inode_pages(&inode->i_data, 0); |
227 | ext4_ioend_shutdown(inode); | ||
228 | 228 | ||
229 | if (is_bad_inode(inode)) | 229 | if (is_bad_inode(inode)) |
230 | goto no_delete; | 230 | goto no_delete; |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 809b31003ecc..047a6de04a0a 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -50,11 +50,21 @@ void ext4_exit_pageio(void) | |||
50 | kmem_cache_destroy(io_page_cachep); | 50 | kmem_cache_destroy(io_page_cachep); |
51 | } | 51 | } |
52 | 52 | ||
53 | void ext4_ioend_wait(struct inode *inode) | 53 | /* |
54 | * This function is called by ext4_evict_inode() to make sure there is | ||
55 | * no more pending I/O completion work left to do. | ||
56 | */ | ||
57 | void ext4_ioend_shutdown(struct inode *inode) | ||
54 | { | 58 | { |
55 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | 59 | wait_queue_head_t *wq = ext4_ioend_wq(inode); |
56 | 60 | ||
57 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | 61 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); |
62 | /* | ||
63 | * We need to make sure the work structure is finished being | ||
64 | * used before we let the inode get destroyed. | ||
65 | */ | ||
66 | if (work_pending(&EXT4_I(inode)->i_unwritten_work)) | ||
67 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); | ||
58 | } | 68 | } |
59 | 69 | ||
60 | static void put_io_page(struct ext4_io_page *io_page) | 70 | static void put_io_page(struct ext4_io_page *io_page) |