diff options
author | Junxiao Bi <junxiao.bi@oracle.com> | 2013-09-11 17:23:04 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-09-11 18:58:13 -0400 |
commit | 146d7009b45cdb45ec3be8ad73177dae58f4bc91 (patch) | |
tree | 70c93baa9df755952c065c334df0bd86e7024889 | |
parent | 325c4ef5c4b17372c3222d896040d7848e67fbdb (diff) |
writeback: fix race that causes writeback hang
There is a race between __mark_inode_dirty() and the writeback thread; see
the following scenario. In this case, the writeback thread will not run even
though there is dirty_io.
__mark_inode_dirty() bdi_writeback_workfn()
... ...
spin_lock(&inode->i_lock);
...
if (bdi_cap_writeback_dirty(bdi)) {
<<< assume wb has dirty_io, so wakeup_bdi is false.
<<< the following inode_dirty also have wakeup_bdi false.
if (!wb_has_dirty_io(&bdi->wb))
wakeup_bdi = true;
}
spin_unlock(&inode->i_lock);
<<< assume last dirty_io is removed here.
pages_written = wb_do_writeback(wb);
...
<<< work_list empty and wb has no dirty_io,
<<< delayed_work will not be queued.
if (!list_empty(&bdi->work_list) ||
(wb_has_dirty_io(wb) && dirty_writeback_interval))
queue_delayed_work(bdi_wq, &wb->dwork,
msecs_to_jiffies(dirty_writeback_interval * 10));
spin_lock(&bdi->wb.list_lock);
inode->dirtied_when = jiffies;
<<< new dirty_io is added.
list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
spin_unlock(&bdi->wb.list_lock);
<<< although there is dirty_io, wakeup_bdi is false,
<<< so the writeback thread will not be woken up and
<<< the new dirty_io will not be flushed.
if (wakeup_bdi)
bdi_wakeup_thread_delayed(bdi);
Writeback will not run until a new flush work is queued. This may cause
a lot of dirty pages to stay in memory for a long time.
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/fs-writeback.c | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 54b3c31c2f0d..30f6f27d5a59 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -1171,6 +1171,8 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1171 | bool wakeup_bdi = false; | 1171 | bool wakeup_bdi = false; |
1172 | bdi = inode_to_bdi(inode); | 1172 | bdi = inode_to_bdi(inode); |
1173 | 1173 | ||
1174 | spin_unlock(&inode->i_lock); | ||
1175 | spin_lock(&bdi->wb.list_lock); | ||
1174 | if (bdi_cap_writeback_dirty(bdi)) { | 1176 | if (bdi_cap_writeback_dirty(bdi)) { |
1175 | WARN(!test_bit(BDI_registered, &bdi->state), | 1177 | WARN(!test_bit(BDI_registered, &bdi->state), |
1176 | "bdi-%s not registered\n", bdi->name); | 1178 | "bdi-%s not registered\n", bdi->name); |
@@ -1185,8 +1187,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
1185 | wakeup_bdi = true; | 1187 | wakeup_bdi = true; |
1186 | } | 1188 | } |
1187 | 1189 | ||
1188 | spin_unlock(&inode->i_lock); | ||
1189 | spin_lock(&bdi->wb.list_lock); | ||
1190 | inode->dirtied_when = jiffies; | 1190 | inode->dirtied_when = jiffies; |
1191 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); | 1191 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); |
1192 | spin_unlock(&bdi->wb.list_lock); | 1192 | spin_unlock(&bdi->wb.list_lock); |