aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Junxiao Bi <junxiao.bi@oracle.com> 2013-09-11 17:23:04 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2013-09-11 18:58:13 -0400
commit 146d7009b45cdb45ec3be8ad73177dae58f4bc91 (patch)
tree 70c93baa9df755952c065c334df0bd86e7024889
parent 325c4ef5c4b17372c3222d896040d7848e67fbdb (diff)
writeback: fix race that causes writeback hang
There is a race between __mark_inode_dirty() and the writeback thread; see the
following scenario. In this case, the writeback thread will not run even though
there is dirty_io.

__mark_inode_dirty()                                        bdi_writeback_workfn()
    ...                                                         ...
    spin_lock(&inode->i_lock);
    ...
    if (bdi_cap_writeback_dirty(bdi)) {
        <<< assume wb has dirty_io, so wakeup_bdi is false.
        <<< the following inode_dirty also have wakeup_bdi false.
        if (!wb_has_dirty_io(&bdi->wb))
            wakeup_bdi = true;
    }
    spin_unlock(&inode->i_lock);
                                                                <<< assume last dirty_io is removed here.
                                                                pages_written = wb_do_writeback(wb);
                                                                ...
                                                                <<< work_list empty and wb has no dirty_io,
                                                                <<< delayed_work will not be queued.
                                                                if (!list_empty(&bdi->work_list) ||
                                                                    (wb_has_dirty_io(wb) && dirty_writeback_interval))
                                                                    queue_delayed_work(bdi_wq, &wb->dwork,
                                                                        msecs_to_jiffies(dirty_writeback_interval * 10));
    spin_lock(&bdi->wb.list_lock);
    inode->dirtied_when = jiffies;
    <<< new dirty_io is added.
    list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
    spin_unlock(&bdi->wb.list_lock);

    <<< though there is dirty_io, but wakeup_bdi is false,
    <<< so writeback thread will not be waked up and
    <<< the new dirty_io will not be flushed.
    if (wakeup_bdi)
        bdi_wakeup_thread_delayed(bdi);

Writeback will not run until a new flush work is queued. This may cause a lot
of dirty pages to stay in memory for a long time.

Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- fs/fs-writeback.c 4
1 files changed, 2 insertions, 2 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 54b3c31c2f0d..30f6f27d5a59 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1171,6 +1171,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1171 bool wakeup_bdi = false; 1171 bool wakeup_bdi = false;
1172 bdi = inode_to_bdi(inode); 1172 bdi = inode_to_bdi(inode);
1173 1173
1174 spin_unlock(&inode->i_lock);
1175 spin_lock(&bdi->wb.list_lock);
1174 if (bdi_cap_writeback_dirty(bdi)) { 1176 if (bdi_cap_writeback_dirty(bdi)) {
1175 WARN(!test_bit(BDI_registered, &bdi->state), 1177 WARN(!test_bit(BDI_registered, &bdi->state),
1176 "bdi-%s not registered\n", bdi->name); 1178 "bdi-%s not registered\n", bdi->name);
@@ -1185,8 +1187,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1185 wakeup_bdi = true; 1187 wakeup_bdi = true;
1186 } 1188 }
1187 1189
1188 spin_unlock(&inode->i_lock);
1189 spin_lock(&bdi->wb.list_lock);
1190 inode->dirtied_when = jiffies; 1190 inode->dirtied_when = jiffies;
1191 list_move(&inode->i_wb_list, &bdi->wb.b_dirty); 1191 list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
1192 spin_unlock(&bdi->wb.list_lock); 1192 spin_unlock(&bdi->wb.list_lock);