aboutsummaryrefslogtreecommitdiffstats
path: root/fs/fs-writeback.c
diff options
context:
space:
mode:
authorKen Chen <kenchen@google.com>2007-10-17 02:30:38 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-17 11:43:02 -0400
commit0e0f4fc22ece8e593167eccbb1a4154565c11faa (patch)
tree564ab2eabb31ab945c334706662854bb227f45e9 /fs/fs-writeback.c
parent670e4def6ef5f44315d62748134e535b479c784f (diff)
writeback: fix periodic superblock dirty inode flushing
The current -mm tree has a bucketful of bug fixes in the periodic writeback path. However, we still hit a glitch where dirty pages on a given inode aren't completely flushed to the disk, and the system will accumulate a large amount of dirty pages beyond what dirty_expire_interval is designed for. The problem is that __sync_single_inode() will move an inode to the sb->s_dirty list even when there are more pending dirty pages on that inode. If there is another inode with a small number of dirty pages, we hit a case where the loop iteration in wb_kupdate() terminates prematurely because wbc.nr_to_write > 0. This leaves behind the inode that has a large amount of dirty pages, which then has to wait for another dirty_writeback_interval before we flush it again. We effectively only write out MAX_WRITEBACK_PAGES every dirty_writeback_interval. If the rate of dirtying is sufficiently high, the system will start to accumulate a large number of dirty pages. So fix it by having another sb->s_more_io list on which to park the inode while we iterate through sb->s_io, allowing each dirty inode which resides on that sb an equal chance of flushing some amount of dirty pages. Signed-off-by: Ken Chen <kenchen@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--fs/fs-writeback.c36
1 files changed, 14 insertions, 22 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 39fadfad86f7..c9d105ff7970 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -165,25 +165,11 @@ static void redirty_tail(struct inode *inode)
165} 165}
166 166
167/* 167/*
168 * Redirty an inode, but mark it as the very next-to-be-written inode on its 168 * requeue inode for re-scanning after sb->s_io list is exhausted.
169 * superblock's dirty-inode list.
170 * We need to preserve s_dirty's reverse-time-orderedness, so we cheat by
171 * setting this inode's dirtied_when to the same value as that of the inode
172 * which is presently head-of-list, if present head-of-list is newer than this
173 * inode. (head-of-list is the least-recently-dirtied inode: the oldest one).
174 */ 169 */
175static void redirty_head(struct inode *inode) 170static void requeue_io(struct inode *inode)
176{ 171{
177 struct super_block *sb = inode->i_sb; 172 list_move(&inode->i_list, &inode->i_sb->s_more_io);
178
179 if (!list_empty(&sb->s_dirty)) {
180 struct inode *head_inode;
181
182 head_inode = list_entry(sb->s_dirty.prev, struct inode, i_list);
183 if (time_after(inode->dirtied_when, head_inode->dirtied_when))
184 inode->dirtied_when = head_inode->dirtied_when;
185 }
186 list_move_tail(&inode->i_list, &sb->s_dirty);
187} 173}
188 174
189/* 175/*
@@ -255,7 +241,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
255 * uncongested. 241 * uncongested.
256 */ 242 */
257 inode->i_state |= I_DIRTY_PAGES; 243 inode->i_state |= I_DIRTY_PAGES;
258 redirty_head(inode); 244 requeue_io(inode);
259 } else { 245 } else {
260 /* 246 /*
261 * Otherwise fully redirty the inode so that 247 * Otherwise fully redirty the inode so that
@@ -315,7 +301,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
315 * on s_io. We'll have another go at writing back this inode 301 * on s_io. We'll have another go at writing back this inode
316 * when the s_dirty iodes get moved back onto s_io. 302 * when the s_dirty iodes get moved back onto s_io.
317 */ 303 */
318 redirty_head(inode); 304 requeue_io(inode);
319 305
320 /* 306 /*
321 * Even if we don't actually write the inode itself here, 307 * Even if we don't actually write the inode itself here,
@@ -410,14 +396,14 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
410 wbc->encountered_congestion = 1; 396 wbc->encountered_congestion = 1;
411 if (!sb_is_blkdev_sb(sb)) 397 if (!sb_is_blkdev_sb(sb))
412 break; /* Skip a congested fs */ 398 break; /* Skip a congested fs */
413 redirty_head(inode); 399 requeue_io(inode);
414 continue; /* Skip a congested blockdev */ 400 continue; /* Skip a congested blockdev */
415 } 401 }
416 402
417 if (wbc->bdi && bdi != wbc->bdi) { 403 if (wbc->bdi && bdi != wbc->bdi) {
418 if (!sb_is_blkdev_sb(sb)) 404 if (!sb_is_blkdev_sb(sb))
419 break; /* fs has the wrong queue */ 405 break; /* fs has the wrong queue */
420 redirty_head(inode); 406 requeue_io(inode);
421 continue; /* blockdev has wrong queue */ 407 continue; /* blockdev has wrong queue */
422 } 408 }
423 409
@@ -427,8 +413,10 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
427 413
428 /* Was this inode dirtied too recently? */ 414 /* Was this inode dirtied too recently? */
429 if (wbc->older_than_this && time_after(inode->dirtied_when, 415 if (wbc->older_than_this && time_after(inode->dirtied_when,
430 *wbc->older_than_this)) 416 *wbc->older_than_this)) {
417 list_splice_init(&sb->s_io, sb->s_dirty.prev);
431 break; 418 break;
419 }
432 420
433 /* Is another pdflush already flushing this queue? */ 421 /* Is another pdflush already flushing this queue? */
434 if (current_is_pdflush() && !writeback_acquire(bdi)) 422 if (current_is_pdflush() && !writeback_acquire(bdi))
@@ -458,6 +446,10 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
458 if (wbc->nr_to_write <= 0) 446 if (wbc->nr_to_write <= 0)
459 break; 447 break;
460 } 448 }
449
450 if (list_empty(&sb->s_io))
451 list_splice_init(&sb->s_more_io, &sb->s_io);
452
461 return; /* Leave any unwritten inodes on s_io */ 453 return; /* Leave any unwritten inodes on s_io */
462} 454}
463 455