aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFengguang Wu <wfg@mail.ustc.edu.cn>2007-10-17 02:30:39 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-10-17 11:43:02 -0400
commit2c1365791048e8aff42138ed5f6040b3c7824a69 (patch)
tree3c8de64f6b4995125f3f6171fdf175232a412783
parent0e0f4fc22ece8e593167eccbb1a4154565c11faa (diff)
writeback: fix time ordering of the per superblock inode lists 8
Streamline the management of dirty inode lists and fix time ordering bugs. The writeback logic used to move not-yet-expired dirty inodes from s_dirty to s_io, *only to* move them back. This moving of inodes back and forth is a mess, and this patch eliminates it. The new scheme is: - s_dirty acts as a time-ordered io delaying queue; - s_io/s_more_io together act as an io dispatching queue. On kupdate writeback, we pull some inodes from s_dirty to s_io at the start of every full scan of s_io. Otherwise (i.e. for sync/throttle/background writeback), we always pull from s_dirty on each run (a partial scan). Note that the line list_splice_init(&sb->s_more_io, &sb->s_io); is moved to queue_io() to leave s_io empty. Otherwise a big dirtied file will sit in s_io for a long time, preventing newly expired inodes from getting in. Cc: Ken Chen <kenchen@google.com> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/fs-writeback.c61
1 files changed, 38 insertions, 23 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c9d105ff7970..1f22fb5217c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -119,7 +119,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
119 goto out; 119 goto out;
120 120
121 /* 121 /*
122 * If the inode was already on s_dirty or s_io, don't 122 * If the inode was already on s_dirty/s_io/s_more_io, don't
123 * reposition it (that would break s_dirty time-ordering). 123 * reposition it (that would break s_dirty time-ordering).
124 */ 124 */
125 if (!was_dirty) { 125 if (!was_dirty) {
@@ -173,6 +173,33 @@ static void requeue_io(struct inode *inode)
173} 173}
174 174
175/* 175/*
176 * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
177 */
178static void move_expired_inodes(struct list_head *delaying_queue,
179 struct list_head *dispatch_queue,
180 unsigned long *older_than_this)
181{
182 while (!list_empty(delaying_queue)) {
183 struct inode *inode = list_entry(delaying_queue->prev,
184 struct inode, i_list);
185 if (older_than_this &&
186 time_after(inode->dirtied_when, *older_than_this))
187 break;
188 list_move(&inode->i_list, dispatch_queue);
189 }
190}
191
192/*
193 * Queue all expired dirty inodes for io, eldest first.
194 */
195static void queue_io(struct super_block *sb,
196 unsigned long *older_than_this)
197{
198 list_splice_init(&sb->s_more_io, sb->s_io.prev);
199 move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);
200}
201
202/*
176 * Write a single inode's dirty pages and inode data out to disk. 203 * Write a single inode's dirty pages and inode data out to disk.
177 * If `wait' is set, wait on the writeout. 204 * If `wait' is set, wait on the writeout.
178 * 205 *
@@ -222,7 +249,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
222 /* 249 /*
223 * We didn't write back all the pages. nfs_writepages() 250 * We didn't write back all the pages. nfs_writepages()
224 * sometimes bales out without doing anything. Redirty 251 * sometimes bales out without doing anything. Redirty
225 * the inode. It is moved from s_io onto s_dirty. 252 * the inode; Move it from s_io onto s_more_io/s_dirty.
226 */ 253 */
227 /* 254 /*
228 * akpm: if the caller was the kupdate function we put 255 * akpm: if the caller was the kupdate function we put
@@ -235,10 +262,9 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
235 */ 262 */
236 if (wbc->for_kupdate) { 263 if (wbc->for_kupdate) {
237 /* 264 /*
238 * For the kupdate function we leave the inode 265 * For the kupdate function we move the inode
239 * at the head of sb_dirty so it will get more 266 * to s_more_io so it will get more writeout as
240 * writeout as soon as the queue becomes 267 * soon as the queue becomes uncongested.
241 * uncongested.
242 */ 268 */
243 inode->i_state |= I_DIRTY_PAGES; 269 inode->i_state |= I_DIRTY_PAGES;
244 requeue_io(inode); 270 requeue_io(inode);
@@ -296,10 +322,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
296 322
297 /* 323 /*
298 * We're skipping this inode because it's locked, and we're not 324 * We're skipping this inode because it's locked, and we're not
299 * doing writeback-for-data-integrity. Move it to the head of 325 * doing writeback-for-data-integrity. Move it to s_more_io so
300 * s_dirty so that writeback can proceed with the other inodes 326 * that writeback can proceed with the other inodes on s_io.
301 * on s_io. We'll have another go at writing back this inode 327 * We'll have another go at writing back this inode when we
302 * when the s_dirty iodes get moved back onto s_io. 328 * completed a full scan of s_io.
303 */ 329 */
304 requeue_io(inode); 330 requeue_io(inode);
305 331
@@ -366,7 +392,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
366 const unsigned long start = jiffies; /* livelock avoidance */ 392 const unsigned long start = jiffies; /* livelock avoidance */
367 393
368 if (!wbc->for_kupdate || list_empty(&sb->s_io)) 394 if (!wbc->for_kupdate || list_empty(&sb->s_io))
369 list_splice_init(&sb->s_dirty, &sb->s_io); 395 queue_io(sb, wbc->older_than_this);
370 396
371 while (!list_empty(&sb->s_io)) { 397 while (!list_empty(&sb->s_io)) {
372 struct inode *inode = list_entry(sb->s_io.prev, 398 struct inode *inode = list_entry(sb->s_io.prev,
@@ -411,13 +437,6 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
411 if (time_after(inode->dirtied_when, start)) 437 if (time_after(inode->dirtied_when, start))
412 break; 438 break;
413 439
414 /* Was this inode dirtied too recently? */
415 if (wbc->older_than_this && time_after(inode->dirtied_when,
416 *wbc->older_than_this)) {
417 list_splice_init(&sb->s_io, sb->s_dirty.prev);
418 break;
419 }
420
421 /* Is another pdflush already flushing this queue? */ 440 /* Is another pdflush already flushing this queue? */
422 if (current_is_pdflush() && !writeback_acquire(bdi)) 441 if (current_is_pdflush() && !writeback_acquire(bdi))
423 break; 442 break;
@@ -446,10 +465,6 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
446 if (wbc->nr_to_write <= 0) 465 if (wbc->nr_to_write <= 0)
447 break; 466 break;
448 } 467 }
449
450 if (list_empty(&sb->s_io))
451 list_splice_init(&sb->s_more_io, &sb->s_io);
452
453 return; /* Leave any unwritten inodes on s_io */ 468 return; /* Leave any unwritten inodes on s_io */
454} 469}
455 470
@@ -459,7 +474,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
459 * Note: 474 * Note:
460 * We don't need to grab a reference to superblock here. If it has non-empty 475 * We don't need to grab a reference to superblock here. If it has non-empty
461 * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed 476 * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
462 * past sync_inodes_sb() until both the ->s_dirty and ->s_io lists are 477 * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all
463 * empty. Since __sync_single_inode() regains inode_lock before it finally moves 478 * empty. Since __sync_single_inode() regains inode_lock before it finally moves
464 * inode from superblock lists we are OK. 479 * inode from superblock lists we are OK.
465 * 480 *