author		Fengguang Wu <wfg@mail.ustc.edu.cn>	2007-10-17 02:30:39 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-10-17 11:43:02 -0400
commit		2c1365791048e8aff42138ed5f6040b3c7824a69 (patch)
tree		3c8de64f6b4995125f3f6171fdf175232a412783
parent		0e0f4fc22ece8e593167eccbb1a4154565c11faa (diff)
writeback: fix time ordering of the per superblock inode lists (8)
Streamline the management of dirty inode lists and fix time ordering bugs.
The writeback logic used to move not-yet-expired dirty inodes from s_dirty to
s_io, only to move them back again later. That back-and-forth shuffling was a
mess; this patch eliminates it.
The new scheme is:
- s_dirty acts as a time-ordered io delaying queue;
- s_io/s_more_io together act as an io dispatching queue.
On kupdate writeback, we pull some inodes from s_dirty to s_io at the start of
every full scan of s_io. Otherwise (i.e. for sync/throttle/background
writeback), we always pull from s_dirty on each run (a partial scan).
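
Concretely, the dispatching side boils down to two small helpers (condensed
here from the diff below, with explanatory comments added):

	/*
	 * Pull inodes that expired before *older_than_this (or all of them
	 * when older_than_this is NULL) off the tail of the delaying queue
	 * and onto the dispatch queue, eldest first.
	 */
	static void move_expired_inodes(struct list_head *delaying_queue,
					struct list_head *dispatch_queue,
					unsigned long *older_than_this)
	{
		while (!list_empty(delaying_queue)) {
			struct inode *inode = list_entry(delaying_queue->prev,
							struct inode, i_list);
			if (older_than_this &&
			    time_after(inode->dirtied_when, *older_than_this))
				break;
			list_move(&inode->i_list, dispatch_queue);
		}
	}

	/*
	 * Refill the dispatching queue: take back the inodes parked on
	 * s_more_io, then pull the expired part of s_dirty onto s_io.
	 */
	static void queue_io(struct super_block *sb,
			     unsigned long *older_than_this)
	{
		list_splice_init(&sb->s_more_io, sb->s_io.prev);
		move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);
	}

sync_sb_inodes() calls queue_io() on every non-kupdate run, and on kupdate
runs only when s_io has been fully drained.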
Note that the line
list_splice_init(&sb->s_more_io, &sb->s_io);
is moved into queue_io() so that s_io is left empty at the end of a full scan.
Otherwise a big dirtied file would sit in s_io for a long time, preventing
newly expired inodes from getting in.
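
For contrast, the splice used to live at the bottom of sync_sb_inodes(), after
the scan loop and gated on s_io having drained (these are the lines removed in
one of the hunks below):

	if (list_empty(&sb->s_io))
		list_splice_init(&sb->s_more_io, &sb->s_io);

With the splice moved into queue_io(), s_io really is empty once a full scan
completes, so the next kupdate run refills it from s_more_io and from the
newly expired part of s_dirty alike.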
Cc: Ken Chen <kenchen@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Fengguang Wu <wfg@mail.ustc.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--	fs/fs-writeback.c	61
1 file changed, 38 insertions(+), 23 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index c9d105ff7970..1f22fb5217c0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -119,7 +119,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			goto out;
 
 		/*
-		 * If the inode was already on s_dirty or s_io, don't
+		 * If the inode was already on s_dirty/s_io/s_more_io, don't
 		 * reposition it (that would break s_dirty time-ordering).
 		 */
 		if (!was_dirty) {
@@ -173,6 +173,33 @@ static void requeue_io(struct inode *inode)
 }
 
 /*
+ * Move expired dirty inodes from @delaying_queue to @dispatch_queue.
+ */
+static void move_expired_inodes(struct list_head *delaying_queue,
+			       struct list_head *dispatch_queue,
+			       unsigned long *older_than_this)
+{
+	while (!list_empty(delaying_queue)) {
+		struct inode *inode = list_entry(delaying_queue->prev,
+						struct inode, i_list);
+		if (older_than_this &&
+			time_after(inode->dirtied_when, *older_than_this))
+			break;
+		list_move(&inode->i_list, dispatch_queue);
+	}
+}
+
+/*
+ * Queue all expired dirty inodes for io, eldest first.
+ */
+static void queue_io(struct super_block *sb,
+				unsigned long *older_than_this)
+{
+	list_splice_init(&sb->s_more_io, sb->s_io.prev);
+	move_expired_inodes(&sb->s_dirty, &sb->s_io, older_than_this);
+}
+
+/*
  * Write a single inode's dirty pages and inode data out to disk.
  * If `wait' is set, wait on the writeout.
  *
@@ -222,7 +249,7 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 			/*
 			 * We didn't write back all the pages.  nfs_writepages()
 			 * sometimes bales out without doing anything. Redirty
-			 * the inode.  It is moved from s_io onto s_dirty.
+			 * the inode;  Move it from s_io onto s_more_io/s_dirty.
 			 */
 			/*
 			 * akpm: if the caller was the kupdate function we put
@@ -235,10 +262,9 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
 			 */
 			if (wbc->for_kupdate) {
 				/*
-				 * For the kupdate function we leave the inode
-				 * at the head of sb_dirty so it will get more
-				 * writeout as soon as the queue becomes
-				 * uncongested.
+				 * For the kupdate function we move the inode
+				 * to s_more_io so it will get more writeout as
+				 * soon as the queue becomes uncongested.
 				 */
 				inode->i_state |= I_DIRTY_PAGES;
 				requeue_io(inode);
@@ -296,10 +322,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 
 		/*
 		 * We're skipping this inode because it's locked, and we're not
-		 * doing writeback-for-data-integrity.  Move it to the head of
-		 * s_dirty so that writeback can proceed with the other inodes
-		 * on s_io.  We'll have another go at writing back this inode
-		 * when the s_dirty iodes get moved back onto s_io.
+		 * doing writeback-for-data-integrity.  Move it to s_more_io so
+		 * that writeback can proceed with the other inodes on s_io.
+		 * We'll have another go at writing back this inode when we
+		 * completed a full scan of s_io.
 		 */
 		requeue_io(inode);
 
@@ -366,7 +392,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 	const unsigned long start = jiffies;	/* livelock avoidance */
 
 	if (!wbc->for_kupdate || list_empty(&sb->s_io))
-		list_splice_init(&sb->s_dirty, &sb->s_io);
+		queue_io(sb, wbc->older_than_this);
 
 	while (!list_empty(&sb->s_io)) {
 		struct inode *inode = list_entry(sb->s_io.prev,
@@ -411,13 +437,6 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 		if (time_after(inode->dirtied_when, start))
 			break;
 
-		/* Was this inode dirtied too recently? */
-		if (wbc->older_than_this && time_after(inode->dirtied_when,
-						*wbc->older_than_this)) {
-			list_splice_init(&sb->s_io, sb->s_dirty.prev);
-			break;
-		}
-
 		/* Is another pdflush already flushing this queue? */
 		if (current_is_pdflush() && !writeback_acquire(bdi))
 			break;
@@ -446,10 +465,6 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
 		if (wbc->nr_to_write <= 0)
 			break;
 	}
-
-	if (list_empty(&sb->s_io))
-		list_splice_init(&sb->s_more_io, &sb->s_io);
-
 	return;		/* Leave any unwritten inodes on s_io */
 }
 
@@ -459,7 +474,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
  * Note:
  * We don't need to grab a reference to superblock here. If it has non-empty
  * ->s_dirty it's hadn't been killed yet and kill_super() won't proceed
- * past sync_inodes_sb() until both the ->s_dirty and ->s_io lists are
+ * past sync_inodes_sb() until the ->s_dirty/s_io/s_more_io lists are all
  * empty.  Since __sync_single_inode() regains inode_lock before it finally moves
  * inode from superblock lists we are OK.
  *