author     Wu Fengguang <fengguang.wu@intel.com>    2010-08-29 15:28:09 -0400
committer  Wu Fengguang <fengguang.wu@intel.com>    2011-07-10 01:09:03 -0400
commit     1a12d8bd7b2998be01ee55edb64e7473728abb9c (patch)
tree       d3466ea7eaa308e3d40a43e372bd4baaf461d7bb /fs/fs-writeback.c
parent     e1cbe236013c82bcf9a156e98d7b47efb89d2674 (diff)
writeback: scale IO chunk size up to half device bandwidth
Originally, MAX_WRITEBACK_PAGES was hard-coded to 1024 because of a concern of not holding I_SYNC for too long. (At least, that was the comment previously.) This doesn't make sense now because the only time we wait for I_SYNC is if we are calling sync or fsync, and in that case we need to write out all of the data anyway. Previously there may have been other code paths that waited on I_SYNC, but not any more. -- Theodore Ts'o

So remove the MAX_WRITEBACK_PAGES constraint. The writeback pages will adapt to as large as the storage device can write within 500ms.

XFS is observed to do IO completions in a batch, and the batch size is equal to the write chunk size. To avoid dirty pages suddenly dropping out of balance_dirty_pages()'s dirty control scope and creating large fluctuations, the chunk size is also limited to half the control scope.

The balance_dirty_pages() control scope is [(background_thresh + dirty_thresh) / 2, dirty_thresh], which is by default [15%, 20%] of global dirty pages; its range size is dirty_thresh / DIRTY_FULL_SCOPE.

The adaptive write chunk size will be rounded to the nearest 4MB boundary.

http://bugzilla.kernel.org/show_bug.cgi?id=13930

CC: Theodore Ts'o <tytso@mit.edu>
CC: Dave Chinner <david@fromorbit.com>
CC: Chris Mason <chris.mason@oracle.com>
CC: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
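For illustration only (not part of the patch): a minimal userspace C sketch of the chunk-size arithmetic described above. The values of MIN_WRITEBACK_PAGES and DIRTY_SCOPE, the 4KB page size, and the sample bandwidth and dirty-limit numbers are assumptions made for this sketch, not taken from the kernel headers.

/*
 * Illustration only, not kernel code: a userspace model of the arithmetic
 * the patch introduces in writeback_chunk_size().  MIN_WRITEBACK_PAGES,
 * DIRTY_SCOPE and all sample values below are assumptions for this sketch.
 */
#include <stdio.h>

#define MIN_WRITEBACK_PAGES	1024UL	/* assumed: 4MB worth of 4KB pages */
#define DIRTY_SCOPE		8UL	/* assumed: limit/8 = half the control scope */

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

static unsigned long round_down_ul(unsigned long x, unsigned long step)
{
	return x - (x % step);
}

/* Mirrors the non-sync branch of the patched writeback_chunk_size(). */
static unsigned long chunk_size(unsigned long avg_write_bandwidth, /* pages/s */
				unsigned long global_dirty_limit,   /* pages */
				unsigned long nr_pages)             /* work size */
{
	unsigned long pages;

	pages = min_ul(avg_write_bandwidth / 2,		  /* ~500ms worth of writes */
		       global_dirty_limit / DIRTY_SCOPE); /* half the dirty scope */
	pages = min_ul(pages, nr_pages);
	/* 4MB granularity, never below MIN_WRITEBACK_PAGES */
	return round_down_ul(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES);
}

int main(void)
{
	/* assumed example: ~60MB/s device, ~800MB global dirty limit */
	unsigned long bw = 15360;	/* pages per second, 4KB pages */
	unsigned long limit = 204800;	/* pages */
	unsigned long chunk = chunk_size(bw, limit, ~0UL >> 1);

	printf("write chunk: %lu pages (%lu MB)\n", chunk, chunk * 4 / 1024);
	return 0;
}

Under these assumed inputs the sketch prints 8704 pages (34 MB); the round_down(pages + MIN_WRITEBACK_PAGES, MIN_WRITEBACK_PAGES) step is what provides the 4MB granularity and floor mentioned in the changelog.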
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r--   fs/fs-writeback.c | 23
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 227ff12257f3..50445cf0b83a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -30,15 +30,6 @@
 #include "internal.h"
 
 /*
- * The maximum number of pages to writeout in a single bdi flush/kupdate
- * operation.  We do this so we don't hold I_SYNC against an inode for
- * enormous amounts of time, which would block a userspace task which has
- * been forced to throttle against that inode.  Also, the code reevaluates
- * the dirty each time it has written this many pages.
- */
-#define MAX_WRITEBACK_PAGES	1024L
-
-/*
  * Passed into wb_writeback(), essentially a subset of writeback_control
  */
 struct wb_writeback_work {
@@ -515,7 +506,8 @@ static bool pin_sb_for_writeback(struct super_block *sb)
 	return false;
 }
 
-static long writeback_chunk_size(struct wb_writeback_work *work)
+static long writeback_chunk_size(struct backing_dev_info *bdi,
+				 struct wb_writeback_work *work)
 {
 	long pages;
 
@@ -534,8 +526,13 @@ static long writeback_chunk_size(struct wb_writeback_work *work)
 	 */
 	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
 		pages = LONG_MAX;
-	else
-		pages = min(MAX_WRITEBACK_PAGES, work->nr_pages);
+	else {
+		pages = min(bdi->avg_write_bandwidth / 2,
+			    global_dirty_limit / DIRTY_SCOPE);
+		pages = min(pages, work->nr_pages);
+		pages = round_down(pages + MIN_WRITEBACK_PAGES,
+				   MIN_WRITEBACK_PAGES);
+	}
 
 	return pages;
 }
@@ -600,7 +597,7 @@ static long writeback_sb_inodes(struct super_block *sb,
 			continue;
 		}
 		__iget(inode);
-		write_chunk = writeback_chunk_size(work);
+		write_chunk = writeback_chunk_size(wb->bdi, work);
 		wbc.nr_to_write = write_chunk;
 		wbc.pages_skipped = 0;
 
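Worked example (assumed numbers, matching the sketch above): for a device averaging 15360 pages/s (roughly 60 MB/s with 4KB pages) and a global dirty limit of 204800 pages (roughly 800 MB), the non-sync path yields min(15360 / 2, 204800 / 8) = min(7680, 25600) = 7680 pages, which round_down(7680 + 1024, 1024) turns into 8704 pages (about 34 MB) per chunk, versus the old fixed 1024 pages (4MB).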