aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author Wu Fengguang <fengguang.wu@intel.com> 2011-06-11 21:21:43 -0400
committer Wu Fengguang <fengguang.wu@intel.com> 2011-10-03 09:08:57 -0400
commit c8462cc9de9e92264ec647903772f6036a99b286 (patch)
tree f442132f53651a04e67f3a119ead9f54be51a6cb /mm
parent 143dfe8611a63030ce0c79419dc362f7838be557 (diff)
writeback: limit max dirty pause time
Apply two policies to scale down the max pause time for 1) a small number of concurrent dirtiers and 2) a small memory system (compared to the storage bandwidth). MAX_PAUSE=200ms may only be suitable for high end servers with lots of concurrent dirtiers, where the large pause time can reduce overhead considerably. Otherwise, a smaller pause time is desirable whenever possible, so as to get good responsiveness and a smooth user experience. It's actually required for good disk utilization in the case when all the dirty pages can be synced to disk within MAX_PAUSE=200ms. Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Diffstat (limited to 'mm')
-rw-r--r-- mm/page-writeback.c 44
1 files changed, 42 insertions, 2 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f32f25092c66..cc351e6f9ed9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -939,6 +939,43 @@ static unsigned long dirty_poll_interval(unsigned long dirty,
939 return 1; 939 return 1;
940} 940}
941 941
942static unsigned long bdi_max_pause(struct backing_dev_info *bdi,
943 unsigned long bdi_dirty)
944{
945 unsigned long bw = bdi->avg_write_bandwidth;
946 unsigned long hi = ilog2(bw);
947 unsigned long lo = ilog2(bdi->dirty_ratelimit);
948 unsigned long t;
949
950 /* target for 20ms max pause on 1-dd case */
951 t = HZ / 50;
952
953 /*
954 * Scale up pause time for concurrent dirtiers in order to reduce CPU
955 * overheads.
956 *
957 * (N * 20ms) on 2^N concurrent tasks.
958 */
959 if (hi > lo)
960 t += (hi - lo) * (20 * HZ) / 1024;
961
962 /*
963 * Limit pause time for small memory systems. If sleeping for too long
964 * time, a small pool of dirty/writeback pages may go empty and disk go
965 * idle.
966 *
967 * 8 serves as the safety ratio.
968 */
969 if (bdi_dirty)
970 t = min(t, bdi_dirty * HZ / (8 * bw + 1));
971
972 /*
973 * The pause time will be settled within range (max_pause/4, max_pause).
974 * Apply a minimal value of 4 to get a non-zero max_pause/4.
975 */
976 return clamp_val(t, 4, MAX_PAUSE);
977}
978
942/* 979/*
943 * balance_dirty_pages() must be called by processes which are generating dirty 980 * balance_dirty_pages() must be called by processes which are generating dirty
944 * data. It looks at the number of dirty pages in the machine and will force 981 * data. It looks at the number of dirty pages in the machine and will force
@@ -958,6 +995,7 @@ static void balance_dirty_pages(struct address_space *mapping,
958 unsigned long dirty_thresh; 995 unsigned long dirty_thresh;
959 unsigned long bdi_thresh; 996 unsigned long bdi_thresh;
960 long pause = 0; 997 long pause = 0;
998 long max_pause;
961 bool dirty_exceeded = false; 999 bool dirty_exceeded = false;
962 unsigned long task_ratelimit; 1000 unsigned long task_ratelimit;
963 unsigned long dirty_ratelimit; 1001 unsigned long dirty_ratelimit;
@@ -1035,18 +1073,20 @@ static void balance_dirty_pages(struct address_space *mapping,
1035 nr_dirty, bdi_thresh, bdi_dirty, 1073 nr_dirty, bdi_thresh, bdi_dirty,
1036 start_time); 1074 start_time);
1037 1075
1076 max_pause = bdi_max_pause(bdi, bdi_dirty);
1077
1038 dirty_ratelimit = bdi->dirty_ratelimit; 1078 dirty_ratelimit = bdi->dirty_ratelimit;
1039 pos_ratio = bdi_position_ratio(bdi, dirty_thresh, 1079 pos_ratio = bdi_position_ratio(bdi, dirty_thresh,
1040 background_thresh, nr_dirty, 1080 background_thresh, nr_dirty,
1041 bdi_thresh, bdi_dirty); 1081 bdi_thresh, bdi_dirty);
1042 if (unlikely(pos_ratio == 0)) { 1082 if (unlikely(pos_ratio == 0)) {
1043 pause = MAX_PAUSE; 1083 pause = max_pause;
1044 goto pause; 1084 goto pause;
1045 } 1085 }
1046 task_ratelimit = (u64)dirty_ratelimit * 1086 task_ratelimit = (u64)dirty_ratelimit *
1047 pos_ratio >> RATELIMIT_CALC_SHIFT; 1087 pos_ratio >> RATELIMIT_CALC_SHIFT;
1048 pause = (HZ * pages_dirtied) / (task_ratelimit | 1); 1088 pause = (HZ * pages_dirtied) / (task_ratelimit | 1);
1049 pause = min_t(long, pause, MAX_PAUSE); 1089 pause = min(pause, max_pause);
1050 1090
1051pause: 1091pause:
1052 __set_current_state(TASK_UNINTERRUPTIBLE); 1092 __set_current_state(TASK_UNINTERRUPTIBLE);