author     Wu Fengguang <fengguang.wu@intel.com>    2011-03-02 16:54:09 -0500
committer  Wu Fengguang <fengguang.wu@intel.com>    2011-07-10 01:09:02 -0400
commit     c42843f2f0bbc9d716a32caf667d18fc2bf3bc4c
tree       835b801d215dd70cbb5a282232ce23fa3167a880 /mm
parent     7762741e3af69720186802e945229b6a5afd5c49
writeback: introduce smoothed global dirty limit
The start of a heavy weight application (e.g. KVM) may instantly knock down determine_dirtyable_memory() if swap is not enabled or is full. global_dirty_limits() and bdi_dirty_limit() will in turn get global/bdi dirty thresholds that are _much_ lower than the global/bdi dirty pages. balance_dirty_pages() will then heavily throttle all dirtiers, including the light ones, until the dirty pages drop below the new dirty thresholds. During this _deep_ dirty-exceeded state, the system may appear rather unresponsive to the users.

About "deep" dirty-exceeded: task_dirty_limit() assigns heavy dirtiers a dirty threshold 1/8 lower than that of light dirtiers, so the dirty pages are normally throttled around the heavy dirtiers' dirty threshold and stay reasonably below the light dirtiers' dirty threshold. In this state, only the heavy dirtiers are throttled and the dirty pages are carefully controlled so as not to exceed the light dirtiers' dirty threshold. However, if the threshold itself suddenly drops below the number of dirty pages, the light dirtiers will get heavily throttled.

So introduce global_dirty_limit for tracking the global dirty threshold with the following policies:

- follow downwards slowly
- follow up in one shot

global_dirty_limit can effectively mask out the impact of sudden drops of dirtyable memory. It will be used in the next patch for two new types of dirty limits. Note that the new dirty limits are not going to avoid throttling the light dirtiers, but could limit their sleep time to 200ms.

Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
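The "follow downwards slowly" policy closes 1/32 of the gap between the current limit and the new (lower) target on every bandwidth-update interval, while "follow up in one shot" snaps the limit straight up to a raised threshold. Below is a minimal user-space C sketch that mirrors the update_dirty_limit() logic added by this patch; the starting page counts and the ~200ms interval are illustrative assumptions, not values taken from the kernel.

#include <stdio.h>

/* Smoothed limit, standing in for global_dirty_limit in the patch. */
static unsigned long limit;

/* Illustrative analogue of update_dirty_limit(); values are page counts. */
static void update_limit(unsigned long thresh, unsigned long dirty)
{
	/* Follow up in one step. */
	if (limit < thresh) {
		limit = thresh;
		return;
	}
	/* Follow down slowly, never targeting below the dirty page count. */
	if (thresh < dirty)
		thresh = dirty;
	if (limit > thresh)
		limit -= (limit - thresh) >> 5;
}

int main(void)
{
	int i;

	limit = 100000;	/* limit before the dirtyable memory drops (made up) */
	for (i = 0; i < 25; i++) {
		/* threshold knocked down to 20000 while 30000 pages are dirty */
		update_limit(20000, 30000);
		printf("after %2d intervals (~%4dms): limit = %lu\n",
		       i + 1, (i + 1) * 200, limit);
	}
	return 0;
}

With the 1/32 step, the gap to the target halves roughly every 22 intervals, i.e. the limit decays over a few seconds rather than collapsing instantly, and it always stays above the dirty page count, which is what spares the light dirtiers from abrupt throttling.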
Diffstat (limited to 'mm')
-rw-r--r--  mm/page-writeback.c  74
1 file changed, 72 insertions(+), 2 deletions(-)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5f3e1b46ace5..da959952b9f5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -116,6 +116,7 @@ EXPORT_SYMBOL(laptop_mode);
 
 /* End of sysctl-exported parameters */
 
+unsigned long global_dirty_limit;
 
 /*
  * Scale the writeback cache size proportional to the relative writeout speeds.
@@ -516,7 +517,67 @@ out:
 	bdi->avg_write_bandwidth = avg;
 }
 
+/*
+ * The global dirtyable memory and dirty threshold could be suddenly knocked
+ * down by a large amount (eg. on the startup of KVM in a swapless system).
+ * This may throw the system into deep dirty exceeded state and throttle
+ * heavy/light dirtiers alike. To retain good responsiveness, maintain
+ * global_dirty_limit for tracking slowly down to the knocked down dirty
+ * threshold.
+ */
+static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
+{
+	unsigned long limit = global_dirty_limit;
+
+	/*
+	 * Follow up in one step.
+	 */
+	if (limit < thresh) {
+		limit = thresh;
+		goto update;
+	}
+
+	/*
+	 * Follow down slowly. Use the higher one as the target, because thresh
+	 * may drop below dirty. This is exactly the reason to introduce
+	 * global_dirty_limit which is guaranteed to lie above the dirty pages.
+	 */
+	thresh = max(thresh, dirty);
+	if (limit > thresh) {
+		limit -= (limit - thresh) >> 5;
+		goto update;
+	}
+	return;
+update:
+	global_dirty_limit = limit;
+}
+
+static void global_update_bandwidth(unsigned long thresh,
+				    unsigned long dirty,
+				    unsigned long now)
+{
+	static DEFINE_SPINLOCK(dirty_lock);
+	static unsigned long update_time;
+
+	/*
+	 * check locklessly first to optimize away locking for the most time
+	 */
+	if (time_before(now, update_time + BANDWIDTH_INTERVAL))
+		return;
+
+	spin_lock(&dirty_lock);
+	if (time_after_eq(now, update_time + BANDWIDTH_INTERVAL)) {
+		update_dirty_limit(thresh, dirty);
+		update_time = now;
+	}
+	spin_unlock(&dirty_lock);
+}
+
 void __bdi_update_bandwidth(struct backing_dev_info *bdi,
+			    unsigned long thresh,
+			    unsigned long dirty,
+			    unsigned long bdi_thresh,
+			    unsigned long bdi_dirty,
 			    unsigned long start_time)
 {
 	unsigned long now = jiffies;
@@ -538,6 +599,9 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 	if (elapsed > HZ && time_before(bdi->bw_time_stamp, start_time))
 		goto snapshot;
 
+	if (thresh)
+		global_update_bandwidth(thresh, dirty, now);
+
 	bdi_update_write_bandwidth(bdi, elapsed, written);
 
 snapshot:
@@ -546,12 +610,17 @@ snapshot:
 }
 
 static void bdi_update_bandwidth(struct backing_dev_info *bdi,
+				 unsigned long thresh,
+				 unsigned long dirty,
+				 unsigned long bdi_thresh,
+				 unsigned long bdi_dirty,
 				 unsigned long start_time)
 {
 	if (time_is_after_eq_jiffies(bdi->bw_time_stamp + BANDWIDTH_INTERVAL))
 		return;
 	spin_lock(&bdi->wb.list_lock);
-	__bdi_update_bandwidth(bdi, start_time);
+	__bdi_update_bandwidth(bdi, thresh, dirty, bdi_thresh, bdi_dirty,
+			       start_time);
 	spin_unlock(&bdi->wb.list_lock);
 }
 
@@ -630,7 +699,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 		if (!bdi->dirty_exceeded)
 			bdi->dirty_exceeded = 1;
 
-		bdi_update_bandwidth(bdi, start_time);
+		bdi_update_bandwidth(bdi, dirty_thresh, nr_dirty,
+				     bdi_thresh, bdi_dirty, start_time);
 
 		/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
 		 * Unstable writes are a feature of certain networked
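The locking in global_update_bandwidth() above follows a common rate-limiting idiom: an unlocked timestamp check turns away almost all callers cheaply, and the check is repeated under the lock so that at most one caller per BANDWIDTH_INTERVAL performs the update. The sketch below is a simplified user-space analogue of that idiom using POSIX threads; the helper names and the millisecond clock are assumptions for illustration, not kernel APIs.

#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define UPDATE_INTERVAL_MS 200	/* illustrative analogue of BANDWIDTH_INTERVAL */

static pthread_mutex_t update_lock = PTHREAD_MUTEX_INITIALIZER;
static long last_update_ms;

/* Monotonic time in milliseconds (illustrative helper). */
static long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

/* Called frequently; the expensive update runs at most once per interval. */
static void maybe_update(void)
{
	long now = now_ms();

	/* Lockless check first: most callers bail out here. */
	if (now < last_update_ms + UPDATE_INTERVAL_MS)
		return;

	pthread_mutex_lock(&update_lock);
	/* Re-check under the lock so only one thread per interval updates. */
	if (now >= last_update_ms + UPDATE_INTERVAL_MS) {
		printf("performing update at %ld ms\n", now);
		last_update_ms = now;
	}
	pthread_mutex_unlock(&update_lock);
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++)
		maybe_update();	/* only the first call does the work */
	return 0;
}

As in the kernel version, the unlocked read of the timestamp is only an optimization: a racing caller that sees a stale value simply takes the lock and re-checks, so correctness rests entirely on the locked comparison.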