 fs/fs-writeback.c         |  2 +-
 include/linux/writeback.h |  6 ++++++
 mm/page-writeback.c       | 74 +++++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 79 insertions(+), 3 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 5826992910e9..227ff12257f3 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -699,7 +699,7 @@ static inline bool over_bground_thresh(void)
 static void wb_update_bandwidth(struct bdi_writeback *wb,
 				unsigned long start_time)
 {
-	__bdi_update_bandwidth(wb->bdi, start_time);
+	__bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, start_time);
 }
 
 /*
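
Note: the flusher-thread path above has no global dirty figures at hand, so it
passes zeros for thresh/dirty/bdi_thresh/bdi_dirty; the "if (thresh)" guard
added to __bdi_update_bandwidth() later in this patch then skips the
global_dirty_limit update, and only the per-bdi write bandwidth is refreshed.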
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 66862f2d90c8..e9d371b6053b 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -84,6 +84,8 @@ static inline void laptop_sync_completion(void) { }
 #endif
 void throttle_vm_writeout(gfp_t gfp_mask);
 
+extern unsigned long global_dirty_limit;
+
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
 extern unsigned long dirty_background_bytes;
@@ -119,6 +121,10 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi,
 			      unsigned long dirty);
 
 void __bdi_update_bandwidth(struct backing_dev_info *bdi,
+			    unsigned long thresh,
+			    unsigned long dirty,
+			    unsigned long bdi_thresh,
+			    unsigned long bdi_dirty,
 			    unsigned long start_time);
 
 void page_writeback_init(void);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 5f3e1b46ace5..da959952b9f5 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -116,6 +116,7 @@ EXPORT_SYMBOL(laptop_mode);
 
 /* End of sysctl-exported parameters */
 
+unsigned long global_dirty_limit;
 
 /*
  * Scale the writeback cache size proportional to the relative writeout speeds.
@@ -516,7 +517,67 @@ out:
 	bdi->avg_write_bandwidth = avg;
 }
 
+/*
+ * The global dirtyable memory and dirty threshold could be suddenly knocked
+ * down by a large amount (eg. on the startup of KVM in a swapless system).
+ * This may throw the system into deep dirty exceeded state and throttle
+ * heavy/light dirtiers alike. To retain good responsiveness, maintain
+ * global_dirty_limit for tracking slowly down to the knocked down dirty
+ * threshold.
+ */
+static void update_dirty_limit(unsigned long thresh, unsigned long dirty)
+{
+	unsigned long limit = global_dirty_limit;
+
+	/*
+	 * Follow up in one step.
+	 */
+	if (limit < thresh) {
+		limit = thresh;
+		goto update;
+	}
+
+	/*
+	 * Follow down slowly. Use the higher one as the target, because thresh
+	 * may drop below dirty. This is exactly the reason to introduce
+	 * global_dirty_limit which is guaranteed to lie above the dirty pages.
+	 */
+	thresh = max(thresh, dirty);
+	if (limit > thresh) {
+		limit -= (limit - thresh) >> 5;
+		goto update;
+	}
+	return;
+update:
+	global_dirty_limit = limit;
+}
+
+static void global_update_bandwidth(unsigned long thresh,
+				    unsigned long dirty,
+				    unsigned long now)
+{
+	static DEFINE_SPINLOCK(dirty_lock);
+	static unsigned long update_time;
+
+	/*
+	 * check locklessly first to optimize away locking for the most time
+	 */
+	if (time_before(now, update_time + BANDWIDTH_INTERVAL))
+		return;
+
+	spin_lock(&dirty_lock);
+	if (time_after_eq(now, update_time + BANDWIDTH_INTERVAL)) {
+		update_dirty_limit(thresh, dirty);
+		update_time = now;
+	}
+	spin_unlock(&dirty_lock);
+}
+
 void __bdi_update_bandwidth(struct backing_dev_info *bdi,
+			    unsigned long thresh,
+			    unsigned long dirty,
+			    unsigned long bdi_thresh,
+			    unsigned long bdi_dirty,
 			    unsigned long start_time)
 {
 	unsigned long now = jiffies;
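
The interesting behaviour in update_dirty_limit() above is the downward
tracking: each update closes only 1/32 of the gap between the current limit
and the (possibly suddenly lowered) threshold. A minimal user-space sketch of
just that step, with purely illustrative page counts and update counts, shows
how the limit eases down rather than collapsing:

#include <stdio.h>

/*
 * Mimic the downward step of update_dirty_limit(): move the limit 1/32 of
 * the way toward a lower threshold per call. Values are illustrative only.
 */
static unsigned long step(unsigned long limit, unsigned long thresh)
{
	if (limit > thresh)
		limit -= (limit - thresh) >> 5;
	return limit;
}

int main(void)
{
	unsigned long limit = 200000;	/* old global dirty threshold, in pages */
	unsigned long thresh = 50000;	/* threshold after dirtyable memory shrank */
	int i;

	for (i = 0; i <= 160; i++) {
		if (i % 20 == 0)
			printf("after %3d updates: limit = %lu\n", i, limit);
		limit = step(limit, thresh);
	}
	return 0;
}

Since each step scales the remaining gap by 31/32, the gap halves roughly
every 22 updates; with updates rate-limited to one per BANDWIDTH_INTERVAL
(HZ/5 in the bandwidth-estimation patch this builds on), a large threshold
drop is followed over a span of seconds, which is the intended smoothing.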
@@ -538,6 +599,9 @@ void __bdi_update_bandwidth(struct backing_dev_info *bdi,
 	if (elapsed > HZ && time_before(bdi->bw_time_stamp, start_time))
 		goto snapshot;
 
+	if (thresh)
+		global_update_bandwidth(thresh, dirty, now);
+
 	bdi_update_write_bandwidth(bdi, elapsed, written);
 
 snapshot:
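
global_update_bandwidth(), called under the "if (thresh)" guard above, is a
classic double-checked ratelimit: an unlocked time_before() test skips the
common case, and the same test is repeated under dirty_lock so that at most
one CPU per BANDWIDTH_INTERVAL actually moves global_dirty_limit. A user-space
sketch of the same shape, substituting a pthread mutex and a monotonic clock
for the kernel primitives (all names and the 200ms interval are illustrative):

#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define INTERVAL_NS	200000000L	/* stand-in for BANDWIDTH_INTERVAL */

static pthread_mutex_t dirty_lock = PTHREAD_MUTEX_INITIALIZER;
static long update_time_ns;

static long now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000L + ts.tv_nsec;
}

/*
 * Cheap unlocked check first, then recheck under the lock so only one
 * caller per interval does the real work -- the same shape as
 * global_update_bandwidth().
 */
static void rate_limited_update(void)
{
	long now = now_ns();

	if (now - update_time_ns < INTERVAL_NS)		/* lockless fast path */
		return;

	pthread_mutex_lock(&dirty_lock);
	if (now - update_time_ns >= INTERVAL_NS) {	/* recheck under lock */
		printf("update at %ld ns\n", now);
		update_time_ns = now;
	}
	pthread_mutex_unlock(&dirty_lock);
}

int main(void)
{
	struct timespec d = { 0, INTERVAL_NS };
	int i;

	for (i = 0; i < 3; i++) {
		rate_limited_update();
		rate_limited_update();	/* same interval: skipped */
		nanosleep(&d, NULL);
	}
	return 0;
}

The unlocked read of the timestamp is benign for the same reason it is in the
kernel version: a stale value only sends the caller through the locked
recheck, where the decision is made again with the lock held.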
@@ -546,12 +610,17 @@ snapshot:
546} 610}
547 611
548static void bdi_update_bandwidth(struct backing_dev_info *bdi, 612static void bdi_update_bandwidth(struct backing_dev_info *bdi,
613 unsigned long thresh,
614 unsigned long dirty,
615 unsigned long bdi_thresh,
616 unsigned long bdi_dirty,
549 unsigned long start_time) 617 unsigned long start_time)
550{ 618{
551 if (time_is_after_eq_jiffies(bdi->bw_time_stamp + BANDWIDTH_INTERVAL)) 619 if (time_is_after_eq_jiffies(bdi->bw_time_stamp + BANDWIDTH_INTERVAL))
552 return; 620 return;
553 spin_lock(&bdi->wb.list_lock); 621 spin_lock(&bdi->wb.list_lock);
554 __bdi_update_bandwidth(bdi, start_time); 622 __bdi_update_bandwidth(bdi, thresh, dirty, bdi_thresh, bdi_dirty,
623 start_time);
555 spin_unlock(&bdi->wb.list_lock); 624 spin_unlock(&bdi->wb.list_lock);
556} 625}
557 626
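
bdi_update_bandwidth() applies the same kind of ratelimit per backing device,
returning early via time_is_after_eq_jiffies() before it takes wb.list_lock.
These jiffies comparisons are wraparound-safe because they go through signed
subtraction; a small stand-alone illustration of that trick (this reimplements
the idea rather than using the kernel macros):

#include <stdio.h>

/*
 * Wraparound-safe "a is before b" for a free-running unsigned counter:
 * subtract in unsigned arithmetic, then look at the sign of the result.
 * This is the same trick the kernel's time_before()/time_after() use.
 */
static int before(unsigned long a, unsigned long b)
{
	return (long)(a - b) < 0;
}

int main(void)
{
	unsigned long t = (unsigned long)-10;	/* counter about to wrap */

	/* t + 20 wraps past zero, yet still compares as "after" t */
	printf("before(t, t + 20) = %d\n", before(t, t + 20));	/* 1 */
	printf("before(t + 20, t) = %d\n", before(t + 20, t));	/* 0 */
	return 0;
}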
@@ -630,7 +699,8 @@ static void balance_dirty_pages(struct address_space *mapping,
 		if (!bdi->dirty_exceeded)
 			bdi->dirty_exceeded = 1;
 
-		bdi_update_bandwidth(bdi, start_time);
+		bdi_update_bandwidth(bdi, dirty_thresh, nr_dirty,
+				     bdi_thresh, bdi_dirty, start_time);
 
 		/* Note: nr_reclaimable denotes nr_dirty + nr_unstable.
 		 * Unstable writes are a feature of certain networked
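
Taken together: balance_dirty_pages() is the one caller in this patch that
feeds real figures (dirty_thresh, nr_dirty, bdi_thresh, bdi_dirty) into the
estimator, so the throttling path is what keeps global_dirty_limit tracking
the live dirty threshold, while the flusher path continues to update only the
per-bdi write bandwidth.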