summary | refs | log | tree | commit | diff | stats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
author: Johannes Weiner <hannes@cmpxchg.org> 2016-03-17 17:19:14 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2016-03-17 18:09:34 -0400
commit: 795ae7a0de6b834a0cc202aa55c190ef81496665 (patch)
tree: f76ff0f0a99242aee5048cc018c33f01fe299a1c /mm/page_alloc.c
parent: 3ed3a4f0ddffece942bb2661924d87be4ce63cb7 (diff)
mm: scale kswapd watermarks in proportion to memory
In machines with 140G of memory and enterprise flash storage, we have seen read and write bursts routinely exceed the kswapd watermarks and cause thundering herds in direct reclaim. Unfortunately, the only way to tune kswapd aggressiveness is through adjusting min_free_kbytes - the system's emergency reserves - which is entirely unrelated to the system's latency requirements. In order to get kswapd to maintain a 250M buffer of free memory, the emergency reserves need to be set to 1G. That is a lot of memory wasted for no good reason.

On the other hand, it's reasonable to assume that allocation bursts and overall allocation concurrency scale with memory capacity, so it makes sense to make kswapd aggressiveness a function of that as well.

Change the kswapd watermark scale factor from the currently fixed 25% of the tunable emergency reserve to a tunable 0.1% of memory. Beyond 1G of memory, this will produce bigger watermark steps than the current formula in default settings. Ensure that the new formula never chooses steps smaller than that, i.e. 25% of the emergency reserve.

On a 140G machine, this raises the default watermark steps - the distance between min and low, and low and high - from 16M to 143M.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- mm/page_alloc.c 29
1 files changed, 27 insertions, 2 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 941b802e11ec..d156310aedeb 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -249,6 +249,7 @@ compound_page_dtor * const compound_page_dtors[] = {
249 249
250int min_free_kbytes = 1024; 250int min_free_kbytes = 1024;
251int user_min_free_kbytes = -1; 251int user_min_free_kbytes = -1;
252int watermark_scale_factor = 10;
252 253
253static unsigned long __meminitdata nr_kernel_pages; 254static unsigned long __meminitdata nr_kernel_pages;
254static unsigned long __meminitdata nr_all_pages; 255static unsigned long __meminitdata nr_all_pages;
@@ -6347,8 +6348,17 @@ static void __setup_per_zone_wmarks(void)
6347 zone->watermark[WMARK_MIN] = tmp; 6348 zone->watermark[WMARK_MIN] = tmp;
6348 } 6349 }
6349 6350
6350 zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + (tmp >> 2); 6351 /*
6351 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1); 6352 * Set the kswapd watermarks distance according to the
6353 * scale factor in proportion to available memory, but
6354 * ensure a minimum size on small systems.
6355 */
6356 tmp = max_t(u64, tmp >> 2,
6357 mult_frac(zone->managed_pages,
6358 watermark_scale_factor, 10000));
6359
6360 zone->watermark[WMARK_LOW] = min_wmark_pages(zone) + tmp;
6361 zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + tmp * 2;
6352 6362
6353 __mod_zone_page_state(zone, NR_ALLOC_BATCH, 6363 __mod_zone_page_state(zone, NR_ALLOC_BATCH,
6354 high_wmark_pages(zone) - low_wmark_pages(zone) - 6364 high_wmark_pages(zone) - low_wmark_pages(zone) -
@@ -6489,6 +6499,21 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
6489 return 0; 6499 return 0;
6490} 6500}
6491 6501
6502int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
6503 void __user *buffer, size_t *length, loff_t *ppos)
6504{
6505 int rc;
6506
6507 rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
6508 if (rc)
6509 return rc;
6510
6511 if (write)
6512 setup_per_zone_wmarks();
6513
6514 return 0;
6515}
6516
6492#ifdef CONFIG_NUMA 6517#ifdef CONFIG_NUMA
6493int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write, 6518int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
6494 void __user *buffer, size_t *length, loff_t *ppos) 6519 void __user *buffer, size_t *length, loff_t *ppos)