aboutsummaryrefslogtreecommitdiffstats
path: root/mm/vmstat.c
diff options
context:
space:
mode:
authorChristoph Lameter <cl@linux.com>2010-09-09 19:38:17 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-09-09 21:57:25 -0400
commitaa45484031ddee09b06350ab8528bfe5b2c76d1c (patch)
tree6758072232db9a54453022ec3e6cede35d52001c /mm/vmstat.c
parent72853e2991a2702ae93aaf889ac7db743a415dd3 (diff)
mm: page allocator: calculate a better estimate of NR_FREE_PAGES when memory is low and kswapd is awake
Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES as it is cheaper than scanning a number of lists. To avoid synchronization overhead, counter deltas are maintained on a per-cpu basis and drained both periodically and when the delta is above a threshold. On large CPU systems, the difference between the estimated and real value of NR_FREE_PAGES can be very high. If NR_FREE_PAGES is much higher than number of real free page in buddy, the VM can allocate pages below min watermark, at worst reducing the real number of pages to zero. Even if the OOM killer kills some victim for freeing memory, it may not free memory if the exit path requires a new page resulting in livelock. This patch introduces a zone_page_state_snapshot() function (courtesy of Christoph) that takes a slightly more accurate view of an arbitrary vmstat counter. It is used to read NR_FREE_PAGES while kswapd is awake to avoid the watermark being accidentally broken. The estimate is not perfect and may result in cache line bounces but is expected to be lighter than the IPI calls necessary to continually drain the per-cpu counters while kswapd is awake. Signed-off-by: Christoph Lameter <cl@linux.com> Signed-off-by: Mel Gorman <mel@csn.ul.ie> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/vmstat.c')
-rw-r--r--mm/vmstat.c15
1 files changed, 14 insertions, 1 deletions
diff --git a/mm/vmstat.c b/mm/vmstat.c
index a8d6b59e609a..355a9e669aaa 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -138,11 +138,24 @@ static void refresh_zone_stat_thresholds(void)
138 int threshold; 138 int threshold;
139 139
140 for_each_populated_zone(zone) { 140 for_each_populated_zone(zone) {
141 unsigned long max_drift, tolerate_drift;
142
141 threshold = calculate_threshold(zone); 143 threshold = calculate_threshold(zone);
142 144
143 for_each_online_cpu(cpu) 145 for_each_online_cpu(cpu)
144 per_cpu_ptr(zone->pageset, cpu)->stat_threshold 146 per_cpu_ptr(zone->pageset, cpu)->stat_threshold
145 = threshold; 147 = threshold;
148
149 /*
150 * Only set percpu_drift_mark if there is a danger that
151 * NR_FREE_PAGES reports the low watermark is ok when in fact
152 * the min watermark could be breached by an allocation
153 */
154 tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone);
155 max_drift = num_online_cpus() * threshold;
156 if (max_drift > tolerate_drift)
157 zone->percpu_drift_mark = high_wmark_pages(zone) +
158 max_drift;
146 } 159 }
147} 160}
148 161
@@ -813,7 +826,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
813 "\n scanned %lu" 826 "\n scanned %lu"
814 "\n spanned %lu" 827 "\n spanned %lu"
815 "\n present %lu", 828 "\n present %lu",
816 zone_page_state(zone, NR_FREE_PAGES), 829 zone_nr_free_pages(zone),
817 min_wmark_pages(zone), 830 min_wmark_pages(zone),
818 low_wmark_pages(zone), 831 low_wmark_pages(zone),
819 high_wmark_pages(zone), 832 high_wmark_pages(zone),