aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/vmstat.h
diff options
context:
space:
mode:
authorChristoph Lameter <cl@linux.com>2010-09-09 19:38:17 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-09-09 21:57:25 -0400
commitaa45484031ddee09b06350ab8528bfe5b2c76d1c (patch)
tree6758072232db9a54453022ec3e6cede35d52001c /include/linux/vmstat.h
parent72853e2991a2702ae93aaf889ac7db743a415dd3 (diff)
mm: page allocator: calculate a better estimate of NR_FREE_PAGES when memory is low and kswapd is awake
Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES as it is cheaper than scanning a number of lists. To avoid synchronization overhead, counter deltas are maintained on a per-cpu basis and drained both periodically and when the delta is above a threshold. On large CPU systems, the difference between the estimated and real value of NR_FREE_PAGES can be very high. If NR_FREE_PAGES is much higher than number of real free page in buddy, the VM can allocate pages below min watermark, at worst reducing the real number of pages to zero. Even if the OOM killer kills some victim for freeing memory, it may not free memory if the exit path requires a new page resulting in livelock. This patch introduces a zone_page_state_snapshot() function (courtesy of Christoph) that takes a slightly more accurate view of an arbitrary vmstat counter. It is used to read NR_FREE_PAGES while kswapd is awake to avoid the watermark being accidentally broken. The estimate is not perfect and may result in cache line bounces but is expected to be lighter than the IPI calls necessary to continually drain the per-cpu counters while kswapd is awake. Signed-off-by: Christoph Lameter <cl@linux.com> Signed-off-by: Mel Gorman <mel@csn.ul.ie> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux/vmstat.h')
-rw-r--r--include/linux/vmstat.h22
1 files changed, 22 insertions, 0 deletions
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 7f43ccdc1d38..eaaea37b3b75 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -170,6 +170,28 @@ static inline unsigned long zone_page_state(struct zone *zone,
170 return x; 170 return x;
171} 171}
172 172
173/*
174 * More accurate version that also considers the currently pending
175 * deltas. For that we need to loop over all cpus to find the current
176 * deltas. There is no synchronization so the result cannot be
177 * exactly accurate either.
178 */
179static inline unsigned long zone_page_state_snapshot(struct zone *zone,
180 enum zone_stat_item item)
181{
182 long x = atomic_long_read(&zone->vm_stat[item]);
183
184#ifdef CONFIG_SMP
185 int cpu;
186 for_each_online_cpu(cpu)
187 x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item];
188
189 if (x < 0)
190 x = 0;
191#endif
192 return x;
193}
194
173extern unsigned long global_reclaimable_pages(void); 195extern unsigned long global_reclaimable_pages(void);
174extern unsigned long zone_reclaimable_pages(struct zone *zone); 196extern unsigned long zone_reclaimable_pages(struct zone *zone);
175 197