aboutsummaryrefslogtreecommitdiffstats
path: root/mm/page-writeback.c
diff options
context:
space:
mode:
authorMel Gorman <mgorman@techsingularity.net>2016-07-28 18:47:05 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-28 19:07:41 -0400
commitbca6759258dbef378bcf5b872177bcd2259ceb68 (patch)
tree17b2a1307ab70e4fb6f8f2cf4b535240b2433d5d /mm/page-writeback.c
parente2ecc8a79ed49f7838b4fdf352c4c48cec9424ac (diff)
mm, vmstat: remove zone and node double accounting by approximating retries
The number of LRU pages, dirty pages and writeback pages must be accounted for on both zones and nodes because of the reclaim retry logic, compaction retry logic and highmem calculations all depending on per-zone stats. Many lowmem allocations are immune from OOM kill due to a check in __alloc_pages_may_oom for (ac->high_zoneidx < ZONE_NORMAL) since commit 03668b3ceb0c ("oom: avoid oom killer for lowmem allocations"). The exception is costly high-order allocations or allocations that cannot fail. If __alloc_pages_may_oom avoids OOM-kill for low-order lowmem allocations then it would fall through to __alloc_pages_direct_compact. This patch will blindly retry reclaim for zone-constrained allocations in should_reclaim_retry up to MAX_RECLAIM_RETRIES. This is not ideal but without per-zone stats there are not many alternatives. The impact is that zone-constrained allocations may delay before considering the OOM killer. As there is no guarantee enough memory can ever be freed to satisfy compaction, this patch avoids retrying compaction for zone-constrained allocations. In combination, that means that the per-node stats can be used when deciding whether to continue reclaim using a rough approximation. While it is possible this will make the wrong decision on occasion, it will not loop infinitely as the number of reclaim attempts is capped by MAX_RECLAIM_RETRIES. The final step is calculating the number of dirtyable highmem pages. As those calculations only care about the global count of file pages in highmem, this patch uses a global counter instead of per-zone stats, as it is sufficient. In combination, this allows the per-zone LRU and dirty state counters to be removed.
[mgorman@techsingularity.net: fix acct_highmem_file_pages()] Link: http://lkml.kernel.org/r/1468853426-12858-4-git-send-email-mgorman@techsingularity.net Link: http://lkml.kernel.org/r/1467970510-21195-35-git-send-email-mgorman@techsingularity.net Signed-off-by: Mel Gorman <mgorman@techsingularity.net> Suggested-by: Michal Hocko <mhocko@kernel.org> Acked-by: Hillf Danton <hillf.zj@alibaba-inc.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com> Cc: Michal Hocko <mhocko@kernel.org> Cc: Minchan Kim <minchan@kernel.org> Cc: Rik van Riel <riel@surriel.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/page-writeback.c')
-rw-r--r--mm/page-writeback.c13
1 files changed, 5 insertions, 8 deletions
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 3c02aa603f5a..0bca2376bd42 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -299,6 +299,9 @@ static unsigned long node_dirtyable_memory(struct pglist_data *pgdat)
299 299
300 return nr_pages; 300 return nr_pages;
301} 301}
302#ifdef CONFIG_HIGHMEM
303atomic_t highmem_file_pages;
304#endif
302 305
303static unsigned long highmem_dirtyable_memory(unsigned long total) 306static unsigned long highmem_dirtyable_memory(unsigned long total)
304{ 307{
@@ -306,18 +309,17 @@ static unsigned long highmem_dirtyable_memory(unsigned long total)
306 int node; 309 int node;
307 unsigned long x = 0; 310 unsigned long x = 0;
308 int i; 311 int i;
312 unsigned long dirtyable = atomic_read(&highmem_file_pages);
309 313
310 for_each_node_state(node, N_HIGH_MEMORY) { 314 for_each_node_state(node, N_HIGH_MEMORY) {
311 for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) { 315 for (i = ZONE_NORMAL + 1; i < MAX_NR_ZONES; i++) {
312 struct zone *z; 316 struct zone *z;
313 unsigned long dirtyable;
314 317
315 if (!is_highmem_idx(i)) 318 if (!is_highmem_idx(i))
316 continue; 319 continue;
317 320
318 z = &NODE_DATA(node)->node_zones[i]; 321 z = &NODE_DATA(node)->node_zones[i];
319 dirtyable = zone_page_state(z, NR_FREE_PAGES) + 322 dirtyable += zone_page_state(z, NR_FREE_PAGES);
320 zone_page_state(z, NR_ZONE_LRU_FILE);
321 323
322 /* watch for underflows */ 324 /* watch for underflows */
323 dirtyable -= min(dirtyable, high_wmark_pages(z)); 325 dirtyable -= min(dirtyable, high_wmark_pages(z));
@@ -2460,7 +2462,6 @@ void account_page_dirtied(struct page *page, struct address_space *mapping)
2460 2462
2461 mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY); 2463 mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_DIRTY);
2462 __inc_node_page_state(page, NR_FILE_DIRTY); 2464 __inc_node_page_state(page, NR_FILE_DIRTY);
2463 __inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2464 __inc_node_page_state(page, NR_DIRTIED); 2465 __inc_node_page_state(page, NR_DIRTIED);
2465 __inc_wb_stat(wb, WB_RECLAIMABLE); 2466 __inc_wb_stat(wb, WB_RECLAIMABLE);
2466 __inc_wb_stat(wb, WB_DIRTIED); 2467 __inc_wb_stat(wb, WB_DIRTIED);
@@ -2482,7 +2483,6 @@ void account_page_cleaned(struct page *page, struct address_space *mapping,
2482 if (mapping_cap_account_dirty(mapping)) { 2483 if (mapping_cap_account_dirty(mapping)) {
2483 mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); 2484 mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
2484 dec_node_page_state(page, NR_FILE_DIRTY); 2485 dec_node_page_state(page, NR_FILE_DIRTY);
2485 dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2486 dec_wb_stat(wb, WB_RECLAIMABLE); 2486 dec_wb_stat(wb, WB_RECLAIMABLE);
2487 task_io_account_cancelled_write(PAGE_SIZE); 2487 task_io_account_cancelled_write(PAGE_SIZE);
2488 } 2488 }
@@ -2739,7 +2739,6 @@ int clear_page_dirty_for_io(struct page *page)
2739 if (TestClearPageDirty(page)) { 2739 if (TestClearPageDirty(page)) {
2740 mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY); 2740 mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_DIRTY);
2741 dec_node_page_state(page, NR_FILE_DIRTY); 2741 dec_node_page_state(page, NR_FILE_DIRTY);
2742 dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2743 dec_wb_stat(wb, WB_RECLAIMABLE); 2742 dec_wb_stat(wb, WB_RECLAIMABLE);
2744 ret = 1; 2743 ret = 1;
2745 } 2744 }
@@ -2786,7 +2785,6 @@ int test_clear_page_writeback(struct page *page)
2786 if (ret) { 2785 if (ret) {
2787 mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); 2786 mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
2788 dec_node_page_state(page, NR_WRITEBACK); 2787 dec_node_page_state(page, NR_WRITEBACK);
2789 dec_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2790 inc_node_page_state(page, NR_WRITTEN); 2788 inc_node_page_state(page, NR_WRITTEN);
2791 } 2789 }
2792 unlock_page_memcg(page); 2790 unlock_page_memcg(page);
@@ -2841,7 +2839,6 @@ int __test_set_page_writeback(struct page *page, bool keep_write)
2841 if (!ret) { 2839 if (!ret) {
2842 mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); 2840 mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
2843 inc_node_page_state(page, NR_WRITEBACK); 2841 inc_node_page_state(page, NR_WRITEBACK);
2844 inc_zone_page_state(page, NR_ZONE_WRITE_PENDING);
2845 } 2842 }
2846 unlock_page_memcg(page); 2843 unlock_page_memcg(page);
2847 return ret; 2844 return ret;