path: root/mm/page_alloc.c
author	Mel Gorman <mgorman@techsingularity.net>	2016-07-28 18:45:31 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-28 19:07:41 -0400
commit	599d0c954f91d0689c9bb421b5bc04ea02437a41 (patch)
tree	e863ce685841e494bcb63e458739e0939ac684f6 /mm/page_alloc.c
parent	a52633d8e9c35832f1409dc5fa166019048a3f1f (diff)
mm, vmscan: move LRU lists to node
This moves the LRU lists from the zone to the node, along with related data
such as counters, tracing, congestion tracking and writeback tracking.

Unfortunately, due to the reclaim and compaction retry logic, it is necessary
to account for the number of LRU pages at both the zone and the node level.
Most reclaim logic is based on the node counters, but the retry logic uses the
zone counters, which do not distinguish inactive and active sizes.  It would
be possible to leave the LRU counters on a per-zone basis, but that is a
heavier calculation across multiple cache lines and it runs much more
frequently than the retry checks.

Other than the LRU counters, this is mostly a mechanical patch, but note that
it introduces a number of anomalies.  For example, the scans are per-zone but
use per-node counters, and a node is marked congested when a zone is
congested.  This causes weird problems that are fixed in later patches but
keeps this one easier to review.

In the event that there is excessive overhead on 32-bit systems due to the
LRU lists being per-node, there are two potential solutions:

1. Long-term isolation of highmem pages when reclaim is lowmem

   When pages are skipped, they are immediately added back onto the LRU list.
   If lowmem reclaim persisted for long periods of time, the same highmem
   pages would be scanned again and again.  The idea would be that lowmem
   reclaim keeps those pages on a separate list until a reclaim for highmem
   pages arrives and splices them back onto the LRU.  It could potentially be
   implemented in a way similar to the UNEVICTABLE list.

   That would reduce the skip rate; the potential corner case is that highmem
   pages may then have to be scanned and reclaimed in order to free lowmem
   slab pages.

2. Linear scan of lowmem pages if the initial LRU shrink fails

   This would break LRU ordering but may be preferable and faster under
   memory pressure than skipping LRU pages.

Link: http://lkml.kernel.org/r/1467970510-21195-4-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
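To make the mechanical part of the conversion concrete, here is a minimal
sketch (not part of the patch) of what the move means for a caller that reads
LRU statistics.  The NR_* items, node_page_state() and zone->zone_pgdat are
exactly what the diff below uses; the wrapper function itself is hypothetical.

#include <linux/mmzone.h>
#include <linux/vmstat.h>

/*
 * Hypothetical helper, for illustration only: total LRU pages on the node
 * that owns @zone.  Before this patch these counters lived in struct zone
 * and were read with zone_page_state(zone, NR_*); after it they live in the
 * node and are read through zone->zone_pgdat with node_page_state().
 */
static unsigned long node_lru_pages_of(struct zone *zone)
{
	pg_data_t *pgdat = zone->zone_pgdat;

	return node_page_state(pgdat, NR_ACTIVE_ANON) +
	       node_page_state(pgdat, NR_INACTIVE_ANON) +
	       node_page_state(pgdat, NR_ACTIVE_FILE) +
	       node_page_state(pgdat, NR_INACTIVE_FILE) +
	       node_page_state(pgdat, NR_UNEVICTABLE);
}

The show_free_areas() hunks below perform this kind of substitution, printing
the per-node LRU totals once per pgdat instead of once per zone.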
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--	mm/page_alloc.c	68
1 file changed, 37 insertions, 31 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5760c626c309..35e2d0f9d44f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1078,9 +1078,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 
 	spin_lock(&zone->lock);
 	isolated_pageblocks = has_isolate_pageblock(zone);
-	nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+	nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
 	if (nr_scanned)
-		__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
+		__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
 
 	while (count) {
 		struct page *page;
@@ -1135,9 +1135,9 @@ static void free_one_page(struct zone *zone,
 {
 	unsigned long nr_scanned;
 	spin_lock(&zone->lock);
-	nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED);
+	nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
 	if (nr_scanned)
-		__mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned);
+		__mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
 
 	if (unlikely(has_isolate_pageblock(zone) ||
 		     is_migrate_isolate(migratetype))) {
@@ -4288,6 +4288,7 @@ void show_free_areas(unsigned int filter)
 	unsigned long free_pcp = 0;
 	int cpu;
 	struct zone *zone;
+	pg_data_t *pgdat;
 
 	for_each_populated_zone(zone) {
 		if (skip_free_areas_node(filter, zone_to_nid(zone)))
@@ -4306,13 +4307,13 @@ void show_free_areas(unsigned int filter)
 		" anon_thp: %lu shmem_thp: %lu shmem_pmdmapped: %lu\n"
 #endif
 		" free:%lu free_pcp:%lu free_cma:%lu\n",
-		global_page_state(NR_ACTIVE_ANON),
-		global_page_state(NR_INACTIVE_ANON),
-		global_page_state(NR_ISOLATED_ANON),
-		global_page_state(NR_ACTIVE_FILE),
-		global_page_state(NR_INACTIVE_FILE),
-		global_page_state(NR_ISOLATED_FILE),
-		global_page_state(NR_UNEVICTABLE),
+		global_node_page_state(NR_ACTIVE_ANON),
+		global_node_page_state(NR_INACTIVE_ANON),
+		global_node_page_state(NR_ISOLATED_ANON),
+		global_node_page_state(NR_ACTIVE_FILE),
+		global_node_page_state(NR_INACTIVE_FILE),
+		global_node_page_state(NR_ISOLATED_FILE),
+		global_node_page_state(NR_UNEVICTABLE),
 		global_page_state(NR_FILE_DIRTY),
 		global_page_state(NR_WRITEBACK),
 		global_page_state(NR_UNSTABLE_NFS),
@@ -4331,6 +4332,28 @@ void show_free_areas(unsigned int filter)
 		free_pcp,
 		global_page_state(NR_FREE_CMA_PAGES));
 
+	for_each_online_pgdat(pgdat) {
+		printk("Node %d"
+			" active_anon:%lukB"
+			" inactive_anon:%lukB"
+			" active_file:%lukB"
+			" inactive_file:%lukB"
+			" unevictable:%lukB"
+			" isolated(anon):%lukB"
+			" isolated(file):%lukB"
+			" all_unreclaimable? %s"
+			"\n",
+			pgdat->node_id,
+			K(node_page_state(pgdat, NR_ACTIVE_ANON)),
+			K(node_page_state(pgdat, NR_INACTIVE_ANON)),
+			K(node_page_state(pgdat, NR_ACTIVE_FILE)),
+			K(node_page_state(pgdat, NR_INACTIVE_FILE)),
+			K(node_page_state(pgdat, NR_UNEVICTABLE)),
+			K(node_page_state(pgdat, NR_ISOLATED_ANON)),
+			K(node_page_state(pgdat, NR_ISOLATED_FILE)),
+			!pgdat_reclaimable(pgdat) ? "yes" : "no");
+	}
+
 	for_each_populated_zone(zone) {
 		int i;
 
@@ -4347,13 +4370,6 @@ void show_free_areas(unsigned int filter)
 			" min:%lukB"
 			" low:%lukB"
 			" high:%lukB"
-			" active_anon:%lukB"
-			" inactive_anon:%lukB"
-			" active_file:%lukB"
-			" inactive_file:%lukB"
-			" unevictable:%lukB"
-			" isolated(anon):%lukB"
-			" isolated(file):%lukB"
 			" present:%lukB"
 			" managed:%lukB"
 			" mlocked:%lukB"
@@ -4376,21 +4392,13 @@ void show_free_areas(unsigned int filter)
 			" local_pcp:%ukB"
 			" free_cma:%lukB"
 			" writeback_tmp:%lukB"
-			" pages_scanned:%lu"
-			" all_unreclaimable? %s"
+			" node_pages_scanned:%lu"
 			"\n",
 			zone->name,
 			K(zone_page_state(zone, NR_FREE_PAGES)),
 			K(min_wmark_pages(zone)),
 			K(low_wmark_pages(zone)),
 			K(high_wmark_pages(zone)),
-			K(zone_page_state(zone, NR_ACTIVE_ANON)),
-			K(zone_page_state(zone, NR_INACTIVE_ANON)),
-			K(zone_page_state(zone, NR_ACTIVE_FILE)),
-			K(zone_page_state(zone, NR_INACTIVE_FILE)),
-			K(zone_page_state(zone, NR_UNEVICTABLE)),
-			K(zone_page_state(zone, NR_ISOLATED_ANON)),
-			K(zone_page_state(zone, NR_ISOLATED_FILE)),
 			K(zone->present_pages),
 			K(zone->managed_pages),
 			K(zone_page_state(zone, NR_MLOCK)),
@@ -4415,9 +4423,7 @@ void show_free_areas(unsigned int filter)
 			K(this_cpu_read(zone->pageset->pcp.count)),
 			K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
 			K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
-			K(zone_page_state(zone, NR_PAGES_SCANNED)),
-			(!zone_reclaimable(zone) ? "yes" : "no")
-			);
+			K(node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED)));
 		printk("lowmem_reserve[]:");
 		for (i = 0; i < MAX_NR_ZONES; i++)
 			printk(" %ld", zone->lowmem_reserve[i]);
@@ -5967,7 +5973,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
 		/* For bootup, initialized properly in watermark setup */
 		mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);
 
-		lruvec_init(&zone->lruvec);
+		lruvec_init(zone_lruvec(zone));
 		if (!size)
 			continue;
 