author    Mel Gorman <mgorman@techsingularity.net>          2016-07-28 18:45:31 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2016-07-28 19:07:41 -0400
commit    599d0c954f91d0689c9bb421b5bc04ea02437a41 (patch)
tree      e863ce685841e494bcb63e458739e0939ac684f6
parent    a52633d8e9c35832f1409dc5fa166019048a3f1f (diff)
mm, vmscan: move LRU lists to node
This moves the LRU lists from the zone to the node, along with related data
such as counters, tracing, congestion tracking and writeback tracking.

Unfortunately, due to reclaim and compaction retry logic, it is necessary to
account for the number of LRU pages at both zone and node level.  Most
reclaim logic is based on the node counters, but the retry logic uses the
zone counters, which do not distinguish inactive and active sizes.  It would
be possible to leave the LRU counters on a per-zone basis, but that is a
heavier calculation across multiple cache lines and is much more frequent
than the retry checks.

Other than the LRU counters, this is mostly a mechanical patch, but note
that it introduces a number of anomalies.  For example, the scans are
per-zone but use per-node counters, and we mark a node as congested when a
zone is congested.  This causes weird problems that are fixed later but
keeps the patch easier to review.

In the event that there is excessive overhead on 32-bit systems due to the
LRU lists being per-node, there are two potential solutions:

1. Long-term isolation of highmem pages when reclaim is lowmem

   When pages are skipped, they are immediately added back onto the LRU
   list.  If lowmem reclaim persisted for long periods of time, the same
   highmem pages would be continually scanned.  The idea would be that
   lowmem reclaim keeps those pages on a separate list until a reclaim for
   highmem pages arrives that splices the highmem pages back onto the LRU.
   It could potentially be implemented similarly to the UNEVICTABLE list.

   That would reduce the skip rate, with the potential corner case that
   highmem pages have to be scanned and reclaimed to free lowmem slab
   pages.

2. Linear scan of lowmem pages if the initial LRU shrink fails

   This would break LRU ordering but may be preferable and faster during
   memory pressure than skipping LRU pages.

Link: http://lkml.kernel.org/r/1467970510-21195-4-git-send-email-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
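For readers skimming the diff, the dual accounting described above boils down
to: the full per-list LRU counters move into per-node vmstat, while each zone
keeps only coarse anon/file LRU totals for the compaction and reclaim-retry
checks.  Below is a minimal, standalone C sketch of that bookkeeping.  It is
illustrative only and not part of the patch; the struct and helper names
(node_model, zone_model, model_update_lru_size) are invented for the example,
and the kernel equivalent is the reworked __update_lru_size() in the
mm_inline.h hunk.

#include <stdio.h>

/* Per-node counters: the full LRU breakdown (loosely mirrors the new
 * enum node_stat_item entries added by this patch). */
enum node_lru {
	N_INACTIVE_ANON,
	N_ACTIVE_ANON,
	N_INACTIVE_FILE,
	N_ACTIVE_FILE,
	N_UNEVICTABLE,
	N_LRU_ITEMS
};

/* Per-zone counters: only a coarse anon/file split, kept for the
 * compaction and reclaim-retry checks. */
enum zone_lru { Z_LRU_ANON, Z_LRU_FILE, Z_LRU_ITEMS };

struct zone_model { long lru[Z_LRU_ITEMS]; };
struct node_model { long lru[N_LRU_ITEMS]; struct zone_model zones[4]; };

/* Illustrative helper (not kernel code): account nr_pages per LRU list at
 * node granularity, and only as anon vs file at zone granularity,
 * analogous to the reworked __update_lru_size() in the diff. */
static void model_update_lru_size(struct node_model *node, int zone_idx,
				  enum node_lru lru, long nr_pages)
{
	int is_file = (lru == N_INACTIVE_FILE || lru == N_ACTIVE_FILE);

	node->lru[lru] += nr_pages;                      /* per-node, per-list */
	node->zones[zone_idx].lru[is_file] += nr_pages;  /* per-zone, anon/file only */
}

int main(void)
{
	struct node_model node = { 0 };

	model_update_lru_size(&node, 0, N_ACTIVE_FILE, 32);   /* 32 file pages into zone 0 */
	model_update_lru_size(&node, 1, N_INACTIVE_ANON, 8);  /* 8 anon pages into zone 1 */

	printf("node NR_ACTIVE_FILE=%ld zone0 file lru=%ld zone1 anon lru=%ld\n",
	       node.lru[N_ACTIVE_FILE],
	       node.zones[0].lru[Z_LRU_FILE],
	       node.zones[1].lru[Z_LRU_ANON]);
	return 0;
}

The diff applies exactly this split: the NR_*_ANON/FILE/UNEVICTABLE items move
from enum zone_stat_item to enum node_stat_item, and the zone keeps only
NR_ZONE_LRU_ANON and NR_ZONE_LRU_FILE.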
-rw-r--r--  arch/tile/mm/pgtable.c                     |   8
-rw-r--r--  drivers/base/node.c                        |  19
-rw-r--r--  drivers/staging/android/lowmemorykiller.c  |   8
-rw-r--r--  include/linux/backing-dev.h                |   2
-rw-r--r--  include/linux/memcontrol.h                 |  18
-rw-r--r--  include/linux/mm_inline.h                  |  21
-rw-r--r--  include/linux/mmzone.h                     |  68
-rw-r--r--  include/linux/swap.h                       |   1
-rw-r--r--  include/linux/vm_event_item.h              |  10
-rw-r--r--  include/linux/vmstat.h                     |  17
-rw-r--r--  include/trace/events/vmscan.h              |  12
-rw-r--r--  kernel/power/snapshot.c                    |  10
-rw-r--r--  mm/backing-dev.c                           |  15
-rw-r--r--  mm/compaction.c                            |  18
-rw-r--r--  mm/huge_memory.c                           |   2
-rw-r--r--  mm/internal.h                              |   2
-rw-r--r--  mm/khugepaged.c                            |   4
-rw-r--r--  mm/memcontrol.c                            |  17
-rw-r--r--  mm/memory-failure.c                        |   4
-rw-r--r--  mm/memory_hotplug.c                        |   2
-rw-r--r--  mm/mempolicy.c                             |   2
-rw-r--r--  mm/migrate.c                               |  21
-rw-r--r--  mm/mlock.c                                 |   2
-rw-r--r--  mm/page-writeback.c                        |   8
-rw-r--r--  mm/page_alloc.c                            |  68
-rw-r--r--  mm/swap.c                                  |  50
-rw-r--r--  mm/vmscan.c                                | 226
-rw-r--r--  mm/vmstat.c                                |  47
-rw-r--r--  mm/workingset.c                            |   4
29 files changed, 386 insertions(+), 300 deletions(-)
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index c4d5bf841a7f..9e389213580d 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -45,10 +45,10 @@ void show_mem(unsigned int filter)
45 struct zone *zone; 45 struct zone *zone;
46 46
47 pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n", 47 pr_err("Active:%lu inactive:%lu dirty:%lu writeback:%lu unstable:%lu free:%lu\n slab:%lu mapped:%lu pagetables:%lu bounce:%lu pagecache:%lu swap:%lu\n",
48 (global_page_state(NR_ACTIVE_ANON) + 48 (global_node_page_state(NR_ACTIVE_ANON) +
49 global_page_state(NR_ACTIVE_FILE)), 49 global_node_page_state(NR_ACTIVE_FILE)),
50 (global_page_state(NR_INACTIVE_ANON) + 50 (global_node_page_state(NR_INACTIVE_ANON) +
51 global_page_state(NR_INACTIVE_FILE)), 51 global_node_page_state(NR_INACTIVE_FILE)),
52 global_page_state(NR_FILE_DIRTY), 52 global_page_state(NR_FILE_DIRTY),
53 global_page_state(NR_WRITEBACK), 53 global_page_state(NR_WRITEBACK),
54 global_page_state(NR_UNSTABLE_NFS), 54 global_page_state(NR_UNSTABLE_NFS),
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 0a1b6433a76c..d4698f096209 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -56,6 +56,7 @@ static ssize_t node_read_meminfo(struct device *dev,
56{ 56{
57 int n; 57 int n;
58 int nid = dev->id; 58 int nid = dev->id;
59 struct pglist_data *pgdat = NODE_DATA(nid);
59 struct sysinfo i; 60 struct sysinfo i;
60 61
61 si_meminfo_node(&i, nid); 62 si_meminfo_node(&i, nid);
@@ -74,15 +75,15 @@ static ssize_t node_read_meminfo(struct device *dev,
74 nid, K(i.totalram), 75 nid, K(i.totalram),
75 nid, K(i.freeram), 76 nid, K(i.freeram),
76 nid, K(i.totalram - i.freeram), 77 nid, K(i.totalram - i.freeram),
77 nid, K(sum_zone_node_page_state(nid, NR_ACTIVE_ANON) + 78 nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
78 sum_zone_node_page_state(nid, NR_ACTIVE_FILE)), 79 node_page_state(pgdat, NR_ACTIVE_FILE)),
79 nid, K(sum_zone_node_page_state(nid, NR_INACTIVE_ANON) + 80 nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
80 sum_zone_node_page_state(nid, NR_INACTIVE_FILE)), 81 node_page_state(pgdat, NR_INACTIVE_FILE)),
81 nid, K(sum_zone_node_page_state(nid, NR_ACTIVE_ANON)), 82 nid, K(node_page_state(pgdat, NR_ACTIVE_ANON)),
82 nid, K(sum_zone_node_page_state(nid, NR_INACTIVE_ANON)), 83 nid, K(node_page_state(pgdat, NR_INACTIVE_ANON)),
83 nid, K(sum_zone_node_page_state(nid, NR_ACTIVE_FILE)), 84 nid, K(node_page_state(pgdat, NR_ACTIVE_FILE)),
84 nid, K(sum_zone_node_page_state(nid, NR_INACTIVE_FILE)), 85 nid, K(node_page_state(pgdat, NR_INACTIVE_FILE)),
85 nid, K(sum_zone_node_page_state(nid, NR_UNEVICTABLE)), 86 nid, K(node_page_state(pgdat, NR_UNEVICTABLE)),
86 nid, K(sum_zone_node_page_state(nid, NR_MLOCK))); 87 nid, K(sum_zone_node_page_state(nid, NR_MLOCK)));
87 88
88#ifdef CONFIG_HIGHMEM 89#ifdef CONFIG_HIGHMEM
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c
index 24d2745e9437..93dbcc38eb0f 100644
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -72,10 +72,10 @@ static unsigned long lowmem_deathpending_timeout;
72static unsigned long lowmem_count(struct shrinker *s, 72static unsigned long lowmem_count(struct shrinker *s,
73 struct shrink_control *sc) 73 struct shrink_control *sc)
74{ 74{
75 return global_page_state(NR_ACTIVE_ANON) + 75 return global_node_page_state(NR_ACTIVE_ANON) +
76 global_page_state(NR_ACTIVE_FILE) + 76 global_node_page_state(NR_ACTIVE_FILE) +
77 global_page_state(NR_INACTIVE_ANON) + 77 global_node_page_state(NR_INACTIVE_ANON) +
78 global_page_state(NR_INACTIVE_FILE); 78 global_node_page_state(NR_INACTIVE_FILE);
79} 79}
80 80
81static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) 81static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c82794f20110..491a91717788 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -197,7 +197,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
197} 197}
198 198
199long congestion_wait(int sync, long timeout); 199long congestion_wait(int sync, long timeout);
200long wait_iff_congested(struct zone *zone, int sync, long timeout); 200long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
201int pdflush_proc_obsolete(struct ctl_table *table, int write, 201int pdflush_proc_obsolete(struct ctl_table *table, int write,
202 void __user *buffer, size_t *lenp, loff_t *ppos); 202 void __user *buffer, size_t *lenp, loff_t *ppos);
203 203
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1c4df4420258..6d2321c148cd 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -339,7 +339,7 @@ static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
339 struct lruvec *lruvec; 339 struct lruvec *lruvec;
340 340
341 if (mem_cgroup_disabled()) { 341 if (mem_cgroup_disabled()) {
342 lruvec = &zone->lruvec; 342 lruvec = zone_lruvec(zone);
343 goto out; 343 goto out;
344 } 344 }
345 345
@@ -348,15 +348,15 @@ static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
348out: 348out:
349 /* 349 /*
350 * Since a node can be onlined after the mem_cgroup was created, 350 * Since a node can be onlined after the mem_cgroup was created,
351 * we have to be prepared to initialize lruvec->zone here; 351 * we have to be prepared to initialize lruvec->pgdat here;
352 * and if offlined then reonlined, we need to reinitialize it. 352 * and if offlined then reonlined, we need to reinitialize it.
353 */ 353 */
354 if (unlikely(lruvec->zone != zone)) 354 if (unlikely(lruvec->pgdat != zone->zone_pgdat))
355 lruvec->zone = zone; 355 lruvec->pgdat = zone->zone_pgdat;
356 return lruvec; 356 return lruvec;
357} 357}
358 358
359struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); 359struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
360 360
361bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); 361bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
362struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); 362struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -437,7 +437,7 @@ static inline bool mem_cgroup_online(struct mem_cgroup *memcg)
437int mem_cgroup_select_victim_node(struct mem_cgroup *memcg); 437int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
438 438
439void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, 439void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
440 int nr_pages); 440 enum zone_type zid, int nr_pages);
441 441
442unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, 442unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
443 int nid, unsigned int lru_mask); 443 int nid, unsigned int lru_mask);
@@ -612,13 +612,13 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
612static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, 612static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
613 struct mem_cgroup *memcg) 613 struct mem_cgroup *memcg)
614{ 614{
615 return &zone->lruvec; 615 return zone_lruvec(zone);
616} 616}
617 617
618static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, 618static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
619 struct zone *zone) 619 struct pglist_data *pgdat)
620{ 620{
621 return &zone->lruvec; 621 return &pgdat->lruvec;
622} 622}
623 623
624static inline bool mm_match_cgroup(struct mm_struct *mm, 624static inline bool mm_match_cgroup(struct mm_struct *mm,
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 5bd29ba4f174..9aadcc781857 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -23,25 +23,32 @@ static inline int page_is_file_cache(struct page *page)
23} 23}
24 24
25static __always_inline void __update_lru_size(struct lruvec *lruvec, 25static __always_inline void __update_lru_size(struct lruvec *lruvec,
26 enum lru_list lru, int nr_pages) 26 enum lru_list lru, enum zone_type zid,
27 int nr_pages)
27{ 28{
28 __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages); 29 struct pglist_data *pgdat = lruvec_pgdat(lruvec);
30
31 __mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages);
32 __mod_zone_page_state(&pgdat->node_zones[zid],
33 NR_ZONE_LRU_BASE + !!is_file_lru(lru),
34 nr_pages);
29} 35}
30 36
31static __always_inline void update_lru_size(struct lruvec *lruvec, 37static __always_inline void update_lru_size(struct lruvec *lruvec,
32 enum lru_list lru, int nr_pages) 38 enum lru_list lru, enum zone_type zid,
39 int nr_pages)
33{ 40{
34#ifdef CONFIG_MEMCG 41#ifdef CONFIG_MEMCG
35 mem_cgroup_update_lru_size(lruvec, lru, nr_pages); 42 mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
36#else 43#else
37 __update_lru_size(lruvec, lru, nr_pages); 44 __update_lru_size(lruvec, lru, zid, nr_pages);
38#endif 45#endif
39} 46}
40 47
41static __always_inline void add_page_to_lru_list(struct page *page, 48static __always_inline void add_page_to_lru_list(struct page *page,
42 struct lruvec *lruvec, enum lru_list lru) 49 struct lruvec *lruvec, enum lru_list lru)
43{ 50{
44 update_lru_size(lruvec, lru, hpage_nr_pages(page)); 51 update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
45 list_add(&page->lru, &lruvec->lists[lru]); 52 list_add(&page->lru, &lruvec->lists[lru]);
46} 53}
47 54
@@ -49,7 +56,7 @@ static __always_inline void del_page_from_lru_list(struct page *page,
49 struct lruvec *lruvec, enum lru_list lru) 56 struct lruvec *lruvec, enum lru_list lru)
50{ 57{
51 list_del(&page->lru); 58 list_del(&page->lru);
52 update_lru_size(lruvec, lru, -hpage_nr_pages(page)); 59 update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
53} 60}
54 61
55/** 62/**
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index cfa870107abe..d4f5cac0a8c3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -111,12 +111,9 @@ enum zone_stat_item {
111 /* First 128 byte cacheline (assuming 64 bit words) */ 111 /* First 128 byte cacheline (assuming 64 bit words) */
112 NR_FREE_PAGES, 112 NR_FREE_PAGES,
113 NR_ALLOC_BATCH, 113 NR_ALLOC_BATCH,
114 NR_LRU_BASE, 114 NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
115 NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ 115 NR_ZONE_LRU_ANON = NR_ZONE_LRU_BASE,
116 NR_ACTIVE_ANON, /* " " " " " */ 116 NR_ZONE_LRU_FILE,
117 NR_INACTIVE_FILE, /* " " " " " */
118 NR_ACTIVE_FILE, /* " " " " " */
119 NR_UNEVICTABLE, /* " " " " " */
120 NR_MLOCK, /* mlock()ed pages found and moved off LRU */ 117 NR_MLOCK, /* mlock()ed pages found and moved off LRU */
121 NR_ANON_PAGES, /* Mapped anonymous pages */ 118 NR_ANON_PAGES, /* Mapped anonymous pages */
122 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. 119 NR_FILE_MAPPED, /* pagecache pages mapped into pagetables.
@@ -134,12 +131,9 @@ enum zone_stat_item {
134 NR_VMSCAN_WRITE, 131 NR_VMSCAN_WRITE,
135 NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ 132 NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */
136 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ 133 NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */
137 NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
138 NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
139 NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ 134 NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */
140 NR_DIRTIED, /* page dirtyings since bootup */ 135 NR_DIRTIED, /* page dirtyings since bootup */
141 NR_WRITTEN, /* page writings since bootup */ 136 NR_WRITTEN, /* page writings since bootup */
142 NR_PAGES_SCANNED, /* pages scanned since last reclaim */
143#if IS_ENABLED(CONFIG_ZSMALLOC) 137#if IS_ENABLED(CONFIG_ZSMALLOC)
144 NR_ZSPAGES, /* allocated in zsmalloc */ 138 NR_ZSPAGES, /* allocated in zsmalloc */
145#endif 139#endif
@@ -161,6 +155,15 @@ enum zone_stat_item {
161 NR_VM_ZONE_STAT_ITEMS }; 155 NR_VM_ZONE_STAT_ITEMS };
162 156
163enum node_stat_item { 157enum node_stat_item {
158 NR_LRU_BASE,
159 NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
160 NR_ACTIVE_ANON, /* " " " " " */
161 NR_INACTIVE_FILE, /* " " " " " */
162 NR_ACTIVE_FILE, /* " " " " " */
163 NR_UNEVICTABLE, /* " " " " " */
164 NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */
165 NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */
166 NR_PAGES_SCANNED, /* pages scanned since last reclaim */
164 NR_VM_NODE_STAT_ITEMS 167 NR_VM_NODE_STAT_ITEMS
165}; 168};
166 169
@@ -219,7 +222,7 @@ struct lruvec {
219 /* Evictions & activations on the inactive file list */ 222 /* Evictions & activations on the inactive file list */
220 atomic_long_t inactive_age; 223 atomic_long_t inactive_age;
221#ifdef CONFIG_MEMCG 224#ifdef CONFIG_MEMCG
222 struct zone *zone; 225 struct pglist_data *pgdat;
223#endif 226#endif
224}; 227};
225 228
@@ -357,13 +360,6 @@ struct zone {
357#ifdef CONFIG_NUMA 360#ifdef CONFIG_NUMA
358 int node; 361 int node;
359#endif 362#endif
360
361 /*
362 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
363 * this zone's LRU. Maintained by the pageout code.
364 */
365 unsigned int inactive_ratio;
366
367 struct pglist_data *zone_pgdat; 363 struct pglist_data *zone_pgdat;
368 struct per_cpu_pageset __percpu *pageset; 364 struct per_cpu_pageset __percpu *pageset;
369 365
@@ -495,9 +491,6 @@ struct zone {
495 491
496 /* Write-intensive fields used by page reclaim */ 492 /* Write-intensive fields used by page reclaim */
497 493
498 /* Fields commonly accessed by the page reclaim scanner */
499 struct lruvec lruvec;
500
501 /* 494 /*
502 * When free pages are below this point, additional steps are taken 495 * When free pages are below this point, additional steps are taken
503 * when reading the number of free pages to avoid per-cpu counter 496 * when reading the number of free pages to avoid per-cpu counter
@@ -537,17 +530,20 @@ struct zone {
537 530
538enum zone_flags { 531enum zone_flags {
539 ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */ 532 ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */
540 ZONE_CONGESTED, /* zone has many dirty pages backed by 533 ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
534};
535
536enum pgdat_flags {
537 PGDAT_CONGESTED, /* pgdat has many dirty pages backed by
541 * a congested BDI 538 * a congested BDI
542 */ 539 */
543 ZONE_DIRTY, /* reclaim scanning has recently found 540 PGDAT_DIRTY, /* reclaim scanning has recently found
544 * many dirty file pages at the tail 541 * many dirty file pages at the tail
545 * of the LRU. 542 * of the LRU.
546 */ 543 */
547 ZONE_WRITEBACK, /* reclaim scanning has recently found 544 PGDAT_WRITEBACK, /* reclaim scanning has recently found
548 * many pages under writeback 545 * many pages under writeback
549 */ 546 */
550 ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */
551}; 547};
552 548
553static inline unsigned long zone_end_pfn(const struct zone *zone) 549static inline unsigned long zone_end_pfn(const struct zone *zone)
@@ -707,6 +703,19 @@ typedef struct pglist_data {
707 unsigned long split_queue_len; 703 unsigned long split_queue_len;
708#endif 704#endif
709 705
706 /* Fields commonly accessed by the page reclaim scanner */
707 struct lruvec lruvec;
708
709 /*
710 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
711 * this node's LRU. Maintained by the pageout code.
712 */
713 unsigned int inactive_ratio;
714
715 unsigned long flags;
716
717 ZONE_PADDING(_pad2_)
718
710 /* Per-node vmstats */ 719 /* Per-node vmstats */
711 struct per_cpu_nodestat __percpu *per_cpu_nodestats; 720 struct per_cpu_nodestat __percpu *per_cpu_nodestats;
712 atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; 721 atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS];
@@ -728,6 +737,11 @@ static inline spinlock_t *zone_lru_lock(struct zone *zone)
728 return &zone->zone_pgdat->lru_lock; 737 return &zone->zone_pgdat->lru_lock;
729} 738}
730 739
740static inline struct lruvec *zone_lruvec(struct zone *zone)
741{
742 return &zone->zone_pgdat->lruvec;
743}
744
731static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) 745static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
732{ 746{
733 return pgdat->node_start_pfn + pgdat->node_spanned_pages; 747 return pgdat->node_start_pfn + pgdat->node_spanned_pages;
@@ -779,12 +793,12 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
779 793
780extern void lruvec_init(struct lruvec *lruvec); 794extern void lruvec_init(struct lruvec *lruvec);
781 795
782static inline struct zone *lruvec_zone(struct lruvec *lruvec) 796static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
783{ 797{
784#ifdef CONFIG_MEMCG 798#ifdef CONFIG_MEMCG
785 return lruvec->zone; 799 return lruvec->pgdat;
786#else 800#else
787 return container_of(lruvec, struct zone, lruvec); 801 return container_of(lruvec, struct pglist_data, lruvec);
788#endif 802#endif
789} 803}
790 804
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0af2bb2028fd..c82f916008b7 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -317,6 +317,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
317 317
318/* linux/mm/vmscan.c */ 318/* linux/mm/vmscan.c */
319extern unsigned long zone_reclaimable_pages(struct zone *zone); 319extern unsigned long zone_reclaimable_pages(struct zone *zone);
320extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat);
320extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, 321extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
321 gfp_t gfp_mask, nodemask_t *mask); 322 gfp_t gfp_mask, nodemask_t *mask);
322extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); 323extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 42604173f122..1798ff542517 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -26,11 +26,11 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
26 PGFREE, PGACTIVATE, PGDEACTIVATE, 26 PGFREE, PGACTIVATE, PGDEACTIVATE,
27 PGFAULT, PGMAJFAULT, 27 PGFAULT, PGMAJFAULT,
28 PGLAZYFREED, 28 PGLAZYFREED,
29 FOR_ALL_ZONES(PGREFILL), 29 PGREFILL,
30 FOR_ALL_ZONES(PGSTEAL_KSWAPD), 30 PGSTEAL_KSWAPD,
31 FOR_ALL_ZONES(PGSTEAL_DIRECT), 31 PGSTEAL_DIRECT,
32 FOR_ALL_ZONES(PGSCAN_KSWAPD), 32 PGSCAN_KSWAPD,
33 FOR_ALL_ZONES(PGSCAN_DIRECT), 33 PGSCAN_DIRECT,
34 PGSCAN_DIRECT_THROTTLE, 34 PGSCAN_DIRECT_THROTTLE,
35#ifdef CONFIG_NUMA 35#ifdef CONFIG_NUMA
36 PGSCAN_ZONE_RECLAIM_FAILED, 36 PGSCAN_ZONE_RECLAIM_FAILED,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d1744aa3ab9c..fee321c98550 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -178,6 +178,23 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
178 return x; 178 return x;
179} 179}
180 180
181static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat,
182 enum node_stat_item item)
183{
184 long x = atomic_long_read(&pgdat->vm_stat[item]);
185
186#ifdef CONFIG_SMP
187 int cpu;
188 for_each_online_cpu(cpu)
189 x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item];
190
191 if (x < 0)
192 x = 0;
193#endif
194 return x;
195}
196
197
181#ifdef CONFIG_NUMA 198#ifdef CONFIG_NUMA
182extern unsigned long sum_zone_node_page_state(int node, 199extern unsigned long sum_zone_node_page_state(int node,
183 enum zone_stat_item item); 200 enum zone_stat_item item);
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 0101ef37f1ee..897f1aa1ee5f 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -352,15 +352,14 @@ TRACE_EVENT(mm_vmscan_writepage,
352 352
353TRACE_EVENT(mm_vmscan_lru_shrink_inactive, 353TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
354 354
355 TP_PROTO(struct zone *zone, 355 TP_PROTO(int nid,
356 unsigned long nr_scanned, unsigned long nr_reclaimed, 356 unsigned long nr_scanned, unsigned long nr_reclaimed,
357 int priority, int file), 357 int priority, int file),
358 358
359 TP_ARGS(zone, nr_scanned, nr_reclaimed, priority, file), 359 TP_ARGS(nid, nr_scanned, nr_reclaimed, priority, file),
360 360
361 TP_STRUCT__entry( 361 TP_STRUCT__entry(
362 __field(int, nid) 362 __field(int, nid)
363 __field(int, zid)
364 __field(unsigned long, nr_scanned) 363 __field(unsigned long, nr_scanned)
365 __field(unsigned long, nr_reclaimed) 364 __field(unsigned long, nr_reclaimed)
366 __field(int, priority) 365 __field(int, priority)
@@ -368,16 +367,15 @@ TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
368 ), 367 ),
369 368
370 TP_fast_assign( 369 TP_fast_assign(
371 __entry->nid = zone_to_nid(zone); 370 __entry->nid = nid;
372 __entry->zid = zone_idx(zone);
373 __entry->nr_scanned = nr_scanned; 371 __entry->nr_scanned = nr_scanned;
374 __entry->nr_reclaimed = nr_reclaimed; 372 __entry->nr_reclaimed = nr_reclaimed;
375 __entry->priority = priority; 373 __entry->priority = priority;
376 __entry->reclaim_flags = trace_shrink_flags(file); 374 __entry->reclaim_flags = trace_shrink_flags(file);
377 ), 375 ),
378 376
379 TP_printk("nid=%d zid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s", 377 TP_printk("nid=%d nr_scanned=%ld nr_reclaimed=%ld priority=%d flags=%s",
380 __entry->nid, __entry->zid, 378 __entry->nid,
381 __entry->nr_scanned, __entry->nr_reclaimed, 379 __entry->nr_scanned, __entry->nr_reclaimed,
382 __entry->priority, 380 __entry->priority,
383 show_reclaim_flags(__entry->reclaim_flags)) 381 show_reclaim_flags(__entry->reclaim_flags))
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index d90df926b59f..9a0178c2ac1d 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1627,11 +1627,11 @@ static unsigned long minimum_image_size(unsigned long saveable)
1627 unsigned long size; 1627 unsigned long size;
1628 1628
1629 size = global_page_state(NR_SLAB_RECLAIMABLE) 1629 size = global_page_state(NR_SLAB_RECLAIMABLE)
1630 + global_page_state(NR_ACTIVE_ANON) 1630 + global_node_page_state(NR_ACTIVE_ANON)
1631 + global_page_state(NR_INACTIVE_ANON) 1631 + global_node_page_state(NR_INACTIVE_ANON)
1632 + global_page_state(NR_ACTIVE_FILE) 1632 + global_node_page_state(NR_ACTIVE_FILE)
1633 + global_page_state(NR_INACTIVE_FILE) 1633 + global_node_page_state(NR_INACTIVE_FILE)
1634 - global_page_state(NR_FILE_MAPPED); 1634 - global_node_page_state(NR_FILE_MAPPED);
1635 1635
1636 return saveable <= size ? 0 : saveable - size; 1636 return saveable <= size ? 0 : saveable - size;
1637} 1637}
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index ed173b8ae8f2..efe237742074 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -947,24 +947,24 @@ long congestion_wait(int sync, long timeout)
947EXPORT_SYMBOL(congestion_wait); 947EXPORT_SYMBOL(congestion_wait);
948 948
949/** 949/**
950 * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a zone to complete writes 950 * wait_iff_congested - Conditionally wait for a backing_dev to become uncongested or a pgdat to complete writes
951 * @zone: A zone to check if it is heavily congested 951 * @pgdat: A pgdat to check if it is heavily congested
952 * @sync: SYNC or ASYNC IO 952 * @sync: SYNC or ASYNC IO
953 * @timeout: timeout in jiffies 953 * @timeout: timeout in jiffies
954 * 954 *
955 * In the event of a congested backing_dev (any backing_dev) and the given 955 * In the event of a congested backing_dev (any backing_dev) and the given
956 * @zone has experienced recent congestion, this waits for up to @timeout 956 * @pgdat has experienced recent congestion, this waits for up to @timeout
957 * jiffies for either a BDI to exit congestion of the given @sync queue 957 * jiffies for either a BDI to exit congestion of the given @sync queue
958 * or a write to complete. 958 * or a write to complete.
959 * 959 *
960 * In the absence of zone congestion, cond_resched() is called to yield 960 * In the absence of pgdat congestion, cond_resched() is called to yield
961 * the processor if necessary but otherwise does not sleep. 961 * the processor if necessary but otherwise does not sleep.
962 * 962 *
963 * The return value is 0 if the sleep is for the full timeout. Otherwise, 963 * The return value is 0 if the sleep is for the full timeout. Otherwise,
964 * it is the number of jiffies that were still remaining when the function 964 * it is the number of jiffies that were still remaining when the function
965 * returned. return_value == timeout implies the function did not sleep. 965 * returned. return_value == timeout implies the function did not sleep.
966 */ 966 */
967long wait_iff_congested(struct zone *zone, int sync, long timeout) 967long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout)
968{ 968{
969 long ret; 969 long ret;
970 unsigned long start = jiffies; 970 unsigned long start = jiffies;
@@ -973,12 +973,13 @@ long wait_iff_congested(struct zone *zone, int sync, long timeout)
973 973
974 /* 974 /*
975 * If there is no congestion, or heavy congestion is not being 975 * If there is no congestion, or heavy congestion is not being
976 * encountered in the current zone, yield if necessary instead 976 * encountered in the current pgdat, yield if necessary instead
977 * of sleeping on the congestion queue 977 * of sleeping on the congestion queue
978 */ 978 */
979 if (atomic_read(&nr_wb_congested[sync]) == 0 || 979 if (atomic_read(&nr_wb_congested[sync]) == 0 ||
980 !test_bit(ZONE_CONGESTED, &zone->flags)) { 980 !test_bit(PGDAT_CONGESTED, &pgdat->flags)) {
981 cond_resched(); 981 cond_resched();
982
982 /* In case we scheduled, work out time remaining */ 983 /* In case we scheduled, work out time remaining */
983 ret = timeout - (jiffies - start); 984 ret = timeout - (jiffies - start);
984 if (ret < 0) 985 if (ret < 0)
diff --git a/mm/compaction.c b/mm/compaction.c
index 5c65fad3f330..e5995f38d677 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -646,8 +646,8 @@ static void acct_isolated(struct zone *zone, struct compact_control *cc)
646 list_for_each_entry(page, &cc->migratepages, lru) 646 list_for_each_entry(page, &cc->migratepages, lru)
647 count[!!page_is_file_cache(page)]++; 647 count[!!page_is_file_cache(page)]++;
648 648
649 mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]); 649 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]);
650 mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]); 650 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]);
651} 651}
652 652
653/* Similar to reclaim, but different enough that they don't share logic */ 653/* Similar to reclaim, but different enough that they don't share logic */
@@ -655,12 +655,12 @@ static bool too_many_isolated(struct zone *zone)
655{ 655{
656 unsigned long active, inactive, isolated; 656 unsigned long active, inactive, isolated;
657 657
658 inactive = zone_page_state(zone, NR_INACTIVE_FILE) + 658 inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
659 zone_page_state(zone, NR_INACTIVE_ANON); 659 node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
660 active = zone_page_state(zone, NR_ACTIVE_FILE) + 660 active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
661 zone_page_state(zone, NR_ACTIVE_ANON); 661 node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
662 isolated = zone_page_state(zone, NR_ISOLATED_FILE) + 662 isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
663 zone_page_state(zone, NR_ISOLATED_ANON); 663 node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);
664 664
665 return isolated > (inactive + active) / 2; 665 return isolated > (inactive + active) / 2;
666} 666}
@@ -856,7 +856,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
856 } 856 }
857 } 857 }
858 858
859 lruvec = mem_cgroup_page_lruvec(page, zone); 859 lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
860 860
861 /* Try isolate the page */ 861 /* Try isolate the page */
862 if (__isolate_lru_page(page, isolate_mode) != 0) 862 if (__isolate_lru_page(page, isolate_mode) != 0)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 99578b63814b..481fb0128d21 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1818,7 +1818,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
1818 pgoff_t end = -1; 1818 pgoff_t end = -1;
1819 int i; 1819 int i;
1820 1820
1821 lruvec = mem_cgroup_page_lruvec(head, zone); 1821 lruvec = mem_cgroup_page_lruvec(head, zone->zone_pgdat);
1822 1822
1823 /* complete memcg works before add pages to LRU */ 1823 /* complete memcg works before add pages to LRU */
1824 mem_cgroup_split_huge_fixup(head); 1824 mem_cgroup_split_huge_fixup(head);
diff --git a/mm/internal.h b/mm/internal.h
index 9b6a6c43ac39..2f80d0343c56 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -78,7 +78,7 @@ extern unsigned long highest_memmap_pfn;
78 */ 78 */
79extern int isolate_lru_page(struct page *page); 79extern int isolate_lru_page(struct page *page);
80extern void putback_lru_page(struct page *page); 80extern void putback_lru_page(struct page *page);
81extern bool zone_reclaimable(struct zone *zone); 81extern bool pgdat_reclaimable(struct pglist_data *pgdat);
82 82
83/* 83/*
84 * in mm/rmap.c: 84 * in mm/rmap.c:
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 7dbee698d6aa..374237bb059d 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -480,7 +480,7 @@ void __khugepaged_exit(struct mm_struct *mm)
480static void release_pte_page(struct page *page) 480static void release_pte_page(struct page *page)
481{ 481{
482 /* 0 stands for page_is_file_cache(page) == false */ 482 /* 0 stands for page_is_file_cache(page) == false */
483 dec_zone_page_state(page, NR_ISOLATED_ANON + 0); 483 dec_node_page_state(page, NR_ISOLATED_ANON + 0);
484 unlock_page(page); 484 unlock_page(page);
485 putback_lru_page(page); 485 putback_lru_page(page);
486} 486}
@@ -576,7 +576,7 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
576 goto out; 576 goto out;
577 } 577 }
578 /* 0 stands for page_is_file_cache(page) == false */ 578 /* 0 stands for page_is_file_cache(page) == false */
579 inc_zone_page_state(page, NR_ISOLATED_ANON + 0); 579 inc_node_page_state(page, NR_ISOLATED_ANON + 0);
580 VM_BUG_ON_PAGE(!PageLocked(page), page); 580 VM_BUG_ON_PAGE(!PageLocked(page), page);
581 VM_BUG_ON_PAGE(PageLRU(page), page); 581 VM_BUG_ON_PAGE(PageLRU(page), page);
582 582
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9b70f9ca8ddf..50c86ad121bc 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -943,14 +943,14 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
943 * and putback protocol: the LRU lock must be held, and the page must 943 * and putback protocol: the LRU lock must be held, and the page must
944 * either be PageLRU() or the caller must have isolated/allocated it. 944 * either be PageLRU() or the caller must have isolated/allocated it.
945 */ 945 */
946struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone) 946struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct pglist_data *pgdat)
947{ 947{
948 struct mem_cgroup_per_zone *mz; 948 struct mem_cgroup_per_zone *mz;
949 struct mem_cgroup *memcg; 949 struct mem_cgroup *memcg;
950 struct lruvec *lruvec; 950 struct lruvec *lruvec;
951 951
952 if (mem_cgroup_disabled()) { 952 if (mem_cgroup_disabled()) {
953 lruvec = &zone->lruvec; 953 lruvec = &pgdat->lruvec;
954 goto out; 954 goto out;
955 } 955 }
956 956
@@ -970,8 +970,8 @@ out:
970 * we have to be prepared to initialize lruvec->zone here; 970 * we have to be prepared to initialize lruvec->zone here;
971 * and if offlined then reonlined, we need to reinitialize it. 971 * and if offlined then reonlined, we need to reinitialize it.
972 */ 972 */
973 if (unlikely(lruvec->zone != zone)) 973 if (unlikely(lruvec->pgdat != pgdat))
974 lruvec->zone = zone; 974 lruvec->pgdat = pgdat;
975 return lruvec; 975 return lruvec;
976} 976}
977 977
@@ -979,6 +979,7 @@ out:
979 * mem_cgroup_update_lru_size - account for adding or removing an lru page 979 * mem_cgroup_update_lru_size - account for adding or removing an lru page
980 * @lruvec: mem_cgroup per zone lru vector 980 * @lruvec: mem_cgroup per zone lru vector
981 * @lru: index of lru list the page is sitting on 981 * @lru: index of lru list the page is sitting on
982 * @zid: Zone ID of the zone pages have been added to
982 * @nr_pages: positive when adding or negative when removing 983 * @nr_pages: positive when adding or negative when removing
983 * 984 *
984 * This function must be called under lru_lock, just before a page is added 985 * This function must be called under lru_lock, just before a page is added
@@ -986,14 +987,14 @@ out:
986 * so as to allow it to check that lru_size 0 is consistent with list_empty). 987 * so as to allow it to check that lru_size 0 is consistent with list_empty).
987 */ 988 */
988void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru, 989void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
989 int nr_pages) 990 enum zone_type zid, int nr_pages)
990{ 991{
991 struct mem_cgroup_per_zone *mz; 992 struct mem_cgroup_per_zone *mz;
992 unsigned long *lru_size; 993 unsigned long *lru_size;
993 long size; 994 long size;
994 bool empty; 995 bool empty;
995 996
996 __update_lru_size(lruvec, lru, nr_pages); 997 __update_lru_size(lruvec, lru, zid, nr_pages);
997 998
998 if (mem_cgroup_disabled()) 999 if (mem_cgroup_disabled())
999 return; 1000 return;
@@ -2069,7 +2070,7 @@ static void lock_page_lru(struct page *page, int *isolated)
2069 if (PageLRU(page)) { 2070 if (PageLRU(page)) {
2070 struct lruvec *lruvec; 2071 struct lruvec *lruvec;
2071 2072
2072 lruvec = mem_cgroup_page_lruvec(page, zone); 2073 lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
2073 ClearPageLRU(page); 2074 ClearPageLRU(page);
2074 del_page_from_lru_list(page, lruvec, page_lru(page)); 2075 del_page_from_lru_list(page, lruvec, page_lru(page));
2075 *isolated = 1; 2076 *isolated = 1;
@@ -2084,7 +2085,7 @@ static void unlock_page_lru(struct page *page, int isolated)
2084 if (isolated) { 2085 if (isolated) {
2085 struct lruvec *lruvec; 2086 struct lruvec *lruvec;
2086 2087
2087 lruvec = mem_cgroup_page_lruvec(page, zone); 2088 lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
2088 VM_BUG_ON_PAGE(PageLRU(page), page); 2089 VM_BUG_ON_PAGE(PageLRU(page), page);
2089 SetPageLRU(page); 2090 SetPageLRU(page);
2090 add_page_to_lru_list(page, lruvec, page_lru(page)); 2091 add_page_to_lru_list(page, lruvec, page_lru(page));
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2fcca6b0e005..11de752ccaf5 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1663,7 +1663,7 @@ static int __soft_offline_page(struct page *page, int flags)
1663 put_hwpoison_page(page); 1663 put_hwpoison_page(page);
1664 if (!ret) { 1664 if (!ret) {
1665 LIST_HEAD(pagelist); 1665 LIST_HEAD(pagelist);
1666 inc_zone_page_state(page, NR_ISOLATED_ANON + 1666 inc_node_page_state(page, NR_ISOLATED_ANON +
1667 page_is_file_cache(page)); 1667 page_is_file_cache(page));
1668 list_add(&page->lru, &pagelist); 1668 list_add(&page->lru, &pagelist);
1669 ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL, 1669 ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
@@ -1671,7 +1671,7 @@ static int __soft_offline_page(struct page *page, int flags)
1671 if (ret) { 1671 if (ret) {
1672 if (!list_empty(&pagelist)) { 1672 if (!list_empty(&pagelist)) {
1673 list_del(&page->lru); 1673 list_del(&page->lru);
1674 dec_zone_page_state(page, NR_ISOLATED_ANON + 1674 dec_node_page_state(page, NR_ISOLATED_ANON +
1675 page_is_file_cache(page)); 1675 page_is_file_cache(page));
1676 putback_lru_page(page); 1676 putback_lru_page(page);
1677 } 1677 }
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 82d0b98d27f8..c5278360ca66 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1586,7 +1586,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
1586 put_page(page); 1586 put_page(page);
1587 list_add_tail(&page->lru, &source); 1587 list_add_tail(&page->lru, &source);
1588 move_pages--; 1588 move_pages--;
1589 inc_zone_page_state(page, NR_ISOLATED_ANON + 1589 inc_node_page_state(page, NR_ISOLATED_ANON +
1590 page_is_file_cache(page)); 1590 page_is_file_cache(page));
1591 1591
1592 } else { 1592 } else {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 53e40d3f3933..d8c4e38fb5f4 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -962,7 +962,7 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
962 if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) { 962 if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
963 if (!isolate_lru_page(page)) { 963 if (!isolate_lru_page(page)) {
964 list_add_tail(&page->lru, pagelist); 964 list_add_tail(&page->lru, pagelist);
965 inc_zone_page_state(page, NR_ISOLATED_ANON + 965 inc_node_page_state(page, NR_ISOLATED_ANON +
966 page_is_file_cache(page)); 966 page_is_file_cache(page));
967 } 967 }
968 } 968 }
diff --git a/mm/migrate.c b/mm/migrate.c
index 2232f6923cc7..3033dae33a0a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -168,7 +168,7 @@ void putback_movable_pages(struct list_head *l)
168 continue; 168 continue;
169 } 169 }
170 list_del(&page->lru); 170 list_del(&page->lru);
171 dec_zone_page_state(page, NR_ISOLATED_ANON + 171 dec_node_page_state(page, NR_ISOLATED_ANON +
172 page_is_file_cache(page)); 172 page_is_file_cache(page));
173 /* 173 /*
174 * We isolated non-lru movable page so here we can use 174 * We isolated non-lru movable page so here we can use
@@ -1119,7 +1119,7 @@ out:
1119 * restored. 1119 * restored.
1120 */ 1120 */
1121 list_del(&page->lru); 1121 list_del(&page->lru);
1122 dec_zone_page_state(page, NR_ISOLATED_ANON + 1122 dec_node_page_state(page, NR_ISOLATED_ANON +
1123 page_is_file_cache(page)); 1123 page_is_file_cache(page));
1124 } 1124 }
1125 1125
@@ -1460,7 +1460,7 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
1460 err = isolate_lru_page(page); 1460 err = isolate_lru_page(page);
1461 if (!err) { 1461 if (!err) {
1462 list_add_tail(&page->lru, &pagelist); 1462 list_add_tail(&page->lru, &pagelist);
1463 inc_zone_page_state(page, NR_ISOLATED_ANON + 1463 inc_node_page_state(page, NR_ISOLATED_ANON +
1464 page_is_file_cache(page)); 1464 page_is_file_cache(page));
1465 } 1465 }
1466put_and_set: 1466put_and_set:
@@ -1726,15 +1726,16 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
1726 unsigned long nr_migrate_pages) 1726 unsigned long nr_migrate_pages)
1727{ 1727{
1728 int z; 1728 int z;
1729
1730 if (!pgdat_reclaimable(pgdat))
1731 return false;
1732
1729 for (z = pgdat->nr_zones - 1; z >= 0; z--) { 1733 for (z = pgdat->nr_zones - 1; z >= 0; z--) {
1730 struct zone *zone = pgdat->node_zones + z; 1734 struct zone *zone = pgdat->node_zones + z;
1731 1735
1732 if (!populated_zone(zone)) 1736 if (!populated_zone(zone))
1733 continue; 1737 continue;
1734 1738
1735 if (!zone_reclaimable(zone))
1736 continue;
1737
1738 /* Avoid waking kswapd by allocating pages_to_migrate pages. */ 1739 /* Avoid waking kswapd by allocating pages_to_migrate pages. */
1739 if (!zone_watermark_ok(zone, 0, 1740 if (!zone_watermark_ok(zone, 0,
1740 high_wmark_pages(zone) + 1741 high_wmark_pages(zone) +
@@ -1828,7 +1829,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
1828 } 1829 }
1829 1830
1830 page_lru = page_is_file_cache(page); 1831 page_lru = page_is_file_cache(page);
1831 mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru, 1832 mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
1832 hpage_nr_pages(page)); 1833 hpage_nr_pages(page));
1833 1834
1834 /* 1835 /*
@@ -1886,7 +1887,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
1886 if (nr_remaining) { 1887 if (nr_remaining) {
1887 if (!list_empty(&migratepages)) { 1888 if (!list_empty(&migratepages)) {
1888 list_del(&page->lru); 1889 list_del(&page->lru);
1889 dec_zone_page_state(page, NR_ISOLATED_ANON + 1890 dec_node_page_state(page, NR_ISOLATED_ANON +
1890 page_is_file_cache(page)); 1891 page_is_file_cache(page));
1891 putback_lru_page(page); 1892 putback_lru_page(page);
1892 } 1893 }
@@ -1979,7 +1980,7 @@ fail_putback:
1979 /* Retake the callers reference and putback on LRU */ 1980 /* Retake the callers reference and putback on LRU */
1980 get_page(page); 1981 get_page(page);
1981 putback_lru_page(page); 1982 putback_lru_page(page);
1982 mod_zone_page_state(page_zone(page), 1983 mod_node_page_state(page_pgdat(page),
1983 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR); 1984 NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
1984 1985
1985 goto out_unlock; 1986 goto out_unlock;
@@ -2030,7 +2031,7 @@ fail_putback:
2030 count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR); 2031 count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
2031 count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR); 2032 count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
2032 2033
2033 mod_zone_page_state(page_zone(page), 2034 mod_node_page_state(page_pgdat(page),
2034 NR_ISOLATED_ANON + page_lru, 2035 NR_ISOLATED_ANON + page_lru,
2035 -HPAGE_PMD_NR); 2036 -HPAGE_PMD_NR);
2036 return isolated; 2037 return isolated;
diff --git a/mm/mlock.c b/mm/mlock.c
index 997f63082ff5..14645be06e30 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -103,7 +103,7 @@ static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
103 if (PageLRU(page)) { 103 if (PageLRU(page)) {
104 struct lruvec *lruvec; 104 struct lruvec *lruvec;
105 105
106 lruvec = mem_cgroup_page_lruvec(page, page_zone(page)); 106 lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
107 if (getpage) 107 if (getpage)
108 get_page(page); 108 get_page(page);
109 ClearPageLRU(page); 109 ClearPageLRU(page);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d578d2a56b19..0ada2b2954b0 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -285,8 +285,8 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
285 */ 285 */
286 nr_pages -= min(nr_pages, zone->totalreserve_pages); 286 nr_pages -= min(nr_pages, zone->totalreserve_pages);
287 287
288 nr_pages += zone_page_state(zone, NR_INACTIVE_FILE); 288 nr_pages += node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE);
289 nr_pages += zone_page_state(zone, NR_ACTIVE_FILE); 289 nr_pages += node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE);
290 290
291 return nr_pages; 291 return nr_pages;
292} 292}
@@ -348,8 +348,8 @@ static unsigned long global_dirtyable_memory(void)
348 */ 348 */
349 x -= min(x, totalreserve_pages); 349 x -= min(x, totalreserve_pages);
350 350
351 x += global_page_state(NR_INACTIVE_FILE); 351 x += global_node_page_state(NR_INACTIVE_FILE);
352 x += global_page_state(NR_ACTIVE_FILE); 352 x += global_node_page_state(NR_ACTIVE_FILE);
353 353
354 if (!vm_highmem_is_dirtyable) 354 if (!vm_highmem_is_dirtyable)
355 x -= highmem_dirtyable_memory(x); 355 x -= highmem_dirtyable_memory(x);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5760c626c309..35e2d0f9d44f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1078,9 +1078,9 @@ static void free_pcppages_bulk(struct zone *zone, int count,
1078 1078
1079 spin_lock(&zone->lock); 1079 spin_lock(&zone->lock);
1080 isolated_pageblocks = has_isolate_pageblock(zone); 1080 isolated_pageblocks = has_isolate_pageblock(zone);
1081 nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); 1081 nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
1082 if (nr_scanned) 1082 if (nr_scanned)
1083 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); 1083 __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
1084 1084
1085 while (count) { 1085 while (count) {
1086 struct page *page; 1086 struct page *page;
@@ -1135,9 +1135,9 @@ static void free_one_page(struct zone *zone,
1135{ 1135{
1136 unsigned long nr_scanned; 1136 unsigned long nr_scanned;
1137 spin_lock(&zone->lock); 1137 spin_lock(&zone->lock);
1138 nr_scanned = zone_page_state(zone, NR_PAGES_SCANNED); 1138 nr_scanned = node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED);
1139 if (nr_scanned) 1139 if (nr_scanned)
1140 __mod_zone_page_state(zone, NR_PAGES_SCANNED, -nr_scanned); 1140 __mod_node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED, -nr_scanned);
1141 1141
1142 if (unlikely(has_isolate_pageblock(zone) || 1142 if (unlikely(has_isolate_pageblock(zone) ||
1143 is_migrate_isolate(migratetype))) { 1143 is_migrate_isolate(migratetype))) {
@@ -4288,6 +4288,7 @@ void show_free_areas(unsigned int filter)
4288 unsigned long free_pcp = 0; 4288 unsigned long free_pcp = 0;
4289 int cpu; 4289 int cpu;
4290 struct zone *zone; 4290 struct zone *zone;
4291 pg_data_t *pgdat;
4291 4292
4292 for_each_populated_zone(zone) { 4293 for_each_populated_zone(zone) {
4293 if (skip_free_areas_node(filter, zone_to_nid(zone))) 4294 if (skip_free_areas_node(filter, zone_to_nid(zone)))
@@ -4306,13 +4307,13 @@ void show_free_areas(unsigned int filter)
4306 " anon_thp: %lu shmem_thp: %lu shmem_pmdmapped: %lu\n" 4307 " anon_thp: %lu shmem_thp: %lu shmem_pmdmapped: %lu\n"
4307#endif 4308#endif
4308 " free:%lu free_pcp:%lu free_cma:%lu\n", 4309 " free:%lu free_pcp:%lu free_cma:%lu\n",
4309 global_page_state(NR_ACTIVE_ANON), 4310 global_node_page_state(NR_ACTIVE_ANON),
4310 global_page_state(NR_INACTIVE_ANON), 4311 global_node_page_state(NR_INACTIVE_ANON),
4311 global_page_state(NR_ISOLATED_ANON), 4312 global_node_page_state(NR_ISOLATED_ANON),
4312 global_page_state(NR_ACTIVE_FILE), 4313 global_node_page_state(NR_ACTIVE_FILE),
4313 global_page_state(NR_INACTIVE_FILE), 4314 global_node_page_state(NR_INACTIVE_FILE),
4314 global_page_state(NR_ISOLATED_FILE), 4315 global_node_page_state(NR_ISOLATED_FILE),
4315 global_page_state(NR_UNEVICTABLE), 4316 global_node_page_state(NR_UNEVICTABLE),
4316 global_page_state(NR_FILE_DIRTY), 4317 global_page_state(NR_FILE_DIRTY),
4317 global_page_state(NR_WRITEBACK), 4318 global_page_state(NR_WRITEBACK),
4318 global_page_state(NR_UNSTABLE_NFS), 4319 global_page_state(NR_UNSTABLE_NFS),
@@ -4331,6 +4332,28 @@ void show_free_areas(unsigned int filter)
4331 free_pcp, 4332 free_pcp,
4332 global_page_state(NR_FREE_CMA_PAGES)); 4333 global_page_state(NR_FREE_CMA_PAGES));
4333 4334
4335 for_each_online_pgdat(pgdat) {
4336 printk("Node %d"
4337 " active_anon:%lukB"
4338 " inactive_anon:%lukB"
4339 " active_file:%lukB"
4340 " inactive_file:%lukB"
4341 " unevictable:%lukB"
4342 " isolated(anon):%lukB"
4343 " isolated(file):%lukB"
4344 " all_unreclaimable? %s"
4345 "\n",
4346 pgdat->node_id,
4347 K(node_page_state(pgdat, NR_ACTIVE_ANON)),
4348 K(node_page_state(pgdat, NR_INACTIVE_ANON)),
4349 K(node_page_state(pgdat, NR_ACTIVE_FILE)),
4350 K(node_page_state(pgdat, NR_INACTIVE_FILE)),
4351 K(node_page_state(pgdat, NR_UNEVICTABLE)),
4352 K(node_page_state(pgdat, NR_ISOLATED_ANON)),
4353 K(node_page_state(pgdat, NR_ISOLATED_FILE)),
4354 !pgdat_reclaimable(pgdat) ? "yes" : "no");
4355 }
4356
4334 for_each_populated_zone(zone) { 4357 for_each_populated_zone(zone) {
4335 int i; 4358 int i;
4336 4359
@@ -4347,13 +4370,6 @@ void show_free_areas(unsigned int filter)
4347 " min:%lukB" 4370 " min:%lukB"
4348 " low:%lukB" 4371 " low:%lukB"
4349 " high:%lukB" 4372 " high:%lukB"
4350 " active_anon:%lukB"
4351 " inactive_anon:%lukB"
4352 " active_file:%lukB"
4353 " inactive_file:%lukB"
4354 " unevictable:%lukB"
4355 " isolated(anon):%lukB"
4356 " isolated(file):%lukB"
4357 " present:%lukB" 4373 " present:%lukB"
4358 " managed:%lukB" 4374 " managed:%lukB"
4359 " mlocked:%lukB" 4375 " mlocked:%lukB"
@@ -4376,21 +4392,13 @@ void show_free_areas(unsigned int filter)
4376 " local_pcp:%ukB" 4392 " local_pcp:%ukB"
4377 " free_cma:%lukB" 4393 " free_cma:%lukB"
4378 " writeback_tmp:%lukB" 4394 " writeback_tmp:%lukB"
4379 " pages_scanned:%lu" 4395 " node_pages_scanned:%lu"
4380 " all_unreclaimable? %s"
4381 "\n", 4396 "\n",
4382 zone->name, 4397 zone->name,
4383 K(zone_page_state(zone, NR_FREE_PAGES)), 4398 K(zone_page_state(zone, NR_FREE_PAGES)),
4384 K(min_wmark_pages(zone)), 4399 K(min_wmark_pages(zone)),
4385 K(low_wmark_pages(zone)), 4400 K(low_wmark_pages(zone)),
4386 K(high_wmark_pages(zone)), 4401 K(high_wmark_pages(zone)),
4387 K(zone_page_state(zone, NR_ACTIVE_ANON)),
4388 K(zone_page_state(zone, NR_INACTIVE_ANON)),
4389 K(zone_page_state(zone, NR_ACTIVE_FILE)),
4390 K(zone_page_state(zone, NR_INACTIVE_FILE)),
4391 K(zone_page_state(zone, NR_UNEVICTABLE)),
4392 K(zone_page_state(zone, NR_ISOLATED_ANON)),
4393 K(zone_page_state(zone, NR_ISOLATED_FILE)),
4394 K(zone->present_pages), 4402 K(zone->present_pages),
4395 K(zone->managed_pages), 4403 K(zone->managed_pages),
4396 K(zone_page_state(zone, NR_MLOCK)), 4404 K(zone_page_state(zone, NR_MLOCK)),
@@ -4415,9 +4423,7 @@ void show_free_areas(unsigned int filter)
4415 K(this_cpu_read(zone->pageset->pcp.count)), 4423 K(this_cpu_read(zone->pageset->pcp.count)),
4416 K(zone_page_state(zone, NR_FREE_CMA_PAGES)), 4424 K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
4417 K(zone_page_state(zone, NR_WRITEBACK_TEMP)), 4425 K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
4418 K(zone_page_state(zone, NR_PAGES_SCANNED)), 4426 K(node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED)));
4419 (!zone_reclaimable(zone) ? "yes" : "no")
4420 );
4421 printk("lowmem_reserve[]:"); 4427 printk("lowmem_reserve[]:");
4422 for (i = 0; i < MAX_NR_ZONES; i++) 4428 for (i = 0; i < MAX_NR_ZONES; i++)
4423 printk(" %ld", zone->lowmem_reserve[i]); 4429 printk(" %ld", zone->lowmem_reserve[i]);
@@ -5967,7 +5973,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat)
5967 /* For bootup, initialized properly in watermark setup */ 5973 /* For bootup, initialized properly in watermark setup */
5968 mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages); 5974 mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);
5969 5975
5970 lruvec_init(&zone->lruvec); 5976 lruvec_init(zone_lruvec(zone));
5971 if (!size) 5977 if (!size)
5972 continue; 5978 continue;
5973 5979
diff --git a/mm/swap.c b/mm/swap.c
index bf37e5cfae81..77af473635fe 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -63,7 +63,7 @@ static void __page_cache_release(struct page *page)
63 unsigned long flags; 63 unsigned long flags;
64 64
65 spin_lock_irqsave(zone_lru_lock(zone), flags); 65 spin_lock_irqsave(zone_lru_lock(zone), flags);
66 lruvec = mem_cgroup_page_lruvec(page, zone); 66 lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
67 VM_BUG_ON_PAGE(!PageLRU(page), page); 67 VM_BUG_ON_PAGE(!PageLRU(page), page);
68 __ClearPageLRU(page); 68 __ClearPageLRU(page);
69 del_page_from_lru_list(page, lruvec, page_off_lru(page)); 69 del_page_from_lru_list(page, lruvec, page_off_lru(page));
@@ -194,7 +194,7 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
194 spin_lock_irqsave(zone_lru_lock(zone), flags); 194 spin_lock_irqsave(zone_lru_lock(zone), flags);
195 } 195 }
196 196
197 lruvec = mem_cgroup_page_lruvec(page, zone); 197 lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
198 (*move_fn)(page, lruvec, arg); 198 (*move_fn)(page, lruvec, arg);
199 } 199 }
200 if (zone) 200 if (zone)
@@ -319,7 +319,7 @@ void activate_page(struct page *page)
319 319
320 page = compound_head(page); 320 page = compound_head(page);
321 spin_lock_irq(zone_lru_lock(zone)); 321 spin_lock_irq(zone_lru_lock(zone));
322 __activate_page(page, mem_cgroup_page_lruvec(page, zone), NULL); 322 __activate_page(page, mem_cgroup_page_lruvec(page, zone->zone_pgdat), NULL);
323 spin_unlock_irq(zone_lru_lock(zone)); 323 spin_unlock_irq(zone_lru_lock(zone));
324} 324}
325#endif 325#endif
@@ -445,16 +445,16 @@ void lru_cache_add(struct page *page)
445 */ 445 */
446void add_page_to_unevictable_list(struct page *page) 446void add_page_to_unevictable_list(struct page *page)
447{ 447{
448 struct zone *zone = page_zone(page); 448 struct pglist_data *pgdat = page_pgdat(page);
449 struct lruvec *lruvec; 449 struct lruvec *lruvec;
450 450
451 spin_lock_irq(zone_lru_lock(zone)); 451 spin_lock_irq(&pgdat->lru_lock);
452 lruvec = mem_cgroup_page_lruvec(page, zone); 452 lruvec = mem_cgroup_page_lruvec(page, pgdat);
453 ClearPageActive(page); 453 ClearPageActive(page);
454 SetPageUnevictable(page); 454 SetPageUnevictable(page);
455 SetPageLRU(page); 455 SetPageLRU(page);
456 add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE); 456 add_page_to_lru_list(page, lruvec, LRU_UNEVICTABLE);
457 spin_unlock_irq(zone_lru_lock(zone)); 457 spin_unlock_irq(&pgdat->lru_lock);
458} 458}
459 459
460/** 460/**
@@ -730,7 +730,7 @@ void release_pages(struct page **pages, int nr, bool cold)
730{ 730{
731 int i; 731 int i;
732 LIST_HEAD(pages_to_free); 732 LIST_HEAD(pages_to_free);
733 struct zone *zone = NULL; 733 struct pglist_data *locked_pgdat = NULL;
734 struct lruvec *lruvec; 734 struct lruvec *lruvec;
735 unsigned long uninitialized_var(flags); 735 unsigned long uninitialized_var(flags);
736 unsigned int uninitialized_var(lock_batch); 736 unsigned int uninitialized_var(lock_batch);
@@ -741,11 +741,11 @@ void release_pages(struct page **pages, int nr, bool cold)
741 /* 741 /*
742 * Make sure the IRQ-safe lock-holding time does not get 742 * Make sure the IRQ-safe lock-holding time does not get
743 * excessive with a continuous string of pages from the 743 * excessive with a continuous string of pages from the
744 * same zone. The lock is held only if zone != NULL. 744 * same pgdat. The lock is held only if pgdat != NULL.
745 */ 745 */
746 if (zone && ++lock_batch == SWAP_CLUSTER_MAX) { 746 if (locked_pgdat && ++lock_batch == SWAP_CLUSTER_MAX) {
747 spin_unlock_irqrestore(zone_lru_lock(zone), flags); 747 spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
748 zone = NULL; 748 locked_pgdat = NULL;
749 } 749 }
750 750
751 if (is_huge_zero_page(page)) { 751 if (is_huge_zero_page(page)) {
@@ -758,27 +758,27 @@ void release_pages(struct page **pages, int nr, bool cold)
758 continue; 758 continue;
759 759
760 if (PageCompound(page)) { 760 if (PageCompound(page)) {
761 if (zone) { 761 if (locked_pgdat) {
762 spin_unlock_irqrestore(zone_lru_lock(zone), flags); 762 spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
763 zone = NULL; 763 locked_pgdat = NULL;
764 } 764 }
765 __put_compound_page(page); 765 __put_compound_page(page);
766 continue; 766 continue;
767 } 767 }
768 768
769 if (PageLRU(page)) { 769 if (PageLRU(page)) {
770 struct zone *pagezone = page_zone(page); 770 struct pglist_data *pgdat = page_pgdat(page);
771 771
772 if (pagezone != zone) { 772 if (pgdat != locked_pgdat) {
773 if (zone) 773 if (locked_pgdat)
774 spin_unlock_irqrestore(zone_lru_lock(zone), 774 spin_unlock_irqrestore(&locked_pgdat->lru_lock,
775 flags); 775 flags);
776 lock_batch = 0; 776 lock_batch = 0;
777 zone = pagezone; 777 locked_pgdat = pgdat;
778 spin_lock_irqsave(zone_lru_lock(zone), flags); 778 spin_lock_irqsave(&locked_pgdat->lru_lock, flags);
779 } 779 }
780 780
781 lruvec = mem_cgroup_page_lruvec(page, zone); 781 lruvec = mem_cgroup_page_lruvec(page, locked_pgdat);
782 VM_BUG_ON_PAGE(!PageLRU(page), page); 782 VM_BUG_ON_PAGE(!PageLRU(page), page);
783 __ClearPageLRU(page); 783 __ClearPageLRU(page);
784 del_page_from_lru_list(page, lruvec, page_off_lru(page)); 784 del_page_from_lru_list(page, lruvec, page_off_lru(page));
@@ -789,8 +789,8 @@ void release_pages(struct page **pages, int nr, bool cold)
789 789
790 list_add(&page->lru, &pages_to_free); 790 list_add(&page->lru, &pages_to_free);
791 } 791 }
792 if (zone) 792 if (locked_pgdat)
793 spin_unlock_irqrestore(zone_lru_lock(zone), flags); 793 spin_unlock_irqrestore(&locked_pgdat->lru_lock, flags);
794 794
795 mem_cgroup_uncharge_list(&pages_to_free); 795 mem_cgroup_uncharge_list(&pages_to_free);
796 free_hot_cold_page_list(&pages_to_free, cold); 796 free_hot_cold_page_list(&pages_to_free, cold);
@@ -826,7 +826,7 @@ void lru_add_page_tail(struct page *page, struct page *page_tail,
826 VM_BUG_ON_PAGE(PageCompound(page_tail), page); 826 VM_BUG_ON_PAGE(PageCompound(page_tail), page);
827 VM_BUG_ON_PAGE(PageLRU(page_tail), page); 827 VM_BUG_ON_PAGE(PageLRU(page_tail), page);
828 VM_BUG_ON(NR_CPUS != 1 && 828 VM_BUG_ON(NR_CPUS != 1 &&
829 !spin_is_locked(zone_lru_lock(lruvec_zone(lruvec)))); 829 !spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock));
830 830
831 if (!list) 831 if (!list)
832 SetPageLRU(page_tail); 832 SetPageLRU(page_tail);
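
The rewritten release_pages() above batches work under a single node's lru_lock: the lock is only switched when a page belongs to a different pgdat and is dropped after SWAP_CLUSTER_MAX pages regardless. A standalone userspace sketch of just that batching pattern, with locks and pages reduced to counters (the constants and helpers are stand-ins, not the kernel API):

	#include <stdio.h>
	#include <stddef.h>

	#define SWAP_CLUSTER_MAX 32
	#define NR_NODES 2

	struct pgdat { int lock_acquisitions; };

	static struct pgdat nodes[NR_NODES];

	static void lru_lock(struct pgdat *p)   { p->lock_acquisitions++; }
	static void lru_unlock(struct pgdat *p) { (void)p; }

	static void release_pages(const int *page_node, int nr)
	{
		struct pgdat *locked = NULL;
		unsigned int lock_batch = 0;

		for (int i = 0; i < nr; i++) {
			struct pgdat *pgdat = &nodes[page_node[i]];

			/* cap IRQ-off time: drop the lock after a full batch */
			if (locked && ++lock_batch == SWAP_CLUSTER_MAX) {
				lru_unlock(locked);
				locked = NULL;
			}

			/* retake only when the page lives on a different node */
			if (pgdat != locked) {
				if (locked)
					lru_unlock(locked);
				lock_batch = 0;
				locked = pgdat;
				lru_lock(locked);
			}
			/* ... del_page_from_lru_list() would run here ... */
		}
		if (locked)
			lru_unlock(locked);
	}

	int main(void)
	{
		int pages[100];

		for (int i = 0; i < 100; i++)
			pages[i] = (i < 70) ? 0 : 1;	/* 70 pages on node 0, 30 on node 1 */

		release_pages(pages, 100);
		printf("node0 locked %d times, node1 locked %d times\n",
		       nodes[0].lock_acquisitions, nodes[1].lock_acquisitions);
		return 0;
	}
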
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e7ffcd259cc4..86a523a761c9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -191,26 +191,42 @@ static bool sane_reclaim(struct scan_control *sc)
191} 191}
192#endif 192#endif
193 193
194/*
195 * This misses isolated pages which are not accounted for to save counters.
196 * As the data only determines if reclaim or compaction continues, it is
197 * not expected that isolated pages will be a dominating factor.
198 */
194unsigned long zone_reclaimable_pages(struct zone *zone) 199unsigned long zone_reclaimable_pages(struct zone *zone)
195{ 200{
196 unsigned long nr; 201 unsigned long nr;
197 202
198 nr = zone_page_state_snapshot(zone, NR_ACTIVE_FILE) + 203 nr = zone_page_state_snapshot(zone, NR_ZONE_LRU_FILE);
199 zone_page_state_snapshot(zone, NR_INACTIVE_FILE) + 204 if (get_nr_swap_pages() > 0)
200 zone_page_state_snapshot(zone, NR_ISOLATED_FILE); 205 nr += zone_page_state_snapshot(zone, NR_ZONE_LRU_ANON);
206
207 return nr;
208}
209
210unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat)
211{
212 unsigned long nr;
213
214 nr = node_page_state_snapshot(pgdat, NR_ACTIVE_FILE) +
215 node_page_state_snapshot(pgdat, NR_INACTIVE_FILE) +
216 node_page_state_snapshot(pgdat, NR_ISOLATED_FILE);
201 217
202 if (get_nr_swap_pages() > 0) 218 if (get_nr_swap_pages() > 0)
203 nr += zone_page_state_snapshot(zone, NR_ACTIVE_ANON) + 219 nr += node_page_state_snapshot(pgdat, NR_ACTIVE_ANON) +
204 zone_page_state_snapshot(zone, NR_INACTIVE_ANON) + 220 node_page_state_snapshot(pgdat, NR_INACTIVE_ANON) +
205 zone_page_state_snapshot(zone, NR_ISOLATED_ANON); 221 node_page_state_snapshot(pgdat, NR_ISOLATED_ANON);
206 222
207 return nr; 223 return nr;
208} 224}
209 225
210bool zone_reclaimable(struct zone *zone) 226bool pgdat_reclaimable(struct pglist_data *pgdat)
211{ 227{
212 return zone_page_state_snapshot(zone, NR_PAGES_SCANNED) < 228 return node_page_state_snapshot(pgdat, NR_PAGES_SCANNED) <
213 zone_reclaimable_pages(zone) * 6; 229 pgdat_reclaimable_pages(pgdat) * 6;
214} 230}
215 231
216unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru) 232unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
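
The new pgdat_reclaimable_pages()/pgdat_reclaimable() pair above carries the old per-zone "scanned less than six times the reclaimable pages" heuristic over to the node. A toy recomputation of that test with made-up counter values (struct and field names are placeholders, not the kernel's):

	#include <stdbool.h>
	#include <stdio.h>

	struct node_stats {
		unsigned long active_file, inactive_file, isolated_file;
		unsigned long active_anon, inactive_anon, isolated_anon;
		unsigned long pages_scanned;
		bool swap_available;
	};

	static unsigned long reclaimable_pages(const struct node_stats *n)
	{
		unsigned long nr = n->active_file + n->inactive_file + n->isolated_file;

		/* anon pages only count when there is somewhere to swap them */
		if (n->swap_available)
			nr += n->active_anon + n->inactive_anon + n->isolated_anon;
		return nr;
	}

	static bool node_reclaimable(const struct node_stats *n)
	{
		return n->pages_scanned < reclaimable_pages(n) * 6;
	}

	int main(void)
	{
		struct node_stats n = {
			.active_file = 1000, .inactive_file = 3000, .isolated_file = 0,
			.active_anon = 500, .inactive_anon = 500, .isolated_anon = 0,
			.pages_scanned = 30000, .swap_available = false,
		};

		/* 30000 scanned vs 4000 reclaimable file pages: over 6x, give up */
		printf("reclaimable: %s\n", node_reclaimable(&n) ? "yes" : "no");
		return 0;
	}
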
@@ -218,7 +234,7 @@ unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru)
218 if (!mem_cgroup_disabled()) 234 if (!mem_cgroup_disabled())
219 return mem_cgroup_get_lru_size(lruvec, lru); 235 return mem_cgroup_get_lru_size(lruvec, lru);
220 236
221 return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru); 237 return node_page_state(lruvec_pgdat(lruvec), NR_LRU_BASE + lru);
222} 238}
223 239
224/* 240/*
@@ -877,7 +893,7 @@ static void page_check_dirty_writeback(struct page *page,
877 * shrink_page_list() returns the number of reclaimed pages 893 * shrink_page_list() returns the number of reclaimed pages
878 */ 894 */
879static unsigned long shrink_page_list(struct list_head *page_list, 895static unsigned long shrink_page_list(struct list_head *page_list,
880 struct zone *zone, 896 struct pglist_data *pgdat,
881 struct scan_control *sc, 897 struct scan_control *sc,
882 enum ttu_flags ttu_flags, 898 enum ttu_flags ttu_flags,
883 unsigned long *ret_nr_dirty, 899 unsigned long *ret_nr_dirty,
@@ -917,7 +933,6 @@ static unsigned long shrink_page_list(struct list_head *page_list,
917 goto keep; 933 goto keep;
918 934
919 VM_BUG_ON_PAGE(PageActive(page), page); 935 VM_BUG_ON_PAGE(PageActive(page), page);
920 VM_BUG_ON_PAGE(page_zone(page) != zone, page);
921 936
922 sc->nr_scanned++; 937 sc->nr_scanned++;
923 938
@@ -996,7 +1011,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
996 /* Case 1 above */ 1011 /* Case 1 above */
997 if (current_is_kswapd() && 1012 if (current_is_kswapd() &&
998 PageReclaim(page) && 1013 PageReclaim(page) &&
999 test_bit(ZONE_WRITEBACK, &zone->flags)) { 1014 test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
1000 nr_immediate++; 1015 nr_immediate++;
1001 goto keep_locked; 1016 goto keep_locked;
1002 1017
@@ -1092,7 +1107,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
1092 */ 1107 */
1093 if (page_is_file_cache(page) && 1108 if (page_is_file_cache(page) &&
1094 (!current_is_kswapd() || 1109 (!current_is_kswapd() ||
1095 !test_bit(ZONE_DIRTY, &zone->flags))) { 1110 !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
1096 /* 1111 /*
1097 * Immediately reclaim when written back. 1112 * Immediately reclaim when written back.
1098 * Similar in principal to deactivate_page() 1113 * Similar in principal to deactivate_page()
@@ -1266,11 +1281,11 @@ unsigned long reclaim_clean_pages_from_list(struct zone *zone,
1266 } 1281 }
1267 } 1282 }
1268 1283
1269 ret = shrink_page_list(&clean_pages, zone, &sc, 1284 ret = shrink_page_list(&clean_pages, zone->zone_pgdat, &sc,
1270 TTU_UNMAP|TTU_IGNORE_ACCESS, 1285 TTU_UNMAP|TTU_IGNORE_ACCESS,
1271 &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true); 1286 &dummy1, &dummy2, &dummy3, &dummy4, &dummy5, true);
1272 list_splice(&clean_pages, page_list); 1287 list_splice(&clean_pages, page_list);
1273 mod_zone_page_state(zone, NR_ISOLATED_FILE, -ret); 1288 mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, -ret);
1274 return ret; 1289 return ret;
1275} 1290}
1276 1291
@@ -1375,7 +1390,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1375{ 1390{
1376 struct list_head *src = &lruvec->lists[lru]; 1391 struct list_head *src = &lruvec->lists[lru];
1377 unsigned long nr_taken = 0; 1392 unsigned long nr_taken = 0;
1378 unsigned long scan; 1393 unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
1394 unsigned long scan, nr_pages;
1379 1395
1380 for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan && 1396 for (scan = 0; scan < nr_to_scan && nr_taken < nr_to_scan &&
1381 !list_empty(src); scan++) { 1397 !list_empty(src); scan++) {
@@ -1388,7 +1404,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1388 1404
1389 switch (__isolate_lru_page(page, mode)) { 1405 switch (__isolate_lru_page(page, mode)) {
1390 case 0: 1406 case 0:
1391 nr_taken += hpage_nr_pages(page); 1407 nr_pages = hpage_nr_pages(page);
1408 nr_taken += nr_pages;
1409 nr_zone_taken[page_zonenum(page)] += nr_pages;
1392 list_move(&page->lru, dst); 1410 list_move(&page->lru, dst);
1393 break; 1411 break;
1394 1412
@@ -1405,6 +1423,13 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
1405 *nr_scanned = scan; 1423 *nr_scanned = scan;
1406 trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan, 1424 trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
1407 nr_taken, mode, is_file_lru(lru)); 1425 nr_taken, mode, is_file_lru(lru));
1426 for (scan = 0; scan < MAX_NR_ZONES; scan++) {
1427 nr_pages = nr_zone_taken[scan];
1428 if (!nr_pages)
1429 continue;
1430
1431 update_lru_size(lruvec, lru, scan, -nr_pages);
1432 }
1408 return nr_taken; 1433 return nr_taken;
1409} 1434}
1410 1435
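
The nr_zone_taken[] bookkeeping added to isolate_lru_pages() above is what keeps the per-zone LRU counters coherent while pages come off a node-wide list: deltas are accumulated per zone of origin and folded back in one pass. A compact model of just that accounting (the page/zone structures and counter array are invented for the example):

	#include <stdio.h>

	#define MAX_NR_ZONES 4

	struct page { int zonenum; int nr_pages; };	/* nr_pages > 1 for THP */

	static long zone_lru_size[MAX_NR_ZONES] = { 800, 600, 400, 0 };

	/* stand-in for update_lru_size(lruvec, lru, zid, delta) */
	static void update_lru_size(int zid, long delta)
	{
		zone_lru_size[zid] += delta;
	}

	static unsigned long isolate_lru_pages(struct page *pages, int nr_to_scan)
	{
		unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
		unsigned long nr_taken = 0;

		for (int scan = 0; scan < nr_to_scan; scan++) {
			struct page *page = &pages[scan];

			nr_taken += page->nr_pages;
			nr_zone_taken[page->zonenum] += page->nr_pages;
		}

		/* fold the per-zone deltas back into the zone counters once */
		for (int zid = 0; zid < MAX_NR_ZONES; zid++) {
			if (!nr_zone_taken[zid])
				continue;
			update_lru_size(zid, -(long)nr_zone_taken[zid]);
		}
		return nr_taken;
	}

	int main(void)
	{
		struct page batch[] = {
			{ 0, 1 }, { 1, 1 }, { 1, 512 } /* a huge page */, { 2, 1 },
		};
		unsigned long taken = isolate_lru_pages(batch, 4);

		printf("taken=%lu zone1_lru=%ld\n", taken, zone_lru_size[1]);
		return 0;
	}
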
@@ -1445,7 +1470,7 @@ int isolate_lru_page(struct page *page)
1445 struct lruvec *lruvec; 1470 struct lruvec *lruvec;
1446 1471
1447 spin_lock_irq(zone_lru_lock(zone)); 1472 spin_lock_irq(zone_lru_lock(zone));
1448 lruvec = mem_cgroup_page_lruvec(page, zone); 1473 lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
1449 if (PageLRU(page)) { 1474 if (PageLRU(page)) {
1450 int lru = page_lru(page); 1475 int lru = page_lru(page);
1451 get_page(page); 1476 get_page(page);
@@ -1465,7 +1490,7 @@ int isolate_lru_page(struct page *page)
1465 * the LRU list will go small and be scanned faster than necessary, leading to 1490 * the LRU list will go small and be scanned faster than necessary, leading to
1466 * unnecessary swapping, thrashing and OOM. 1491 * unnecessary swapping, thrashing and OOM.
1467 */ 1492 */
1468static int too_many_isolated(struct zone *zone, int file, 1493static int too_many_isolated(struct pglist_data *pgdat, int file,
1469 struct scan_control *sc) 1494 struct scan_control *sc)
1470{ 1495{
1471 unsigned long inactive, isolated; 1496 unsigned long inactive, isolated;
@@ -1477,11 +1502,11 @@ static int too_many_isolated(struct zone *zone, int file,
1477 return 0; 1502 return 0;
1478 1503
1479 if (file) { 1504 if (file) {
1480 inactive = zone_page_state(zone, NR_INACTIVE_FILE); 1505 inactive = node_page_state(pgdat, NR_INACTIVE_FILE);
1481 isolated = zone_page_state(zone, NR_ISOLATED_FILE); 1506 isolated = node_page_state(pgdat, NR_ISOLATED_FILE);
1482 } else { 1507 } else {
1483 inactive = zone_page_state(zone, NR_INACTIVE_ANON); 1508 inactive = node_page_state(pgdat, NR_INACTIVE_ANON);
1484 isolated = zone_page_state(zone, NR_ISOLATED_ANON); 1509 isolated = node_page_state(pgdat, NR_ISOLATED_ANON);
1485 } 1510 }
1486 1511
1487 /* 1512 /*
@@ -1499,7 +1524,7 @@ static noinline_for_stack void
1499putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list) 1524putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
1500{ 1525{
1501 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; 1526 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
1502 struct zone *zone = lruvec_zone(lruvec); 1527 struct pglist_data *pgdat = lruvec_pgdat(lruvec);
1503 LIST_HEAD(pages_to_free); 1528 LIST_HEAD(pages_to_free);
1504 1529
1505 /* 1530 /*
@@ -1512,13 +1537,13 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
1512 VM_BUG_ON_PAGE(PageLRU(page), page); 1537 VM_BUG_ON_PAGE(PageLRU(page), page);
1513 list_del(&page->lru); 1538 list_del(&page->lru);
1514 if (unlikely(!page_evictable(page))) { 1539 if (unlikely(!page_evictable(page))) {
1515 spin_unlock_irq(zone_lru_lock(zone)); 1540 spin_unlock_irq(&pgdat->lru_lock);
1516 putback_lru_page(page); 1541 putback_lru_page(page);
1517 spin_lock_irq(zone_lru_lock(zone)); 1542 spin_lock_irq(&pgdat->lru_lock);
1518 continue; 1543 continue;
1519 } 1544 }
1520 1545
1521 lruvec = mem_cgroup_page_lruvec(page, zone); 1546 lruvec = mem_cgroup_page_lruvec(page, pgdat);
1522 1547
1523 SetPageLRU(page); 1548 SetPageLRU(page);
1524 lru = page_lru(page); 1549 lru = page_lru(page);
@@ -1535,10 +1560,10 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
1535 del_page_from_lru_list(page, lruvec, lru); 1560 del_page_from_lru_list(page, lruvec, lru);
1536 1561
1537 if (unlikely(PageCompound(page))) { 1562 if (unlikely(PageCompound(page))) {
1538 spin_unlock_irq(zone_lru_lock(zone)); 1563 spin_unlock_irq(&pgdat->lru_lock);
1539 mem_cgroup_uncharge(page); 1564 mem_cgroup_uncharge(page);
1540 (*get_compound_page_dtor(page))(page); 1565 (*get_compound_page_dtor(page))(page);
1541 spin_lock_irq(zone_lru_lock(zone)); 1566 spin_lock_irq(&pgdat->lru_lock);
1542 } else 1567 } else
1543 list_add(&page->lru, &pages_to_free); 1568 list_add(&page->lru, &pages_to_free);
1544 } 1569 }
@@ -1582,10 +1607,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
1582 unsigned long nr_immediate = 0; 1607 unsigned long nr_immediate = 0;
1583 isolate_mode_t isolate_mode = 0; 1608 isolate_mode_t isolate_mode = 0;
1584 int file = is_file_lru(lru); 1609 int file = is_file_lru(lru);
1585 struct zone *zone = lruvec_zone(lruvec); 1610 struct pglist_data *pgdat = lruvec_pgdat(lruvec);
1586 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; 1611 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
1587 1612
1588 while (unlikely(too_many_isolated(zone, file, sc))) { 1613 while (unlikely(too_many_isolated(pgdat, file, sc))) {
1589 congestion_wait(BLK_RW_ASYNC, HZ/10); 1614 congestion_wait(BLK_RW_ASYNC, HZ/10);
1590 1615
1591 /* We are about to die and free our memory. Return now. */ 1616 /* We are about to die and free our memory. Return now. */
@@ -1600,48 +1625,45 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
1600 if (!sc->may_writepage) 1625 if (!sc->may_writepage)
1601 isolate_mode |= ISOLATE_CLEAN; 1626 isolate_mode |= ISOLATE_CLEAN;
1602 1627
1603 spin_lock_irq(zone_lru_lock(zone)); 1628 spin_lock_irq(&pgdat->lru_lock);
1604 1629
1605 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list, 1630 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
1606 &nr_scanned, sc, isolate_mode, lru); 1631 &nr_scanned, sc, isolate_mode, lru);
1607 1632
1608 update_lru_size(lruvec, lru, -nr_taken); 1633 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
1609 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
1610 reclaim_stat->recent_scanned[file] += nr_taken; 1634 reclaim_stat->recent_scanned[file] += nr_taken;
1611 1635
1612 if (global_reclaim(sc)) { 1636 if (global_reclaim(sc)) {
1613 __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); 1637 __mod_node_page_state(pgdat, NR_PAGES_SCANNED, nr_scanned);
1614 if (current_is_kswapd()) 1638 if (current_is_kswapd())
1615 __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned); 1639 __count_vm_events(PGSCAN_KSWAPD, nr_scanned);
1616 else 1640 else
1617 __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned); 1641 __count_vm_events(PGSCAN_DIRECT, nr_scanned);
1618 } 1642 }
1619 spin_unlock_irq(zone_lru_lock(zone)); 1643 spin_unlock_irq(&pgdat->lru_lock);
1620 1644
1621 if (nr_taken == 0) 1645 if (nr_taken == 0)
1622 return 0; 1646 return 0;
1623 1647
1624 nr_reclaimed = shrink_page_list(&page_list, zone, sc, TTU_UNMAP, 1648 nr_reclaimed = shrink_page_list(&page_list, pgdat, sc, TTU_UNMAP,
1625 &nr_dirty, &nr_unqueued_dirty, &nr_congested, 1649 &nr_dirty, &nr_unqueued_dirty, &nr_congested,
1626 &nr_writeback, &nr_immediate, 1650 &nr_writeback, &nr_immediate,
1627 false); 1651 false);
1628 1652
1629 spin_lock_irq(zone_lru_lock(zone)); 1653 spin_lock_irq(&pgdat->lru_lock);
1630 1654
1631 if (global_reclaim(sc)) { 1655 if (global_reclaim(sc)) {
1632 if (current_is_kswapd()) 1656 if (current_is_kswapd())
1633 __count_zone_vm_events(PGSTEAL_KSWAPD, zone, 1657 __count_vm_events(PGSTEAL_KSWAPD, nr_reclaimed);
1634 nr_reclaimed);
1635 else 1658 else
1636 __count_zone_vm_events(PGSTEAL_DIRECT, zone, 1659 __count_vm_events(PGSTEAL_DIRECT, nr_reclaimed);
1637 nr_reclaimed);
1638 } 1660 }
1639 1661
1640 putback_inactive_pages(lruvec, &page_list); 1662 putback_inactive_pages(lruvec, &page_list);
1641 1663
1642 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); 1664 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
1643 1665
1644 spin_unlock_irq(zone_lru_lock(zone)); 1666 spin_unlock_irq(&pgdat->lru_lock);
1645 1667
1646 mem_cgroup_uncharge_list(&page_list); 1668 mem_cgroup_uncharge_list(&page_list);
1647 free_hot_cold_page_list(&page_list, true); 1669 free_hot_cold_page_list(&page_list, true);
@@ -1661,7 +1683,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
1661 * are encountered in the nr_immediate check below. 1683 * are encountered in the nr_immediate check below.
1662 */ 1684 */
1663 if (nr_writeback && nr_writeback == nr_taken) 1685 if (nr_writeback && nr_writeback == nr_taken)
1664 set_bit(ZONE_WRITEBACK, &zone->flags); 1686 set_bit(PGDAT_WRITEBACK, &pgdat->flags);
1665 1687
1666 /* 1688 /*
1667 * Legacy memcg will stall in page writeback so avoid forcibly 1689 * Legacy memcg will stall in page writeback so avoid forcibly
@@ -1673,16 +1695,16 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
1673 * backed by a congested BDI and wait_iff_congested will stall. 1695 * backed by a congested BDI and wait_iff_congested will stall.
1674 */ 1696 */
1675 if (nr_dirty && nr_dirty == nr_congested) 1697 if (nr_dirty && nr_dirty == nr_congested)
1676 set_bit(ZONE_CONGESTED, &zone->flags); 1698 set_bit(PGDAT_CONGESTED, &pgdat->flags);
1677 1699
1678 /* 1700 /*
1679 * If dirty pages are scanned that are not queued for IO, it 1701 * If dirty pages are scanned that are not queued for IO, it
1680 * implies that flushers are not keeping up. In this case, flag 1702 * implies that flushers are not keeping up. In this case, flag
1681 * the zone ZONE_DIRTY and kswapd will start writing pages from 1703 * the pgdat PGDAT_DIRTY and kswapd will start writing pages from
1682 * reclaim context. 1704 * reclaim context.
1683 */ 1705 */
1684 if (nr_unqueued_dirty == nr_taken) 1706 if (nr_unqueued_dirty == nr_taken)
1685 set_bit(ZONE_DIRTY, &zone->flags); 1707 set_bit(PGDAT_DIRTY, &pgdat->flags);
1686 1708
1687 /* 1709 /*
1688 * If kswapd scans pages marked marked for immediate 1710 * If kswapd scans pages marked marked for immediate
@@ -1701,9 +1723,10 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
1701 */ 1723 */
1702 if (!sc->hibernation_mode && !current_is_kswapd() && 1724 if (!sc->hibernation_mode && !current_is_kswapd() &&
1703 current_may_throttle()) 1725 current_may_throttle())
1704 wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10); 1726 wait_iff_congested(pgdat, BLK_RW_ASYNC, HZ/10);
1705 1727
1706 trace_mm_vmscan_lru_shrink_inactive(zone, nr_scanned, nr_reclaimed, 1728 trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
1729 nr_scanned, nr_reclaimed,
1707 sc->priority, file); 1730 sc->priority, file);
1708 return nr_reclaimed; 1731 return nr_reclaimed;
1709} 1732}
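
For reference, the writeback/congestion/dirty decisions at the end of shrink_inactive_list() above, now applied to the node rather than the zone, reduce to the three checks below. This is a self-contained restatement with invented flag values, not kernel code:

	#include <stdio.h>

	#define PGDAT_CONGESTED (1u << 0)
	#define PGDAT_DIRTY     (1u << 1)
	#define PGDAT_WRITEBACK (1u << 2)

	static unsigned int classify(unsigned long nr_taken, unsigned long nr_dirty,
				     unsigned long nr_unqueued_dirty,
				     unsigned long nr_congested,
				     unsigned long nr_writeback)
	{
		unsigned int flags = 0;

		/* every isolated page was already under writeback */
		if (nr_writeback && nr_writeback == nr_taken)
			flags |= PGDAT_WRITEBACK;

		/* every dirty page sat on a congested backing device */
		if (nr_dirty && nr_dirty == nr_congested)
			flags |= PGDAT_CONGESTED;

		/* every isolated page was dirty but not yet queued for IO */
		if (nr_unqueued_dirty == nr_taken)
			flags |= PGDAT_DIRTY;

		return flags;
	}

	int main(void)
	{
		/* 32 pages taken, all dirty and unqueued: flushers are behind */
		unsigned int flags = classify(32, 32, 32, 0, 0);

		printf("dirty flag set: %s\n", (flags & PGDAT_DIRTY) ? "yes" : "no");
		return 0;
	}
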
@@ -1731,20 +1754,20 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
1731 struct list_head *pages_to_free, 1754 struct list_head *pages_to_free,
1732 enum lru_list lru) 1755 enum lru_list lru)
1733{ 1756{
1734 struct zone *zone = lruvec_zone(lruvec); 1757 struct pglist_data *pgdat = lruvec_pgdat(lruvec);
1735 unsigned long pgmoved = 0; 1758 unsigned long pgmoved = 0;
1736 struct page *page; 1759 struct page *page;
1737 int nr_pages; 1760 int nr_pages;
1738 1761
1739 while (!list_empty(list)) { 1762 while (!list_empty(list)) {
1740 page = lru_to_page(list); 1763 page = lru_to_page(list);
1741 lruvec = mem_cgroup_page_lruvec(page, zone); 1764 lruvec = mem_cgroup_page_lruvec(page, pgdat);
1742 1765
1743 VM_BUG_ON_PAGE(PageLRU(page), page); 1766 VM_BUG_ON_PAGE(PageLRU(page), page);
1744 SetPageLRU(page); 1767 SetPageLRU(page);
1745 1768
1746 nr_pages = hpage_nr_pages(page); 1769 nr_pages = hpage_nr_pages(page);
1747 update_lru_size(lruvec, lru, nr_pages); 1770 update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
1748 list_move(&page->lru, &lruvec->lists[lru]); 1771 list_move(&page->lru, &lruvec->lists[lru]);
1749 pgmoved += nr_pages; 1772 pgmoved += nr_pages;
1750 1773
@@ -1754,10 +1777,10 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
1754 del_page_from_lru_list(page, lruvec, lru); 1777 del_page_from_lru_list(page, lruvec, lru);
1755 1778
1756 if (unlikely(PageCompound(page))) { 1779 if (unlikely(PageCompound(page))) {
1757 spin_unlock_irq(zone_lru_lock(zone)); 1780 spin_unlock_irq(&pgdat->lru_lock);
1758 mem_cgroup_uncharge(page); 1781 mem_cgroup_uncharge(page);
1759 (*get_compound_page_dtor(page))(page); 1782 (*get_compound_page_dtor(page))(page);
1760 spin_lock_irq(zone_lru_lock(zone)); 1783 spin_lock_irq(&pgdat->lru_lock);
1761 } else 1784 } else
1762 list_add(&page->lru, pages_to_free); 1785 list_add(&page->lru, pages_to_free);
1763 } 1786 }
@@ -1783,7 +1806,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
1783 unsigned long nr_rotated = 0; 1806 unsigned long nr_rotated = 0;
1784 isolate_mode_t isolate_mode = 0; 1807 isolate_mode_t isolate_mode = 0;
1785 int file = is_file_lru(lru); 1808 int file = is_file_lru(lru);
1786 struct zone *zone = lruvec_zone(lruvec); 1809 struct pglist_data *pgdat = lruvec_pgdat(lruvec);
1787 1810
1788 lru_add_drain(); 1811 lru_add_drain();
1789 1812
@@ -1792,20 +1815,19 @@ static void shrink_active_list(unsigned long nr_to_scan,
1792 if (!sc->may_writepage) 1815 if (!sc->may_writepage)
1793 isolate_mode |= ISOLATE_CLEAN; 1816 isolate_mode |= ISOLATE_CLEAN;
1794 1817
1795 spin_lock_irq(zone_lru_lock(zone)); 1818 spin_lock_irq(&pgdat->lru_lock);
1796 1819
1797 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold, 1820 nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
1798 &nr_scanned, sc, isolate_mode, lru); 1821 &nr_scanned, sc, isolate_mode, lru);
1799 1822
1800 update_lru_size(lruvec, lru, -nr_taken); 1823 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
1801 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
1802 reclaim_stat->recent_scanned[file] += nr_taken; 1824 reclaim_stat->recent_scanned[file] += nr_taken;
1803 1825
1804 if (global_reclaim(sc)) 1826 if (global_reclaim(sc))
1805 __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned); 1827 __mod_node_page_state(pgdat, NR_PAGES_SCANNED, nr_scanned);
1806 __count_zone_vm_events(PGREFILL, zone, nr_scanned); 1828 __count_vm_events(PGREFILL, nr_scanned);
1807 1829
1808 spin_unlock_irq(zone_lru_lock(zone)); 1830 spin_unlock_irq(&pgdat->lru_lock);
1809 1831
1810 while (!list_empty(&l_hold)) { 1832 while (!list_empty(&l_hold)) {
1811 cond_resched(); 1833 cond_resched();
@@ -1850,7 +1872,7 @@ static void shrink_active_list(unsigned long nr_to_scan,
1850 /* 1872 /*
1851 * Move pages back to the lru list. 1873 * Move pages back to the lru list.
1852 */ 1874 */
1853 spin_lock_irq(zone_lru_lock(zone)); 1875 spin_lock_irq(&pgdat->lru_lock);
1854 /* 1876 /*
1855 * Count referenced pages from currently used mappings as rotated, 1877 * Count referenced pages from currently used mappings as rotated,
1856 * even though only some of them are actually re-activated. This 1878 * even though only some of them are actually re-activated. This
@@ -1861,8 +1883,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
1861 1883
1862 move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru); 1884 move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
1863 move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE); 1885 move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
1864 __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken); 1886 __mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
1865 spin_unlock_irq(zone_lru_lock(zone)); 1887 spin_unlock_irq(&pgdat->lru_lock);
1866 1888
1867 mem_cgroup_uncharge_list(&l_hold); 1889 mem_cgroup_uncharge_list(&l_hold);
1868 free_hot_cold_page_list(&l_hold, true); 1890 free_hot_cold_page_list(&l_hold, true);
@@ -1956,7 +1978,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
1956 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat; 1978 struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
1957 u64 fraction[2]; 1979 u64 fraction[2];
1958 u64 denominator = 0; /* gcc */ 1980 u64 denominator = 0; /* gcc */
1959 struct zone *zone = lruvec_zone(lruvec); 1981 struct pglist_data *pgdat = lruvec_pgdat(lruvec);
1960 unsigned long anon_prio, file_prio; 1982 unsigned long anon_prio, file_prio;
1961 enum scan_balance scan_balance; 1983 enum scan_balance scan_balance;
1962 unsigned long anon, file; 1984 unsigned long anon, file;
@@ -1977,7 +1999,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
1977 * well. 1999 * well.
1978 */ 2000 */
1979 if (current_is_kswapd()) { 2001 if (current_is_kswapd()) {
1980 if (!zone_reclaimable(zone)) 2002 if (!pgdat_reclaimable(pgdat))
1981 force_scan = true; 2003 force_scan = true;
1982 if (!mem_cgroup_online(memcg)) 2004 if (!mem_cgroup_online(memcg))
1983 force_scan = true; 2005 force_scan = true;
@@ -2023,14 +2045,24 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
2023 * anon pages. Try to detect this based on file LRU size. 2045 * anon pages. Try to detect this based on file LRU size.
2024 */ 2046 */
2025 if (global_reclaim(sc)) { 2047 if (global_reclaim(sc)) {
2026 unsigned long zonefile; 2048 unsigned long pgdatfile;
2027 unsigned long zonefree; 2049 unsigned long pgdatfree;
2050 int z;
2051 unsigned long total_high_wmark = 0;
2028 2052
2029 zonefree = zone_page_state(zone, NR_FREE_PAGES); 2053 pgdatfree = sum_zone_node_page_state(pgdat->node_id, NR_FREE_PAGES);
2030 zonefile = zone_page_state(zone, NR_ACTIVE_FILE) + 2054 pgdatfile = node_page_state(pgdat, NR_ACTIVE_FILE) +
2031 zone_page_state(zone, NR_INACTIVE_FILE); 2055 node_page_state(pgdat, NR_INACTIVE_FILE);
2056
2057 for (z = 0; z < MAX_NR_ZONES; z++) {
2058 struct zone *zone = &pgdat->node_zones[z];
2059 if (!populated_zone(zone))
2060 continue;
2061
2062 total_high_wmark += high_wmark_pages(zone);
2063 }
2032 2064
2033 if (unlikely(zonefile + zonefree <= high_wmark_pages(zone))) { 2065 if (unlikely(pgdatfile + pgdatfree <= total_high_wmark)) {
2034 scan_balance = SCAN_ANON; 2066 scan_balance = SCAN_ANON;
2035 goto out; 2067 goto out;
2036 } 2068 }
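
The get_scan_count() hunk above replaces a single-zone watermark check with a node-wide one: free plus file pages for the whole node are compared against the summed high watermarks of its populated zones. A small worked example of the new comparison, with made-up page counts:

	#include <stdbool.h>
	#include <stdio.h>

	#define MAX_NR_ZONES 3

	struct zone_model {
		bool populated;
		unsigned long high_wmark;
	};

	int main(void)
	{
		struct zone_model zones[MAX_NR_ZONES] = {
			{ true, 1024 }, { true, 8192 }, { false, 0 },
		};
		unsigned long pgdatfree = 2000;		/* NR_FREE_PAGES, node-wide */
		unsigned long pgdatfile = 5000;		/* active + inactive file   */
		unsigned long total_high_wmark = 0;

		for (int z = 0; z < MAX_NR_ZONES; z++) {
			if (!zones[z].populated)
				continue;
			total_high_wmark += zones[z].high_wmark;
		}

		/* file cache alone cannot lift the node above its watermarks */
		if (pgdatfile + pgdatfree <= total_high_wmark)
			printf("scan anon only (SCAN_ANON)\n");
		else
			printf("keep balancing file and anon\n");
		return 0;
	}
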
@@ -2077,7 +2109,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
2077 file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) + 2109 file = lruvec_lru_size(lruvec, LRU_ACTIVE_FILE) +
2078 lruvec_lru_size(lruvec, LRU_INACTIVE_FILE); 2110 lruvec_lru_size(lruvec, LRU_INACTIVE_FILE);
2079 2111
2080 spin_lock_irq(zone_lru_lock(zone)); 2112 spin_lock_irq(&pgdat->lru_lock);
2081 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) { 2113 if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
2082 reclaim_stat->recent_scanned[0] /= 2; 2114 reclaim_stat->recent_scanned[0] /= 2;
2083 reclaim_stat->recent_rotated[0] /= 2; 2115 reclaim_stat->recent_rotated[0] /= 2;
@@ -2098,7 +2130,7 @@ static void get_scan_count(struct lruvec *lruvec, struct mem_cgroup *memcg,
2098 2130
2099 fp = file_prio * (reclaim_stat->recent_scanned[1] + 1); 2131 fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
2100 fp /= reclaim_stat->recent_rotated[1] + 1; 2132 fp /= reclaim_stat->recent_rotated[1] + 1;
2101 spin_unlock_irq(zone_lru_lock(zone)); 2133 spin_unlock_irq(&pgdat->lru_lock);
2102 2134
2103 fraction[0] = ap; 2135 fraction[0] = ap;
2104 fraction[1] = fp; 2136 fraction[1] = fp;
@@ -2352,9 +2384,9 @@ static inline bool should_continue_reclaim(struct zone *zone,
2352 * inactive lists are large enough, continue reclaiming 2384 * inactive lists are large enough, continue reclaiming
2353 */ 2385 */
2354 pages_for_compaction = (2UL << sc->order); 2386 pages_for_compaction = (2UL << sc->order);
2355 inactive_lru_pages = zone_page_state(zone, NR_INACTIVE_FILE); 2387 inactive_lru_pages = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE);
2356 if (get_nr_swap_pages() > 0) 2388 if (get_nr_swap_pages() > 0)
2357 inactive_lru_pages += zone_page_state(zone, NR_INACTIVE_ANON); 2389 inactive_lru_pages += node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
2358 if (sc->nr_reclaimed < pages_for_compaction && 2390 if (sc->nr_reclaimed < pages_for_compaction &&
2359 inactive_lru_pages > pages_for_compaction) 2391 inactive_lru_pages > pages_for_compaction)
2360 return true; 2392 return true;
@@ -2554,7 +2586,7 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
2554 continue; 2586 continue;
2555 2587
2556 if (sc->priority != DEF_PRIORITY && 2588 if (sc->priority != DEF_PRIORITY &&
2557 !zone_reclaimable(zone)) 2589 !pgdat_reclaimable(zone->zone_pgdat))
2558 continue; /* Let kswapd poll it */ 2590 continue; /* Let kswapd poll it */
2559 2591
2560 /* 2592 /*
@@ -2692,7 +2724,7 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
2692 for (i = 0; i <= ZONE_NORMAL; i++) { 2724 for (i = 0; i <= ZONE_NORMAL; i++) {
2693 zone = &pgdat->node_zones[i]; 2725 zone = &pgdat->node_zones[i];
2694 if (!populated_zone(zone) || 2726 if (!populated_zone(zone) ||
2695 zone_reclaimable_pages(zone) == 0) 2727 pgdat_reclaimable_pages(pgdat) == 0)
2696 continue; 2728 continue;
2697 2729
2698 pfmemalloc_reserve += min_wmark_pages(zone); 2730 pfmemalloc_reserve += min_wmark_pages(zone);
@@ -3000,7 +3032,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
3000 * DEF_PRIORITY. Effectively, it considers them balanced so 3032 * DEF_PRIORITY. Effectively, it considers them balanced so
3001 * they must be considered balanced here as well! 3033 * they must be considered balanced here as well!
3002 */ 3034 */
3003 if (!zone_reclaimable(zone)) { 3035 if (!pgdat_reclaimable(zone->zone_pgdat)) {
3004 balanced_pages += zone->managed_pages; 3036 balanced_pages += zone->managed_pages;
3005 continue; 3037 continue;
3006 } 3038 }
@@ -3063,6 +3095,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
3063{ 3095{
3064 unsigned long balance_gap; 3096 unsigned long balance_gap;
3065 bool lowmem_pressure; 3097 bool lowmem_pressure;
3098 struct pglist_data *pgdat = zone->zone_pgdat;
3066 3099
3067 /* Reclaim above the high watermark. */ 3100 /* Reclaim above the high watermark. */
3068 sc->nr_to_reclaim = max(SWAP_CLUSTER_MAX, high_wmark_pages(zone)); 3101 sc->nr_to_reclaim = max(SWAP_CLUSTER_MAX, high_wmark_pages(zone));
@@ -3087,7 +3120,8 @@ static bool kswapd_shrink_zone(struct zone *zone,
3087 3120
3088 shrink_zone(zone, sc, zone_idx(zone) == classzone_idx); 3121 shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
3089 3122
3090 clear_bit(ZONE_WRITEBACK, &zone->flags); 3123 /* TODO: ANOMALY */
3124 clear_bit(PGDAT_WRITEBACK, &pgdat->flags);
3091 3125
3092 /* 3126 /*
3093 * If a zone reaches its high watermark, consider it to be no longer 3127 * If a zone reaches its high watermark, consider it to be no longer
@@ -3095,10 +3129,10 @@ static bool kswapd_shrink_zone(struct zone *zone,
3095 * BDIs but as pressure is relieved, speculatively avoid congestion 3129 * BDIs but as pressure is relieved, speculatively avoid congestion
3096 * waits. 3130 * waits.
3097 */ 3131 */
3098 if (zone_reclaimable(zone) && 3132 if (pgdat_reclaimable(zone->zone_pgdat) &&
3099 zone_balanced(zone, sc->order, false, 0, classzone_idx)) { 3133 zone_balanced(zone, sc->order, false, 0, classzone_idx)) {
3100 clear_bit(ZONE_CONGESTED, &zone->flags); 3134 clear_bit(PGDAT_CONGESTED, &pgdat->flags);
3101 clear_bit(ZONE_DIRTY, &zone->flags); 3135 clear_bit(PGDAT_DIRTY, &pgdat->flags);
3102 } 3136 }
3103 3137
3104 return sc->nr_scanned >= sc->nr_to_reclaim; 3138 return sc->nr_scanned >= sc->nr_to_reclaim;
@@ -3157,7 +3191,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
3157 continue; 3191 continue;
3158 3192
3159 if (sc.priority != DEF_PRIORITY && 3193 if (sc.priority != DEF_PRIORITY &&
3160 !zone_reclaimable(zone)) 3194 !pgdat_reclaimable(zone->zone_pgdat))
3161 continue; 3195 continue;
3162 3196
3163 /* 3197 /*
@@ -3184,9 +3218,11 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
3184 /* 3218 /*
3185 * If balanced, clear the dirty and congested 3219 * If balanced, clear the dirty and congested
3186 * flags 3220 * flags
3221 *
3222 * TODO: ANOMALY
3187 */ 3223 */
3188 clear_bit(ZONE_CONGESTED, &zone->flags); 3224 clear_bit(PGDAT_CONGESTED, &zone->zone_pgdat->flags);
3189 clear_bit(ZONE_DIRTY, &zone->flags); 3225 clear_bit(PGDAT_DIRTY, &zone->zone_pgdat->flags);
3190 } 3226 }
3191 } 3227 }
3192 3228
@@ -3216,7 +3252,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
3216 continue; 3252 continue;
3217 3253
3218 if (sc.priority != DEF_PRIORITY && 3254 if (sc.priority != DEF_PRIORITY &&
3219 !zone_reclaimable(zone)) 3255 !pgdat_reclaimable(zone->zone_pgdat))
3220 continue; 3256 continue;
3221 3257
3222 sc.nr_scanned = 0; 3258 sc.nr_scanned = 0;
@@ -3612,8 +3648,8 @@ int sysctl_min_slab_ratio = 5;
3612static inline unsigned long zone_unmapped_file_pages(struct zone *zone) 3648static inline unsigned long zone_unmapped_file_pages(struct zone *zone)
3613{ 3649{
3614 unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED); 3650 unsigned long file_mapped = zone_page_state(zone, NR_FILE_MAPPED);
3615 unsigned long file_lru = zone_page_state(zone, NR_INACTIVE_FILE) + 3651 unsigned long file_lru = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
3616 zone_page_state(zone, NR_ACTIVE_FILE); 3652 node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE);
3617 3653
3618 /* 3654 /*
3619 * It's possible for there to be more file mapped pages than 3655 * It's possible for there to be more file mapped pages than
@@ -3716,7 +3752,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
3716 zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages) 3752 zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
3717 return ZONE_RECLAIM_FULL; 3753 return ZONE_RECLAIM_FULL;
3718 3754
3719 if (!zone_reclaimable(zone)) 3755 if (!pgdat_reclaimable(zone->zone_pgdat))
3720 return ZONE_RECLAIM_FULL; 3756 return ZONE_RECLAIM_FULL;
3721 3757
3722 /* 3758 /*
@@ -3795,7 +3831,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages)
3795 zone = pagezone; 3831 zone = pagezone;
3796 spin_lock_irq(zone_lru_lock(zone)); 3832 spin_lock_irq(zone_lru_lock(zone));
3797 } 3833 }
3798 lruvec = mem_cgroup_page_lruvec(page, zone); 3834 lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
3799 3835
3800 if (!PageLRU(page) || !PageUnevictable(page)) 3836 if (!PageLRU(page) || !PageUnevictable(page))
3801 continue; 3837 continue;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 3345d396a99b..de0c17076270 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -936,11 +936,8 @@ const char * const vmstat_text[] = {
936 /* enum zone_stat_item countes */ 936 /* enum zone_stat_item countes */
937 "nr_free_pages", 937 "nr_free_pages",
938 "nr_alloc_batch", 938 "nr_alloc_batch",
939 "nr_inactive_anon", 939 "nr_zone_anon_lru",
940 "nr_active_anon", 940 "nr_zone_file_lru",
941 "nr_inactive_file",
942 "nr_active_file",
943 "nr_unevictable",
944 "nr_mlock", 941 "nr_mlock",
945 "nr_anon_pages", 942 "nr_anon_pages",
946 "nr_mapped", 943 "nr_mapped",
@@ -956,12 +953,9 @@ const char * const vmstat_text[] = {
956 "nr_vmscan_write", 953 "nr_vmscan_write",
957 "nr_vmscan_immediate_reclaim", 954 "nr_vmscan_immediate_reclaim",
958 "nr_writeback_temp", 955 "nr_writeback_temp",
959 "nr_isolated_anon",
960 "nr_isolated_file",
961 "nr_shmem", 956 "nr_shmem",
962 "nr_dirtied", 957 "nr_dirtied",
963 "nr_written", 958 "nr_written",
964 "nr_pages_scanned",
965#if IS_ENABLED(CONFIG_ZSMALLOC) 959#if IS_ENABLED(CONFIG_ZSMALLOC)
966 "nr_zspages", 960 "nr_zspages",
967#endif 961#endif
@@ -981,6 +975,16 @@ const char * const vmstat_text[] = {
981 "nr_shmem_pmdmapped", 975 "nr_shmem_pmdmapped",
982 "nr_free_cma", 976 "nr_free_cma",
983 977
978 /* Node-based counters */
979 "nr_inactive_anon",
980 "nr_active_anon",
981 "nr_inactive_file",
982 "nr_active_file",
983 "nr_unevictable",
984 "nr_isolated_anon",
985 "nr_isolated_file",
986 "nr_pages_scanned",
987
984 /* enum writeback_stat_item counters */ 988 /* enum writeback_stat_item counters */
985 "nr_dirty_threshold", 989 "nr_dirty_threshold",
986 "nr_dirty_background_threshold", 990 "nr_dirty_background_threshold",
@@ -1002,11 +1006,11 @@ const char * const vmstat_text[] = {
1002 "pgmajfault", 1006 "pgmajfault",
1003 "pglazyfreed", 1007 "pglazyfreed",
1004 1008
1005 TEXTS_FOR_ZONES("pgrefill") 1009 "pgrefill",
1006 TEXTS_FOR_ZONES("pgsteal_kswapd") 1010 "pgsteal_kswapd",
1007 TEXTS_FOR_ZONES("pgsteal_direct") 1011 "pgsteal_direct",
1008 TEXTS_FOR_ZONES("pgscan_kswapd") 1012 "pgscan_kswapd",
1009 TEXTS_FOR_ZONES("pgscan_direct") 1013 "pgscan_direct",
1010 "pgscan_direct_throttle", 1014 "pgscan_direct_throttle",
1011 1015
1012#ifdef CONFIG_NUMA 1016#ifdef CONFIG_NUMA
@@ -1434,7 +1438,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1434 "\n min %lu" 1438 "\n min %lu"
1435 "\n low %lu" 1439 "\n low %lu"
1436 "\n high %lu" 1440 "\n high %lu"
1437 "\n scanned %lu" 1441 "\n node_scanned %lu"
1438 "\n spanned %lu" 1442 "\n spanned %lu"
1439 "\n present %lu" 1443 "\n present %lu"
1440 "\n managed %lu", 1444 "\n managed %lu",
@@ -1442,13 +1446,13 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1442 min_wmark_pages(zone), 1446 min_wmark_pages(zone),
1443 low_wmark_pages(zone), 1447 low_wmark_pages(zone),
1444 high_wmark_pages(zone), 1448 high_wmark_pages(zone),
1445 zone_page_state(zone, NR_PAGES_SCANNED), 1449 node_page_state(zone->zone_pgdat, NR_PAGES_SCANNED),
1446 zone->spanned_pages, 1450 zone->spanned_pages,
1447 zone->present_pages, 1451 zone->present_pages,
1448 zone->managed_pages); 1452 zone->managed_pages);
1449 1453
1450 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) 1454 for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
1451 seq_printf(m, "\n %-12s %lu", vmstat_text[i], 1455 seq_printf(m, "\n %-12s %lu", vmstat_text[i],
1452 zone_page_state(zone, i)); 1456 zone_page_state(zone, i));
1453 1457
1454 seq_printf(m, 1458 seq_printf(m,
@@ -1478,12 +1482,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
1478#endif 1482#endif
1479 } 1483 }
1480 seq_printf(m, 1484 seq_printf(m,
1481 "\n all_unreclaimable: %u" 1485 "\n node_unreclaimable: %u"
1482 "\n start_pfn: %lu" 1486 "\n start_pfn: %lu"
1483 "\n inactive_ratio: %u", 1487 "\n node_inactive_ratio: %u",
1484 !zone_reclaimable(zone), 1488 !pgdat_reclaimable(zone->zone_pgdat),
1485 zone->zone_start_pfn, 1489 zone->zone_start_pfn,
1486 zone->inactive_ratio); 1490 zone->zone_pgdat->inactive_ratio);
1487 seq_putc(m, '\n'); 1491 seq_putc(m, '\n');
1488} 1492}
1489 1493
@@ -1574,7 +1578,6 @@ static int vmstat_show(struct seq_file *m, void *arg)
1574{ 1578{
1575 unsigned long *l = arg; 1579 unsigned long *l = arg;
1576 unsigned long off = l - (unsigned long *)m->private; 1580 unsigned long off = l - (unsigned long *)m->private;
1577
1578 seq_printf(m, "%s %lu\n", vmstat_text[off], *l); 1581 seq_printf(m, "%s %lu\n", vmstat_text[off], *l);
1579 return 0; 1582 return 0;
1580} 1583}
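
The vmstat_text reshuffle above and the workingset.c hunk below both reflect the split into zone-level and node-level counters: some node figures are still derived by summing per-zone counters, while the relocated LRU items are read directly from the node. A minimal model of the difference, with invented names and values:

	#include <stdio.h>

	#define MAX_NR_ZONES 3

	enum zone_item { NR_FREE_PAGES, NR_ZONE_ITEMS };
	enum node_item { NR_INACTIVE_FILE, NR_ACTIVE_FILE, NR_NODE_ITEMS };

	struct node_model {
		unsigned long zone_stat[MAX_NR_ZONES][NR_ZONE_ITEMS];
		unsigned long node_stat[NR_NODE_ITEMS];
	};

	/* node figure derived by summing the per-zone counters */
	static unsigned long sum_zone_state(struct node_model *n, enum zone_item i)
	{
		unsigned long total = 0;

		for (int z = 0; z < MAX_NR_ZONES; z++)
			total += n->zone_stat[z][i];
		return total;
	}

	/* node figure read directly; no per-zone breakdown exists */
	static unsigned long node_state(struct node_model *n, enum node_item i)
	{
		return n->node_stat[i];
	}

	int main(void)
	{
		struct node_model n = {
			.zone_stat = { { 100 }, { 900 }, { 0 } },
			.node_stat = { 4000, 1000 },
		};

		printf("free pages: %lu, file lru: %lu\n",
		       sum_zone_state(&n, NR_FREE_PAGES),
		       node_state(&n, NR_INACTIVE_FILE) +
		       node_state(&n, NR_ACTIVE_FILE));
		return 0;
	}
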
diff --git a/mm/workingset.c b/mm/workingset.c
index 5ffba0c0adc6..7820a7e1ca98 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -355,8 +355,8 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
355 pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid, 355 pages = mem_cgroup_node_nr_lru_pages(sc->memcg, sc->nid,
356 LRU_ALL_FILE); 356 LRU_ALL_FILE);
357 } else { 357 } else {
358 pages = sum_zone_node_page_state(sc->nid, NR_ACTIVE_FILE) + 358 pages = node_page_state(NODE_DATA(sc->nid), NR_ACTIVE_FILE) +
359 sum_zone_node_page_state(sc->nid, NR_INACTIVE_FILE); 359 node_page_state(NODE_DATA(sc->nid), NR_INACTIVE_FILE);
360 } 360 }
361 361
362 /* 362 /*