author    | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-28 19:36:48 -0400
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-28 19:36:48 -0400
commit    | 1c88e19b0f6a8471ee50d5062721ba30b8fd4ba9 (patch)
tree      | 6d227487ca2cf391589c73af1c40ec7b7126feec /include/linux
parent    | 6039b80eb50a893476fea7d56e86ed2d19290054 (diff)
parent    | c3486f5376696034d0fcbef8ba70c70cfcb26f51 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
"The rest of MM"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (101 commits)
mm, compaction: simplify contended compaction handling
mm, compaction: introduce direct compaction priority
mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations
mm, page_alloc: make THP-specific decisions more generic
mm, page_alloc: restructure direct compaction handling in slowpath
mm, page_alloc: don't retry initial attempt in slowpath
mm, page_alloc: set alloc_flags only once in slowpath
lib/stackdepot.c: use __GFP_NOWARN for stack allocations
mm, kasan: switch SLUB to stackdepot, enable memory quarantine for SLUB
mm, kasan: account for object redzone in SLUB's nearest_obj()
mm: fix use-after-free if memory allocation failed in vma_adjust()
zsmalloc: Delete an unnecessary check before the function call "iput"
mm/memblock.c: fix index adjustment error in __next_mem_range_rev()
mem-hotplug: alloc new page from a nearest neighbor node when mem-offline
mm: optimize copy_page_to/from_iter_iovec
mm: add cond_resched() to generic_swapfile_activate()
Revert "mm, mempool: only set __GFP_NOMEMALLOC if there are free elements"
mm, compaction: don't isolate PageWriteback pages in MIGRATE_SYNC_LIGHT mode
mm: hwpoison: remove incorrect comments
make __section_nr() more efficient
...
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/backing-dev.h   |   2
-rw-r--r-- | include/linux/compaction.h    |  33
-rw-r--r-- | include/linux/gfp.h           |  14
-rw-r--r-- | include/linux/huge_mm.h       |   2
-rw-r--r-- | include/linux/kasan.h         |   2
-rw-r--r-- | include/linux/kdb.h           |   2
-rw-r--r-- | include/linux/memblock.h      |   1
-rw-r--r-- | include/linux/memcontrol.h    |  70
-rw-r--r-- | include/linux/memremap.h      |   2
-rw-r--r-- | include/linux/mm.h            |  17
-rw-r--r-- | include/linux/mm_inline.h     |  19
-rw-r--r-- | include/linux/mm_types.h      |   2
-rw-r--r-- | include/linux/mmzone.h        | 170
-rw-r--r-- | include/linux/oom.h           |  26
-rw-r--r-- | include/linux/sched.h         |  27
-rw-r--r-- | include/linux/slab_def.h      |   3
-rw-r--r-- | include/linux/slub_def.h      |  14
-rw-r--r-- | include/linux/swap.h          |  23
-rw-r--r-- | include/linux/topology.h      |   2
-rw-r--r-- | include/linux/vm_event_item.h |  14
-rw-r--r-- | include/linux/vmstat.h        | 111
-rw-r--r-- | include/linux/writeback.h     |   2
22 files changed, 370 insertions, 188 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c82794f20110..491a91717788 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -197,7 +197,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) | |||
197 | } | 197 | } |
198 | 198 | ||
199 | long congestion_wait(int sync, long timeout); | 199 | long congestion_wait(int sync, long timeout); |
200 | long wait_iff_congested(struct zone *zone, int sync, long timeout); | 200 | long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout); |
201 | int pdflush_proc_obsolete(struct ctl_table *table, int write, | 201 | int pdflush_proc_obsolete(struct ctl_table *table, int write, |
202 | void __user *buffer, size_t *lenp, loff_t *ppos); | 202 | void __user *buffer, size_t *lenp, loff_t *ppos); |
203 | 203 | ||
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 1a02dab16646..d4e106b5dc27 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,6 +1,18 @@ | |||
1 | #ifndef _LINUX_COMPACTION_H | 1 | #ifndef _LINUX_COMPACTION_H |
2 | #define _LINUX_COMPACTION_H | 2 | #define _LINUX_COMPACTION_H |
3 | 3 | ||
4 | /* | ||
5 | * Determines how hard direct compaction should try to succeed. | ||
6 | * Lower value means higher priority, analogically to reclaim priority. | ||
7 | */ | ||
8 | enum compact_priority { | ||
9 | COMPACT_PRIO_SYNC_LIGHT, | ||
10 | MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT, | ||
11 | DEF_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT, | ||
12 | COMPACT_PRIO_ASYNC, | ||
13 | INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC | ||
14 | }; | ||
15 | |||
4 | /* Return values for compact_zone() and try_to_compact_pages() */ | 16 | /* Return values for compact_zone() and try_to_compact_pages() */ |
5 | /* When adding new states, please adjust include/trace/events/compaction.h */ | 17 | /* When adding new states, please adjust include/trace/events/compaction.h */ |
6 | enum compact_result { | 18 | enum compact_result { |
@@ -43,14 +55,6 @@ enum compact_result { | |||
43 | COMPACT_PARTIAL, | 55 | COMPACT_PARTIAL, |
44 | }; | 56 | }; |
45 | 57 | ||
46 | /* Used to signal whether compaction detected need_sched() or lock contention */ | ||
47 | /* No contention detected */ | ||
48 | #define COMPACT_CONTENDED_NONE 0 | ||
49 | /* Either need_sched() was true or fatal signal pending */ | ||
50 | #define COMPACT_CONTENDED_SCHED 1 | ||
51 | /* Zone lock or lru_lock was contended in async compaction */ | ||
52 | #define COMPACT_CONTENDED_LOCK 2 | ||
53 | |||
54 | struct alloc_context; /* in mm/internal.h */ | 58 | struct alloc_context; /* in mm/internal.h */ |
55 | 59 | ||
56 | #ifdef CONFIG_COMPACTION | 60 | #ifdef CONFIG_COMPACTION |
@@ -64,9 +68,8 @@ extern int sysctl_compact_unevictable_allowed; | |||
64 | 68 | ||
65 | extern int fragmentation_index(struct zone *zone, unsigned int order); | 69 | extern int fragmentation_index(struct zone *zone, unsigned int order); |
66 | extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, | 70 | extern enum compact_result try_to_compact_pages(gfp_t gfp_mask, |
67 | unsigned int order, | 71 | unsigned int order, unsigned int alloc_flags, |
68 | unsigned int alloc_flags, const struct alloc_context *ac, | 72 | const struct alloc_context *ac, enum compact_priority prio); |
69 | enum migrate_mode mode, int *contended); | ||
70 | extern void compact_pgdat(pg_data_t *pgdat, int order); | 73 | extern void compact_pgdat(pg_data_t *pgdat, int order); |
71 | extern void reset_isolation_suitable(pg_data_t *pgdat); | 74 | extern void reset_isolation_suitable(pg_data_t *pgdat); |
72 | extern enum compact_result compaction_suitable(struct zone *zone, int order, | 75 | extern enum compact_result compaction_suitable(struct zone *zone, int order, |
@@ -151,14 +154,6 @@ extern void kcompactd_stop(int nid); | |||
151 | extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); | 154 | extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx); |
152 | 155 | ||
153 | #else | 156 | #else |
154 | static inline enum compact_result try_to_compact_pages(gfp_t gfp_mask, | ||
155 | unsigned int order, int alloc_flags, | ||
156 | const struct alloc_context *ac, | ||
157 | enum migrate_mode mode, int *contended) | ||
158 | { | ||
159 | return COMPACT_CONTINUE; | ||
160 | } | ||
161 | |||
162 | static inline void compact_pgdat(pg_data_t *pgdat, int order) | 157 | static inline void compact_pgdat(pg_data_t *pgdat, int order) |
163 | { | 158 | { |
164 | } | 159 | } |
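The new compact_priority enum replaces the migrate_mode/contended pair in the try_to_compact_pages() prototype above. Below is a minimal, illustrative sketch of how a caller could escalate priority on retry; it is not the actual __alloc_pages_slowpath() code, and get_page_from_freelist() is used only as a stand-in for the allocation attempt.

```c
/*
 * Illustrative sketch only.  Lower enum values mean higher priority, so
 * escalation is a decrement from INIT_COMPACT_PRIORITY (async) toward
 * MIN_COMPACT_PRIORITY (sync-light).
 */
static struct page *alloc_with_compaction(gfp_t gfp_mask, unsigned int order,
					   unsigned int alloc_flags,
					   const struct alloc_context *ac)
{
	enum compact_priority prio = INIT_COMPACT_PRIORITY;
	enum compact_result result;
	struct page *page;

	for (;;) {
		result = try_to_compact_pages(gfp_mask, order, alloc_flags,
					      ac, prio);
		page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
		if (page || result == COMPACT_SKIPPED)
			return page;
		if (prio == MIN_COMPACT_PRIORITY)
			return NULL;	/* already tried the hardest mode */
		prio--;			/* escalate: async -> sync-light */
	}
}
```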
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c29e9d347bc6..f8041f9de31e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -237,9 +237,11 @@ struct vm_area_struct; | |||
237 | * are expected to be movable via page reclaim or page migration. Typically, | 237 | * are expected to be movable via page reclaim or page migration. Typically, |
238 | * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE. | 238 | * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE. |
239 | * | 239 | * |
240 | * GFP_TRANSHUGE is used for THP allocations. They are compound allocations | 240 | * GFP_TRANSHUGE and GFP_TRANSHUGE_LIGHT are used for THP allocations. They are |
241 | * that will fail quickly if memory is not available and will not wake | 241 | * compound allocations that will generally fail quickly if memory is not |
242 | * kswapd on failure. | 242 | * available and will not wake kswapd/kcompactd on failure. The _LIGHT |
243 | * version does not attempt reclaim/compaction at all and is by default used | ||
244 | * in page fault path, while the non-light is used by khugepaged. | ||
243 | */ | 245 | */ |
244 | #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) | 246 | #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) |
245 | #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) | 247 | #define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) |
@@ -254,9 +256,9 @@ struct vm_area_struct; | |||
254 | #define GFP_DMA32 __GFP_DMA32 | 256 | #define GFP_DMA32 __GFP_DMA32 |
255 | #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) | 257 | #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) |
256 | #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) | 258 | #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) |
257 | #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ | 259 | #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ |
258 | __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ | 260 | __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM) |
259 | ~__GFP_RECLAIM) | 261 | #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM) |
260 | 262 | ||
261 | /* Convert GFP flags to their corresponding migrate type */ | 263 | /* Convert GFP flags to their corresponding migrate type */ |
262 | #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) | 264 | #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) |
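GFP_TRANSHUGE now differs from GFP_TRANSHUGE_LIGHT only by __GFP_DIRECT_RECLAIM, and __GFP_NORETRY is no longer baked into either mask. A hedged sketch of how a THP allocation site might choose between them; the real policy lives in mm/huge_memory.c and is driven by the defrag setting, and thp_alloc() is a made-up helper name.

```c
static struct page *thp_alloc(int nid, bool may_enter_direct_reclaim)
{
	/* GFP_TRANSHUGE == GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM */
	gfp_t gfp = may_enter_direct_reclaim ? GFP_TRANSHUGE : GFP_TRANSHUGE_LIGHT;

	return alloc_pages_node(nid, gfp, HPAGE_PMD_ORDER);
}
```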
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 92ce91c03cd0..6f14de45b5ce 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -11,7 +11,7 @@ extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, | |||
11 | unsigned long addr, | 11 | unsigned long addr, |
12 | pmd_t *pmd, | 12 | pmd_t *pmd, |
13 | unsigned int flags); | 13 | unsigned int flags); |
14 | extern int madvise_free_huge_pmd(struct mmu_gather *tlb, | 14 | extern bool madvise_free_huge_pmd(struct mmu_gather *tlb, |
15 | struct vm_area_struct *vma, | 15 | struct vm_area_struct *vma, |
16 | pmd_t *pmd, unsigned long addr, unsigned long next); | 16 | pmd_t *pmd, unsigned long addr, unsigned long next); |
17 | extern int zap_huge_pmd(struct mmu_gather *tlb, | 17 | extern int zap_huge_pmd(struct mmu_gather *tlb, |
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index ac4b3c46a84d..c9cf374445d8 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -77,6 +77,7 @@ void kasan_free_shadow(const struct vm_struct *vm); | |||
77 | 77 | ||
78 | size_t ksize(const void *); | 78 | size_t ksize(const void *); |
79 | static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } | 79 | static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); } |
80 | size_t kasan_metadata_size(struct kmem_cache *cache); | ||
80 | 81 | ||
81 | #else /* CONFIG_KASAN */ | 82 | #else /* CONFIG_KASAN */ |
82 | 83 | ||
@@ -121,6 +122,7 @@ static inline int kasan_module_alloc(void *addr, size_t size) { return 0; } | |||
121 | static inline void kasan_free_shadow(const struct vm_struct *vm) {} | 122 | static inline void kasan_free_shadow(const struct vm_struct *vm) {} |
122 | 123 | ||
123 | static inline void kasan_unpoison_slab(const void *ptr) { } | 124 | static inline void kasan_unpoison_slab(const void *ptr) { } |
125 | static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; } | ||
124 | 126 | ||
125 | #endif /* CONFIG_KASAN */ | 127 | #endif /* CONFIG_KASAN */ |
126 | 128 | ||
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index a19bcf9e762e..410decacff8f 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -177,7 +177,7 @@ extern int kdb_get_kbd_char(void); | |||
177 | static inline | 177 | static inline |
178 | int kdb_process_cpu(const struct task_struct *p) | 178 | int kdb_process_cpu(const struct task_struct *p) |
179 | { | 179 | { |
180 | unsigned int cpu = task_thread_info(p)->cpu; | 180 | unsigned int cpu = task_cpu(p); |
181 | if (cpu > num_possible_cpus()) | 181 | if (cpu > num_possible_cpus()) |
182 | cpu = 0; | 182 | cpu = 0; |
183 | return cpu; | 183 | return cpu; |
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6c14b6179727..2925da23505d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -332,6 +332,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn); | |||
332 | phys_addr_t memblock_start_of_DRAM(void); | 332 | phys_addr_t memblock_start_of_DRAM(void); |
333 | phys_addr_t memblock_end_of_DRAM(void); | 333 | phys_addr_t memblock_end_of_DRAM(void); |
334 | void memblock_enforce_memory_limit(phys_addr_t memory_limit); | 334 | void memblock_enforce_memory_limit(phys_addr_t memory_limit); |
335 | void memblock_mem_limit_remove_map(phys_addr_t limit); | ||
335 | bool memblock_is_memory(phys_addr_t addr); | 336 | bool memblock_is_memory(phys_addr_t addr); |
336 | int memblock_is_map_memory(phys_addr_t addr); | 337 | int memblock_is_map_memory(phys_addr_t addr); |
337 | int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); | 338 | int memblock_is_region_memory(phys_addr_t base, phys_addr_t size); |
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 71aff733a497..5d8ca6e02e39 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -52,7 +52,7 @@ enum mem_cgroup_stat_index { | |||
52 | MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ | 52 | MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ |
53 | MEM_CGROUP_STAT_NSTATS, | 53 | MEM_CGROUP_STAT_NSTATS, |
54 | /* default hierarchy stats */ | 54 | /* default hierarchy stats */ |
55 | MEMCG_KERNEL_STACK = MEM_CGROUP_STAT_NSTATS, | 55 | MEMCG_KERNEL_STACK_KB = MEM_CGROUP_STAT_NSTATS, |
56 | MEMCG_SLAB_RECLAIMABLE, | 56 | MEMCG_SLAB_RECLAIMABLE, |
57 | MEMCG_SLAB_UNRECLAIMABLE, | 57 | MEMCG_SLAB_UNRECLAIMABLE, |
58 | MEMCG_SOCK, | 58 | MEMCG_SOCK, |
@@ -60,7 +60,7 @@ enum mem_cgroup_stat_index { | |||
60 | }; | 60 | }; |
61 | 61 | ||
62 | struct mem_cgroup_reclaim_cookie { | 62 | struct mem_cgroup_reclaim_cookie { |
63 | struct zone *zone; | 63 | pg_data_t *pgdat; |
64 | int priority; | 64 | int priority; |
65 | unsigned int generation; | 65 | unsigned int generation; |
66 | }; | 66 | }; |
@@ -118,7 +118,7 @@ struct mem_cgroup_reclaim_iter { | |||
118 | /* | 118 | /* |
119 | * per-zone information in memory controller. | 119 | * per-zone information in memory controller. |
120 | */ | 120 | */ |
121 | struct mem_cgroup_per_zone { | 121 | struct mem_cgroup_per_node { |
122 | struct lruvec lruvec; | 122 | struct lruvec lruvec; |
123 | unsigned long lru_size[NR_LRU_LISTS]; | 123 | unsigned long lru_size[NR_LRU_LISTS]; |
124 | 124 | ||
@@ -132,10 +132,6 @@ struct mem_cgroup_per_zone { | |||
132 | /* use container_of */ | 132 | /* use container_of */ |
133 | }; | 133 | }; |
134 | 134 | ||
135 | struct mem_cgroup_per_node { | ||
136 | struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES]; | ||
137 | }; | ||
138 | |||
139 | struct mem_cgroup_threshold { | 135 | struct mem_cgroup_threshold { |
140 | struct eventfd_ctx *eventfd; | 136 | struct eventfd_ctx *eventfd; |
141 | unsigned long threshold; | 137 | unsigned long threshold; |
@@ -314,8 +310,46 @@ void mem_cgroup_uncharge_list(struct list_head *page_list); | |||
314 | 310 | ||
315 | void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); | 311 | void mem_cgroup_migrate(struct page *oldpage, struct page *newpage); |
316 | 312 | ||
317 | struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); | 313 | static struct mem_cgroup_per_node * |
318 | struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); | 314 | mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid) |
315 | { | ||
316 | return memcg->nodeinfo[nid]; | ||
317 | } | ||
318 | |||
319 | /** | ||
320 | * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone | ||
321 | * @node: node of the wanted lruvec | ||
322 | * @memcg: memcg of the wanted lruvec | ||
323 | * | ||
324 | * Returns the lru list vector holding pages for a given @node or a given | ||
325 | * @memcg and @zone. This can be the node lruvec, if the memory controller | ||
326 | * is disabled. | ||
327 | */ | ||
328 | static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat, | ||
329 | struct mem_cgroup *memcg) | ||
330 | { | ||
331 | struct mem_cgroup_per_node *mz; | ||
332 | struct lruvec *lruvec; | ||
333 | |||
334 | if (mem_cgroup_disabled()) { | ||
335 | lruvec = node_lruvec(pgdat); | ||
336 | goto out; | ||
337 | } | ||
338 | |||
339 | mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id); | ||
340 | lruvec = &mz->lruvec; | ||
341 | out: | ||
342 | /* | ||
343 | * Since a node can be onlined after the mem_cgroup was created, | ||
344 | * we have to be prepared to initialize lruvec->pgdat here; | ||
345 | * and if offlined then reonlined, we need to reinitialize it. | ||
346 | */ | ||
347 | if (unlikely(lruvec->pgdat != pgdat)) | ||
348 | lruvec->pgdat = pgdat; | ||
349 | return lruvec; | ||
350 | } | ||
351 | |||
352 | struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *); | ||
319 | 353 | ||
320 | bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); | 354 | bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg); |
321 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); | 355 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); |
@@ -404,9 +438,9 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg, | |||
404 | static inline | 438 | static inline |
405 | unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) | 439 | unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) |
406 | { | 440 | { |
407 | struct mem_cgroup_per_zone *mz; | 441 | struct mem_cgroup_per_node *mz; |
408 | 442 | ||
409 | mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec); | 443 | mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec); |
410 | return mz->lru_size[lru]; | 444 | return mz->lru_size[lru]; |
411 | } | 445 | } |
412 | 446 | ||
@@ -477,7 +511,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, | |||
477 | mem_cgroup_update_page_stat(page, idx, -1); | 511 | mem_cgroup_update_page_stat(page, idx, -1); |
478 | } | 512 | } |
479 | 513 | ||
480 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | 514 | unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, |
481 | gfp_t gfp_mask, | 515 | gfp_t gfp_mask, |
482 | unsigned long *total_scanned); | 516 | unsigned long *total_scanned); |
483 | 517 | ||
@@ -568,16 +602,16 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new) | |||
568 | { | 602 | { |
569 | } | 603 | } |
570 | 604 | ||
571 | static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone, | 605 | static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat, |
572 | struct mem_cgroup *memcg) | 606 | struct mem_cgroup *memcg) |
573 | { | 607 | { |
574 | return &zone->lruvec; | 608 | return node_lruvec(pgdat); |
575 | } | 609 | } |
576 | 610 | ||
577 | static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, | 611 | static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page, |
578 | struct zone *zone) | 612 | struct pglist_data *pgdat) |
579 | { | 613 | { |
580 | return &zone->lruvec; | 614 | return &pgdat->lruvec; |
581 | } | 615 | } |
582 | 616 | ||
583 | static inline bool mm_match_cgroup(struct mm_struct *mm, | 617 | static inline bool mm_match_cgroup(struct mm_struct *mm, |
@@ -681,7 +715,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page, | |||
681 | } | 715 | } |
682 | 716 | ||
683 | static inline | 717 | static inline |
684 | unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order, | 718 | unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, |
685 | gfp_t gfp_mask, | 719 | gfp_t gfp_mask, |
686 | unsigned long *total_scanned) | 720 | unsigned long *total_scanned) |
687 | { | 721 | { |
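With mem_cgroup_per_zone folded into mem_cgroup_per_node, lruvecs are looked up by node rather than by zone. A minimal sketch of the new lookup pattern; lruvec_of_page() is a hypothetical wrapper, not part of the patch.

```c
static struct lruvec *lruvec_of_page(struct page *page)
{
	struct pglist_data *pgdat = page_pgdat(page);

	/* resolves to node_lruvec(pgdat) when the memory controller is disabled */
	return mem_cgroup_page_lruvec(page, pgdat);
}
```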
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index bcaa634139a9..93416196ba64 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -26,7 +26,7 @@ struct vmem_altmap { | |||
26 | unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); | 26 | unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); |
27 | void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); | 27 | void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); |
28 | 28 | ||
29 | #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_ZONE_DEVICE) | 29 | #ifdef CONFIG_ZONE_DEVICE |
30 | struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start); | 30 | struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start); |
31 | #else | 31 | #else |
32 | static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) | 32 | static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start) |
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 192c1bbe5fcd..08ed53eeedd5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -933,6 +933,11 @@ static inline struct zone *page_zone(const struct page *page) | |||
933 | return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; | 933 | return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)]; |
934 | } | 934 | } |
935 | 935 | ||
936 | static inline pg_data_t *page_pgdat(const struct page *page) | ||
937 | { | ||
938 | return NODE_DATA(page_to_nid(page)); | ||
939 | } | ||
940 | |||
936 | #ifdef SECTION_IN_PAGE_FLAGS | 941 | #ifdef SECTION_IN_PAGE_FLAGS |
937 | static inline void set_page_section(struct page *page, unsigned long section) | 942 | static inline void set_page_section(struct page *page, unsigned long section) |
938 | { | 943 | { |
@@ -973,11 +978,21 @@ static inline struct mem_cgroup *page_memcg(struct page *page) | |||
973 | { | 978 | { |
974 | return page->mem_cgroup; | 979 | return page->mem_cgroup; |
975 | } | 980 | } |
981 | static inline struct mem_cgroup *page_memcg_rcu(struct page *page) | ||
982 | { | ||
983 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
984 | return READ_ONCE(page->mem_cgroup); | ||
985 | } | ||
976 | #else | 986 | #else |
977 | static inline struct mem_cgroup *page_memcg(struct page *page) | 987 | static inline struct mem_cgroup *page_memcg(struct page *page) |
978 | { | 988 | { |
979 | return NULL; | 989 | return NULL; |
980 | } | 990 | } |
991 | static inline struct mem_cgroup *page_memcg_rcu(struct page *page) | ||
992 | { | ||
993 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
994 | return NULL; | ||
995 | } | ||
981 | #endif | 996 | #endif |
982 | 997 | ||
983 | /* | 998 | /* |
@@ -2284,6 +2299,8 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) | |||
2284 | } | 2299 | } |
2285 | #endif /* __HAVE_ARCH_GATE_AREA */ | 2300 | #endif /* __HAVE_ARCH_GATE_AREA */ |
2286 | 2301 | ||
2302 | extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); | ||
2303 | |||
2287 | #ifdef CONFIG_SYSCTL | 2304 | #ifdef CONFIG_SYSCTL |
2288 | extern int sysctl_drop_caches; | 2305 | extern int sysctl_drop_caches; |
2289 | int drop_caches_sysctl_handler(struct ctl_table *, int, | 2306 | int drop_caches_sysctl_handler(struct ctl_table *, int, |
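page_memcg_rcu() is the lockless counterpart of page_memcg() and warns unless called under rcu_read_lock(). A short usage sketch; the helper name page_in_memcg() is invented for illustration.

```c
static bool page_in_memcg(struct page *page, struct mem_cgroup *memcg)
{
	bool match;

	rcu_read_lock();
	match = (page_memcg_rcu(page) == memcg);
	rcu_read_unlock();

	return match;
}
```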
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 5bd29ba4f174..71613e8a720f 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -23,25 +23,30 @@ static inline int page_is_file_cache(struct page *page) | |||
23 | } | 23 | } |
24 | 24 | ||
25 | static __always_inline void __update_lru_size(struct lruvec *lruvec, | 25 | static __always_inline void __update_lru_size(struct lruvec *lruvec, |
26 | enum lru_list lru, int nr_pages) | 26 | enum lru_list lru, enum zone_type zid, |
27 | int nr_pages) | ||
27 | { | 28 | { |
28 | __mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages); | 29 | struct pglist_data *pgdat = lruvec_pgdat(lruvec); |
30 | |||
31 | __mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages); | ||
32 | __mod_zone_page_state(&pgdat->node_zones[zid], | ||
33 | NR_ZONE_LRU_BASE + lru, nr_pages); | ||
29 | } | 34 | } |
30 | 35 | ||
31 | static __always_inline void update_lru_size(struct lruvec *lruvec, | 36 | static __always_inline void update_lru_size(struct lruvec *lruvec, |
32 | enum lru_list lru, int nr_pages) | 37 | enum lru_list lru, enum zone_type zid, |
38 | int nr_pages) | ||
33 | { | 39 | { |
40 | __update_lru_size(lruvec, lru, zid, nr_pages); | ||
34 | #ifdef CONFIG_MEMCG | 41 | #ifdef CONFIG_MEMCG |
35 | mem_cgroup_update_lru_size(lruvec, lru, nr_pages); | 42 | mem_cgroup_update_lru_size(lruvec, lru, nr_pages); |
36 | #else | ||
37 | __update_lru_size(lruvec, lru, nr_pages); | ||
38 | #endif | 43 | #endif |
39 | } | 44 | } |
40 | 45 | ||
41 | static __always_inline void add_page_to_lru_list(struct page *page, | 46 | static __always_inline void add_page_to_lru_list(struct page *page, |
42 | struct lruvec *lruvec, enum lru_list lru) | 47 | struct lruvec *lruvec, enum lru_list lru) |
43 | { | 48 | { |
44 | update_lru_size(lruvec, lru, hpage_nr_pages(page)); | 49 | update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); |
45 | list_add(&page->lru, &lruvec->lists[lru]); | 50 | list_add(&page->lru, &lruvec->lists[lru]); |
46 | } | 51 | } |
47 | 52 | ||
@@ -49,7 +54,7 @@ static __always_inline void del_page_from_lru_list(struct page *page, | |||
49 | struct lruvec *lruvec, enum lru_list lru) | 54 | struct lruvec *lruvec, enum lru_list lru) |
50 | { | 55 | { |
51 | list_del(&page->lru); | 56 | list_del(&page->lru); |
52 | update_lru_size(lruvec, lru, -hpage_nr_pages(page)); | 57 | update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page)); |
53 | } | 58 | } |
54 | 59 | ||
55 | /** | 60 | /** |
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 79472b22d23f..903200f4ec41 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -118,7 +118,7 @@ struct page { | |||
118 | */ | 118 | */ |
119 | union { | 119 | union { |
120 | struct list_head lru; /* Pageout list, eg. active_list | 120 | struct list_head lru; /* Pageout list, eg. active_list |
121 | * protected by zone->lru_lock ! | 121 | * protected by zone_lru_lock ! |
122 | * Can be used as a generic list | 122 | * Can be used as a generic list |
123 | * by the page owner. | 123 | * by the page owner. |
124 | */ | 124 | */ |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 19425e988bdc..f2e4e90621ec 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -93,7 +93,7 @@ struct free_area { | |||
93 | struct pglist_data; | 93 | struct pglist_data; |
94 | 94 | ||
95 | /* | 95 | /* |
96 | * zone->lock and zone->lru_lock are two of the hottest locks in the kernel. | 96 | * zone->lock and the zone lru_lock are two of the hottest locks in the kernel. |
97 | * So add a wild amount of padding here to ensure that they fall into separate | 97 | * So add a wild amount of padding here to ensure that they fall into separate |
98 | * cachelines. There are very few zone structures in the machine, so space | 98 | * cachelines. There are very few zone structures in the machine, so space |
99 | * consumption is not a concern here. | 99 | * consumption is not a concern here. |
@@ -110,36 +110,20 @@ struct zone_padding { | |||
110 | enum zone_stat_item { | 110 | enum zone_stat_item { |
111 | /* First 128 byte cacheline (assuming 64 bit words) */ | 111 | /* First 128 byte cacheline (assuming 64 bit words) */ |
112 | NR_FREE_PAGES, | 112 | NR_FREE_PAGES, |
113 | NR_ALLOC_BATCH, | 113 | NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */ |
114 | NR_LRU_BASE, | 114 | NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE, |
115 | NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ | 115 | NR_ZONE_ACTIVE_ANON, |
116 | NR_ACTIVE_ANON, /* " " " " " */ | 116 | NR_ZONE_INACTIVE_FILE, |
117 | NR_INACTIVE_FILE, /* " " " " " */ | 117 | NR_ZONE_ACTIVE_FILE, |
118 | NR_ACTIVE_FILE, /* " " " " " */ | 118 | NR_ZONE_UNEVICTABLE, |
119 | NR_UNEVICTABLE, /* " " " " " */ | 119 | NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ |
120 | NR_MLOCK, /* mlock()ed pages found and moved off LRU */ | 120 | NR_MLOCK, /* mlock()ed pages found and moved off LRU */ |
121 | NR_ANON_PAGES, /* Mapped anonymous pages */ | ||
122 | NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. | ||
123 | only modified from process context */ | ||
124 | NR_FILE_PAGES, | ||
125 | NR_FILE_DIRTY, | ||
126 | NR_WRITEBACK, | ||
127 | NR_SLAB_RECLAIMABLE, | 121 | NR_SLAB_RECLAIMABLE, |
128 | NR_SLAB_UNRECLAIMABLE, | 122 | NR_SLAB_UNRECLAIMABLE, |
129 | NR_PAGETABLE, /* used for pagetables */ | 123 | NR_PAGETABLE, /* used for pagetables */ |
130 | NR_KERNEL_STACK, | 124 | NR_KERNEL_STACK_KB, /* measured in KiB */ |
131 | /* Second 128 byte cacheline */ | 125 | /* Second 128 byte cacheline */ |
132 | NR_UNSTABLE_NFS, /* NFS unstable pages */ | ||
133 | NR_BOUNCE, | 126 | NR_BOUNCE, |
134 | NR_VMSCAN_WRITE, | ||
135 | NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ | ||
136 | NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ | ||
137 | NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ | ||
138 | NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ | ||
139 | NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ | ||
140 | NR_DIRTIED, /* page dirtyings since bootup */ | ||
141 | NR_WRITTEN, /* page writings since bootup */ | ||
142 | NR_PAGES_SCANNED, /* pages scanned since last reclaim */ | ||
143 | #if IS_ENABLED(CONFIG_ZSMALLOC) | 127 | #if IS_ENABLED(CONFIG_ZSMALLOC) |
144 | NR_ZSPAGES, /* allocated in zsmalloc */ | 128 | NR_ZSPAGES, /* allocated in zsmalloc */ |
145 | #endif | 129 | #endif |
@@ -151,14 +135,40 @@ enum zone_stat_item { | |||
151 | NUMA_LOCAL, /* allocation from local node */ | 135 | NUMA_LOCAL, /* allocation from local node */ |
152 | NUMA_OTHER, /* allocation from other node */ | 136 | NUMA_OTHER, /* allocation from other node */ |
153 | #endif | 137 | #endif |
138 | NR_FREE_CMA_PAGES, | ||
139 | NR_VM_ZONE_STAT_ITEMS }; | ||
140 | |||
141 | enum node_stat_item { | ||
142 | NR_LRU_BASE, | ||
143 | NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */ | ||
144 | NR_ACTIVE_ANON, /* " " " " " */ | ||
145 | NR_INACTIVE_FILE, /* " " " " " */ | ||
146 | NR_ACTIVE_FILE, /* " " " " " */ | ||
147 | NR_UNEVICTABLE, /* " " " " " */ | ||
148 | NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ | ||
149 | NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ | ||
150 | NR_PAGES_SCANNED, /* pages scanned since last reclaim */ | ||
154 | WORKINGSET_REFAULT, | 151 | WORKINGSET_REFAULT, |
155 | WORKINGSET_ACTIVATE, | 152 | WORKINGSET_ACTIVATE, |
156 | WORKINGSET_NODERECLAIM, | 153 | WORKINGSET_NODERECLAIM, |
157 | NR_ANON_THPS, | 154 | NR_ANON_MAPPED, /* Mapped anonymous pages */ |
155 | NR_FILE_MAPPED, /* pagecache pages mapped into pagetables. | ||
156 | only modified from process context */ | ||
157 | NR_FILE_PAGES, | ||
158 | NR_FILE_DIRTY, | ||
159 | NR_WRITEBACK, | ||
160 | NR_WRITEBACK_TEMP, /* Writeback using temporary buffers */ | ||
161 | NR_SHMEM, /* shmem pages (included tmpfs/GEM pages) */ | ||
158 | NR_SHMEM_THPS, | 162 | NR_SHMEM_THPS, |
159 | NR_SHMEM_PMDMAPPED, | 163 | NR_SHMEM_PMDMAPPED, |
160 | NR_FREE_CMA_PAGES, | 164 | NR_ANON_THPS, |
161 | NR_VM_ZONE_STAT_ITEMS }; | 165 | NR_UNSTABLE_NFS, /* NFS unstable pages */ |
166 | NR_VMSCAN_WRITE, | ||
167 | NR_VMSCAN_IMMEDIATE, /* Prioritise for reclaim when writeback ends */ | ||
168 | NR_DIRTIED, /* page dirtyings since bootup */ | ||
169 | NR_WRITTEN, /* page writings since bootup */ | ||
170 | NR_VM_NODE_STAT_ITEMS | ||
171 | }; | ||
162 | 172 | ||
163 | /* | 173 | /* |
164 | * We do arithmetic on the LRU lists in various places in the code, | 174 | * We do arithmetic on the LRU lists in various places in the code, |
@@ -215,7 +225,7 @@ struct lruvec { | |||
215 | /* Evictions & activations on the inactive file list */ | 225 | /* Evictions & activations on the inactive file list */ |
216 | atomic_long_t inactive_age; | 226 | atomic_long_t inactive_age; |
217 | #ifdef CONFIG_MEMCG | 227 | #ifdef CONFIG_MEMCG |
218 | struct zone *zone; | 228 | struct pglist_data *pgdat; |
219 | #endif | 229 | #endif |
220 | }; | 230 | }; |
221 | 231 | ||
@@ -267,6 +277,11 @@ struct per_cpu_pageset { | |||
267 | #endif | 277 | #endif |
268 | }; | 278 | }; |
269 | 279 | ||
280 | struct per_cpu_nodestat { | ||
281 | s8 stat_threshold; | ||
282 | s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS]; | ||
283 | }; | ||
284 | |||
270 | #endif /* !__GENERATING_BOUNDS.H */ | 285 | #endif /* !__GENERATING_BOUNDS.H */ |
271 | 286 | ||
272 | enum zone_type { | 287 | enum zone_type { |
@@ -348,22 +363,9 @@ struct zone { | |||
348 | #ifdef CONFIG_NUMA | 363 | #ifdef CONFIG_NUMA |
349 | int node; | 364 | int node; |
350 | #endif | 365 | #endif |
351 | |||
352 | /* | ||
353 | * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on | ||
354 | * this zone's LRU. Maintained by the pageout code. | ||
355 | */ | ||
356 | unsigned int inactive_ratio; | ||
357 | |||
358 | struct pglist_data *zone_pgdat; | 366 | struct pglist_data *zone_pgdat; |
359 | struct per_cpu_pageset __percpu *pageset; | 367 | struct per_cpu_pageset __percpu *pageset; |
360 | 368 | ||
361 | /* | ||
362 | * This is a per-zone reserve of pages that are not available | ||
363 | * to userspace allocations. | ||
364 | */ | ||
365 | unsigned long totalreserve_pages; | ||
366 | |||
367 | #ifndef CONFIG_SPARSEMEM | 369 | #ifndef CONFIG_SPARSEMEM |
368 | /* | 370 | /* |
369 | * Flags for a pageblock_nr_pages block. See pageblock-flags.h. | 371 | * Flags for a pageblock_nr_pages block. See pageblock-flags.h. |
@@ -372,14 +374,6 @@ struct zone { | |||
372 | unsigned long *pageblock_flags; | 374 | unsigned long *pageblock_flags; |
373 | #endif /* CONFIG_SPARSEMEM */ | 375 | #endif /* CONFIG_SPARSEMEM */ |
374 | 376 | ||
375 | #ifdef CONFIG_NUMA | ||
376 | /* | ||
377 | * zone reclaim becomes active if more unmapped pages exist. | ||
378 | */ | ||
379 | unsigned long min_unmapped_pages; | ||
380 | unsigned long min_slab_pages; | ||
381 | #endif /* CONFIG_NUMA */ | ||
382 | |||
383 | /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ | 377 | /* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */ |
384 | unsigned long zone_start_pfn; | 378 | unsigned long zone_start_pfn; |
385 | 379 | ||
@@ -472,24 +466,21 @@ struct zone { | |||
472 | unsigned long wait_table_hash_nr_entries; | 466 | unsigned long wait_table_hash_nr_entries; |
473 | unsigned long wait_table_bits; | 467 | unsigned long wait_table_bits; |
474 | 468 | ||
469 | /* Write-intensive fields used from the page allocator */ | ||
475 | ZONE_PADDING(_pad1_) | 470 | ZONE_PADDING(_pad1_) |
471 | |||
476 | /* free areas of different sizes */ | 472 | /* free areas of different sizes */ |
477 | struct free_area free_area[MAX_ORDER]; | 473 | struct free_area free_area[MAX_ORDER]; |
478 | 474 | ||
479 | /* zone flags, see below */ | 475 | /* zone flags, see below */ |
480 | unsigned long flags; | 476 | unsigned long flags; |
481 | 477 | ||
482 | /* Write-intensive fields used from the page allocator */ | 478 | /* Primarily protects free_area */ |
483 | spinlock_t lock; | 479 | spinlock_t lock; |
484 | 480 | ||
481 | /* Write-intensive fields used by compaction and vmstats. */ | ||
485 | ZONE_PADDING(_pad2_) | 482 | ZONE_PADDING(_pad2_) |
486 | 483 | ||
487 | /* Write-intensive fields used by page reclaim */ | ||
488 | |||
489 | /* Fields commonly accessed by the page reclaim scanner */ | ||
490 | spinlock_t lru_lock; | ||
491 | struct lruvec lruvec; | ||
492 | |||
493 | /* | 484 | /* |
494 | * When free pages are below this point, additional steps are taken | 485 | * When free pages are below this point, additional steps are taken |
495 | * when reading the number of free pages to avoid per-cpu counter | 486 | * when reading the number of free pages to avoid per-cpu counter |
@@ -527,19 +518,18 @@ struct zone { | |||
527 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; | 518 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; |
528 | } ____cacheline_internodealigned_in_smp; | 519 | } ____cacheline_internodealigned_in_smp; |
529 | 520 | ||
530 | enum zone_flags { | 521 | enum pgdat_flags { |
531 | ZONE_RECLAIM_LOCKED, /* prevents concurrent reclaim */ | 522 | PGDAT_CONGESTED, /* pgdat has many dirty pages backed by |
532 | ZONE_CONGESTED, /* zone has many dirty pages backed by | ||
533 | * a congested BDI | 523 | * a congested BDI |
534 | */ | 524 | */ |
535 | ZONE_DIRTY, /* reclaim scanning has recently found | 525 | PGDAT_DIRTY, /* reclaim scanning has recently found |
536 | * many dirty file pages at the tail | 526 | * many dirty file pages at the tail |
537 | * of the LRU. | 527 | * of the LRU. |
538 | */ | 528 | */ |
539 | ZONE_WRITEBACK, /* reclaim scanning has recently found | 529 | PGDAT_WRITEBACK, /* reclaim scanning has recently found |
540 | * many pages under writeback | 530 | * many pages under writeback |
541 | */ | 531 | */ |
542 | ZONE_FAIR_DEPLETED, /* fair zone policy batch depleted */ | 532 | PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ |
543 | }; | 533 | }; |
544 | 534 | ||
545 | static inline unsigned long zone_end_pfn(const struct zone *zone) | 535 | static inline unsigned long zone_end_pfn(const struct zone *zone) |
@@ -663,8 +653,9 @@ typedef struct pglist_data { | |||
663 | wait_queue_head_t pfmemalloc_wait; | 653 | wait_queue_head_t pfmemalloc_wait; |
664 | struct task_struct *kswapd; /* Protected by | 654 | struct task_struct *kswapd; /* Protected by |
665 | mem_hotplug_begin/end() */ | 655 | mem_hotplug_begin/end() */ |
666 | int kswapd_max_order; | 656 | int kswapd_order; |
667 | enum zone_type classzone_idx; | 657 | enum zone_type kswapd_classzone_idx; |
658 | |||
668 | #ifdef CONFIG_COMPACTION | 659 | #ifdef CONFIG_COMPACTION |
669 | int kcompactd_max_order; | 660 | int kcompactd_max_order; |
670 | enum zone_type kcompactd_classzone_idx; | 661 | enum zone_type kcompactd_classzone_idx; |
@@ -681,6 +672,23 @@ typedef struct pglist_data { | |||
681 | /* Number of pages migrated during the rate limiting time interval */ | 672 | /* Number of pages migrated during the rate limiting time interval */ |
682 | unsigned long numabalancing_migrate_nr_pages; | 673 | unsigned long numabalancing_migrate_nr_pages; |
683 | #endif | 674 | #endif |
675 | /* | ||
676 | * This is a per-node reserve of pages that are not available | ||
677 | * to userspace allocations. | ||
678 | */ | ||
679 | unsigned long totalreserve_pages; | ||
680 | |||
681 | #ifdef CONFIG_NUMA | ||
682 | /* | ||
683 | * zone reclaim becomes active if more unmapped pages exist. | ||
684 | */ | ||
685 | unsigned long min_unmapped_pages; | ||
686 | unsigned long min_slab_pages; | ||
687 | #endif /* CONFIG_NUMA */ | ||
688 | |||
689 | /* Write-intensive fields used by page reclaim */ | ||
690 | ZONE_PADDING(_pad1_) | ||
691 | spinlock_t lru_lock; | ||
684 | 692 | ||
685 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT | 693 | #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT |
686 | /* | 694 | /* |
@@ -695,6 +703,23 @@ typedef struct pglist_data { | |||
695 | struct list_head split_queue; | 703 | struct list_head split_queue; |
696 | unsigned long split_queue_len; | 704 | unsigned long split_queue_len; |
697 | #endif | 705 | #endif |
706 | |||
707 | /* Fields commonly accessed by the page reclaim scanner */ | ||
708 | struct lruvec lruvec; | ||
709 | |||
710 | /* | ||
711 | * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on | ||
712 | * this node's LRU. Maintained by the pageout code. | ||
713 | */ | ||
714 | unsigned int inactive_ratio; | ||
715 | |||
716 | unsigned long flags; | ||
717 | |||
718 | ZONE_PADDING(_pad2_) | ||
719 | |||
720 | /* Per-node vmstats */ | ||
721 | struct per_cpu_nodestat __percpu *per_cpu_nodestats; | ||
722 | atomic_long_t vm_stat[NR_VM_NODE_STAT_ITEMS]; | ||
698 | } pg_data_t; | 723 | } pg_data_t; |
699 | 724 | ||
700 | #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) | 725 | #define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages) |
@@ -708,6 +733,15 @@ typedef struct pglist_data { | |||
708 | 733 | ||
709 | #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) | 734 | #define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn) |
710 | #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) | 735 | #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid)) |
736 | static inline spinlock_t *zone_lru_lock(struct zone *zone) | ||
737 | { | ||
738 | return &zone->zone_pgdat->lru_lock; | ||
739 | } | ||
740 | |||
741 | static inline struct lruvec *node_lruvec(struct pglist_data *pgdat) | ||
742 | { | ||
743 | return &pgdat->lruvec; | ||
744 | } | ||
711 | 745 | ||
712 | static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) | 746 | static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat) |
713 | { | 747 | { |
@@ -760,12 +794,12 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, | |||
760 | 794 | ||
761 | extern void lruvec_init(struct lruvec *lruvec); | 795 | extern void lruvec_init(struct lruvec *lruvec); |
762 | 796 | ||
763 | static inline struct zone *lruvec_zone(struct lruvec *lruvec) | 797 | static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) |
764 | { | 798 | { |
765 | #ifdef CONFIG_MEMCG | 799 | #ifdef CONFIG_MEMCG |
766 | return lruvec->zone; | 800 | return lruvec->pgdat; |
767 | #else | 801 | #else |
768 | return container_of(lruvec, struct zone, lruvec); | 802 | return container_of(lruvec, struct pglist_data, lruvec); |
769 | #endif | 803 | #endif |
770 | } | 804 | } |
771 | 805 | ||
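The LRU lists, their lock and the reclaim-related fields now live in struct pglist_data; zone_lru_lock() above is a transitional helper so that zone-based callers keep compiling. A rough sketch of the new locking pattern when touching a page's LRU state; it is illustrative and not a copy of isolate_lru_page().

```c
static void lru_del_page_example(struct page *page)
{
	struct pglist_data *pgdat = page_pgdat(page);

	spin_lock_irq(&pgdat->lru_lock);
	if (PageLRU(page)) {
		struct lruvec *lruvec = mem_cgroup_page_lruvec(page, pgdat);

		ClearPageLRU(page);
		del_page_from_lru_list(page, lruvec, page_lru(page));
	}
	spin_unlock_irq(&pgdat->lru_lock);
}
```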
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 606137b3b778..5bc0457ee3a8 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -73,9 +73,9 @@ static inline bool oom_task_origin(const struct task_struct *p) | |||
73 | extern void mark_oom_victim(struct task_struct *tsk); | 73 | extern void mark_oom_victim(struct task_struct *tsk); |
74 | 74 | ||
75 | #ifdef CONFIG_MMU | 75 | #ifdef CONFIG_MMU |
76 | extern void try_oom_reaper(struct task_struct *tsk); | 76 | extern void wake_oom_reaper(struct task_struct *tsk); |
77 | #else | 77 | #else |
78 | static inline void try_oom_reaper(struct task_struct *tsk) | 78 | static inline void wake_oom_reaper(struct task_struct *tsk) |
79 | { | 79 | { |
80 | } | 80 | } |
81 | #endif | 81 | #endif |
@@ -107,27 +107,7 @@ extern void oom_killer_enable(void); | |||
107 | 107 | ||
108 | extern struct task_struct *find_lock_task_mm(struct task_struct *p); | 108 | extern struct task_struct *find_lock_task_mm(struct task_struct *p); |
109 | 109 | ||
110 | static inline bool task_will_free_mem(struct task_struct *task) | 110 | bool task_will_free_mem(struct task_struct *task); |
111 | { | ||
112 | struct signal_struct *sig = task->signal; | ||
113 | |||
114 | /* | ||
115 | * A coredumping process may sleep for an extended period in exit_mm(), | ||
116 | * so the oom killer cannot assume that the process will promptly exit | ||
117 | * and release memory. | ||
118 | */ | ||
119 | if (sig->flags & SIGNAL_GROUP_COREDUMP) | ||
120 | return false; | ||
121 | |||
122 | if (!(task->flags & PF_EXITING)) | ||
123 | return false; | ||
124 | |||
125 | /* Make sure that the whole thread group is going down */ | ||
126 | if (!thread_group_empty(task) && !(sig->flags & SIGNAL_GROUP_EXIT)) | ||
127 | return false; | ||
128 | |||
129 | return true; | ||
130 | } | ||
131 | 111 | ||
132 | /* sysctls */ | 112 | /* sysctls */ |
133 | extern int sysctl_oom_dump_tasks; | 113 | extern int sysctl_oom_dump_tasks; |
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d99218a1e043..553af2923824 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -523,6 +523,7 @@ static inline int get_dumpable(struct mm_struct *mm) | |||
523 | #define MMF_HAS_UPROBES 19 /* has uprobes */ | 523 | #define MMF_HAS_UPROBES 19 /* has uprobes */ |
524 | #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ | 524 | #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ |
525 | #define MMF_OOM_REAPED 21 /* mm has been already reaped */ | 525 | #define MMF_OOM_REAPED 21 /* mm has been already reaped */ |
526 | #define MMF_OOM_NOT_REAPABLE 22 /* mm couldn't be reaped */ | ||
526 | 527 | ||
527 | #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) | 528 | #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK) |
528 | 529 | ||
@@ -1949,6 +1950,32 @@ static inline int tsk_nr_cpus_allowed(struct task_struct *p) | |||
1949 | #define TNF_FAULT_LOCAL 0x08 | 1950 | #define TNF_FAULT_LOCAL 0x08 |
1950 | #define TNF_MIGRATE_FAIL 0x10 | 1951 | #define TNF_MIGRATE_FAIL 0x10 |
1951 | 1952 | ||
1953 | static inline bool in_vfork(struct task_struct *tsk) | ||
1954 | { | ||
1955 | bool ret; | ||
1956 | |||
1957 | /* | ||
1958 | * need RCU to access ->real_parent if CLONE_VM was used along with | ||
1959 | * CLONE_PARENT. | ||
1960 | * | ||
1961 | * We check real_parent->mm == tsk->mm because CLONE_VFORK does not | ||
1962 | * imply CLONE_VM | ||
1963 | * | ||
1964 | * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus | ||
1965 | * ->real_parent is not necessarily the task doing vfork(), so in | ||
1966 | * theory we can't rely on task_lock() if we want to dereference it. | ||
1967 | * | ||
1968 | * And in this case we can't trust the real_parent->mm == tsk->mm | ||
1969 | * check, it can be false negative. But we do not care, if init or | ||
1970 | * another oom-unkillable task does this it should blame itself. | ||
1971 | */ | ||
1972 | rcu_read_lock(); | ||
1973 | ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; | ||
1974 | rcu_read_unlock(); | ||
1975 | |||
1976 | return ret; | ||
1977 | } | ||
1978 | |||
1952 | #ifdef CONFIG_NUMA_BALANCING | 1979 | #ifdef CONFIG_NUMA_BALANCING |
1953 | extern void task_numa_fault(int last_node, int node, int pages, int flags); | 1980 | extern void task_numa_fault(int last_node, int node, int pages, int flags); |
1954 | extern pid_t task_numa_group_id(struct task_struct *p); | 1981 | extern pid_t task_numa_group_id(struct task_struct *p); |
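in_vfork() lets the OOM killer recognise a task that is merely borrowing its parent's mm between vfork() and exec(); killing such a task frees nothing. An approximate sketch of that use, not a verbatim excerpt of oom_badness().

```c
static bool oom_candidate_is_pointless(struct task_struct *p)
{
	/* vforked child: the mm belongs to the parent, skip it */
	if (in_vfork(p))
		return true;

	/* explicitly protected from the OOM killer */
	return p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN;
}
```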
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 339ba027ade9..4ad2c5a26399 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -88,7 +88,8 @@ struct kmem_cache { | |||
88 | }; | 88 | }; |
89 | 89 | ||
90 | static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, | 90 | static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, |
91 | void *x) { | 91 | void *x) |
92 | { | ||
92 | void *object = x - (x - page->s_mem) % cache->size; | 93 | void *object = x - (x - page->s_mem) % cache->size; |
93 | void *last_object = page->s_mem + (cache->num - 1) * cache->size; | 94 | void *last_object = page->s_mem + (cache->num - 1) * cache->size; |
94 | 95 | ||
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5624c1f3eb0a..75f56c2ef2d4 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -104,6 +104,10 @@ struct kmem_cache { | |||
104 | unsigned int *random_seq; | 104 | unsigned int *random_seq; |
105 | #endif | 105 | #endif |
106 | 106 | ||
107 | #ifdef CONFIG_KASAN | ||
108 | struct kasan_cache kasan_info; | ||
109 | #endif | ||
110 | |||
107 | struct kmem_cache_node *node[MAX_NUMNODES]; | 111 | struct kmem_cache_node *node[MAX_NUMNODES]; |
108 | }; | 112 | }; |
109 | 113 | ||
@@ -119,15 +123,17 @@ static inline void sysfs_slab_remove(struct kmem_cache *s) | |||
119 | void object_err(struct kmem_cache *s, struct page *page, | 123 | void object_err(struct kmem_cache *s, struct page *page, |
120 | u8 *object, char *reason); | 124 | u8 *object, char *reason); |
121 | 125 | ||
126 | void *fixup_red_left(struct kmem_cache *s, void *p); | ||
127 | |||
122 | static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, | 128 | static inline void *nearest_obj(struct kmem_cache *cache, struct page *page, |
123 | void *x) { | 129 | void *x) { |
124 | void *object = x - (x - page_address(page)) % cache->size; | 130 | void *object = x - (x - page_address(page)) % cache->size; |
125 | void *last_object = page_address(page) + | 131 | void *last_object = page_address(page) + |
126 | (page->objects - 1) * cache->size; | 132 | (page->objects - 1) * cache->size; |
127 | if (unlikely(object > last_object)) | 133 | void *result = (unlikely(object > last_object)) ? last_object : object; |
128 | return last_object; | 134 | |
129 | else | 135 | result = fixup_red_left(cache, result); |
130 | return object; | 136 | return result; |
131 | } | 137 | } |
132 | 138 | ||
133 | #endif /* _LINUX_SLUB_DEF_H */ | 139 | #endif /* _LINUX_SLUB_DEF_H */ |
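nearest_obj() now funnels its result through fixup_red_left(), which is defined out-of-line in mm/slub.c. Roughly, and treated here as a paraphrase rather than the exact body, it skips the left red zone so the returned pointer lands on the object itself when red-zoning is active.

```c
/* paraphrased sketch of the mm/slub.c helper declared above */
void *fixup_red_left(struct kmem_cache *s, void *p)
{
	if (kmem_cache_debug(s) && s->flags & SLAB_RED_ZONE)
		p += s->red_left_pad;

	return p;
}
```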
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0af2bb2028fd..b17cc4830fa6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -157,15 +157,6 @@ enum { | |||
157 | #define SWAP_CLUSTER_MAX 32UL | 157 | #define SWAP_CLUSTER_MAX 32UL |
158 | #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX | 158 | #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX |
159 | 159 | ||
160 | /* | ||
161 | * Ratio between zone->managed_pages and the "gap" that above the per-zone | ||
162 | * "high_wmark". While balancing nodes, We allow kswapd to shrink zones that | ||
163 | * do not meet the (high_wmark + gap) watermark, even which already met the | ||
164 | * high_wmark, in order to provide better per-zone lru behavior. We are ok to | ||
165 | * spend not more than 1% of the memory for this zone balancing "gap". | ||
166 | */ | ||
167 | #define KSWAPD_ZONE_BALANCE_GAP_RATIO 100 | ||
168 | |||
169 | #define SWAP_MAP_MAX 0x3e /* Max duplication count, in first swap_map */ | 160 | #define SWAP_MAP_MAX 0x3e /* Max duplication count, in first swap_map */ |
170 | #define SWAP_MAP_BAD 0x3f /* Note pageblock is bad, in first swap_map */ | 161 | #define SWAP_MAP_BAD 0x3f /* Note pageblock is bad, in first swap_map */ |
171 | #define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */ | 162 | #define SWAP_HAS_CACHE 0x40 /* Flag page is cached, in first swap_map */ |
@@ -317,6 +308,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page, | |||
317 | 308 | ||
318 | /* linux/mm/vmscan.c */ | 309 | /* linux/mm/vmscan.c */ |
319 | extern unsigned long zone_reclaimable_pages(struct zone *zone); | 310 | extern unsigned long zone_reclaimable_pages(struct zone *zone); |
311 | extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat); | ||
320 | extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, | 312 | extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, |
321 | gfp_t gfp_mask, nodemask_t *mask); | 313 | gfp_t gfp_mask, nodemask_t *mask); |
322 | extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); | 314 | extern int __isolate_lru_page(struct page *page, isolate_mode_t mode); |
@@ -324,9 +316,9 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, | |||
324 | unsigned long nr_pages, | 316 | unsigned long nr_pages, |
325 | gfp_t gfp_mask, | 317 | gfp_t gfp_mask, |
326 | bool may_swap); | 318 | bool may_swap); |
327 | extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem, | 319 | extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem, |
328 | gfp_t gfp_mask, bool noswap, | 320 | gfp_t gfp_mask, bool noswap, |
329 | struct zone *zone, | 321 | pg_data_t *pgdat, |
330 | unsigned long *nr_scanned); | 322 | unsigned long *nr_scanned); |
331 | extern unsigned long shrink_all_memory(unsigned long nr_pages); | 323 | extern unsigned long shrink_all_memory(unsigned long nr_pages); |
332 | extern int vm_swappiness; | 324 | extern int vm_swappiness; |
@@ -334,13 +326,14 @@ extern int remove_mapping(struct address_space *mapping, struct page *page); | |||
334 | extern unsigned long vm_total_pages; | 326 | extern unsigned long vm_total_pages; |
335 | 327 | ||
336 | #ifdef CONFIG_NUMA | 328 | #ifdef CONFIG_NUMA |
337 | extern int zone_reclaim_mode; | 329 | extern int node_reclaim_mode; |
338 | extern int sysctl_min_unmapped_ratio; | 330 | extern int sysctl_min_unmapped_ratio; |
339 | extern int sysctl_min_slab_ratio; | 331 | extern int sysctl_min_slab_ratio; |
340 | extern int zone_reclaim(struct zone *, gfp_t, unsigned int); | 332 | extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int); |
341 | #else | 333 | #else |
342 | #define zone_reclaim_mode 0 | 334 | #define node_reclaim_mode 0 |
343 | static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order) | 335 | static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask, |
336 | unsigned int order) | ||
344 | { | 337 | { |
345 | return 0; | 338 | return 0; |
346 | } | 339 | } |
diff --git a/include/linux/topology.h b/include/linux/topology.h
index afce69296ac0..cb0775e1ee4b 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -54,7 +54,7 @@ int arch_update_cpu_topology(void); | |||
54 | /* | 54 | /* |
55 | * If the distance between nodes in a system is larger than RECLAIM_DISTANCE | 55 | * If the distance between nodes in a system is larger than RECLAIM_DISTANCE |
56 | * (in whatever arch specific measurement units returned by node_distance()) | 56 | * (in whatever arch specific measurement units returned by node_distance()) |
57 | * and zone_reclaim_mode is enabled then the VM will only call zone_reclaim() | 57 | * and node_reclaim_mode is enabled then the VM will only call node_reclaim() |
58 | * on nodes within this distance. | 58 | * on nodes within this distance. |
59 | */ | 59 | */ |
60 | #define RECLAIM_DISTANCE 30 | 60 | #define RECLAIM_DISTANCE 30 |
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 42604173f122..4d6ec58a8d45 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -23,21 +23,23 @@ | |||
23 | 23 | ||
24 | enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, | 24 | enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, |
25 | FOR_ALL_ZONES(PGALLOC), | 25 | FOR_ALL_ZONES(PGALLOC), |
26 | FOR_ALL_ZONES(ALLOCSTALL), | ||
27 | FOR_ALL_ZONES(PGSCAN_SKIP), | ||
26 | PGFREE, PGACTIVATE, PGDEACTIVATE, | 28 | PGFREE, PGACTIVATE, PGDEACTIVATE, |
27 | PGFAULT, PGMAJFAULT, | 29 | PGFAULT, PGMAJFAULT, |
28 | PGLAZYFREED, | 30 | PGLAZYFREED, |
29 | FOR_ALL_ZONES(PGREFILL), | 31 | PGREFILL, |
30 | FOR_ALL_ZONES(PGSTEAL_KSWAPD), | 32 | PGSTEAL_KSWAPD, |
31 | FOR_ALL_ZONES(PGSTEAL_DIRECT), | 33 | PGSTEAL_DIRECT, |
32 | FOR_ALL_ZONES(PGSCAN_KSWAPD), | 34 | PGSCAN_KSWAPD, |
33 | FOR_ALL_ZONES(PGSCAN_DIRECT), | 35 | PGSCAN_DIRECT, |
34 | PGSCAN_DIRECT_THROTTLE, | 36 | PGSCAN_DIRECT_THROTTLE, |
35 | #ifdef CONFIG_NUMA | 37 | #ifdef CONFIG_NUMA |
36 | PGSCAN_ZONE_RECLAIM_FAILED, | 38 | PGSCAN_ZONE_RECLAIM_FAILED, |
37 | #endif | 39 | #endif |
38 | PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL, | 40 | PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL, |
39 | KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, | 41 | KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY, |
40 | PAGEOUTRUN, ALLOCSTALL, PGROTATED, | 42 | PAGEOUTRUN, PGROTATED, |
41 | DROP_PAGECACHE, DROP_SLAB, | 43 | DROP_PAGECACHE, DROP_SLAB, |
42 | #ifdef CONFIG_NUMA_BALANCING | 44 | #ifdef CONFIG_NUMA_BALANCING |
43 | NUMA_PTE_UPDATES, | 45 | NUMA_PTE_UPDATES, |
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d2da8e053210..613771909b6e 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -101,25 +101,42 @@ static inline void vm_events_fold_cpu(int cpu) | |||
101 | #define count_vm_vmacache_event(x) do {} while (0) | 101 | #define count_vm_vmacache_event(x) do {} while (0) |
102 | #endif | 102 | #endif |
103 | 103 | ||
104 | #define __count_zone_vm_events(item, zone, delta) \ | 104 | #define __count_zid_vm_events(item, zid, delta) \ |
105 | __count_vm_events(item##_NORMAL - ZONE_NORMAL + \ | 105 | __count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta) |
106 | zone_idx(zone), delta) | ||
107 | 106 | ||
108 | /* | 107 | /* |
109 | * Zone based page accounting with per cpu differentials. | 108 | * Zone and node-based page accounting with per cpu differentials. |
110 | */ | 109 | */ |
111 | extern atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; | 110 | extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS]; |
111 | extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS]; | ||
112 | 112 | ||
113 | static inline void zone_page_state_add(long x, struct zone *zone, | 113 | static inline void zone_page_state_add(long x, struct zone *zone, |
114 | enum zone_stat_item item) | 114 | enum zone_stat_item item) |
115 | { | 115 | { |
116 | atomic_long_add(x, &zone->vm_stat[item]); | 116 | atomic_long_add(x, &zone->vm_stat[item]); |
117 | atomic_long_add(x, &vm_stat[item]); | 117 | atomic_long_add(x, &vm_zone_stat[item]); |
118 | } | ||
119 | |||
120 | static inline void node_page_state_add(long x, struct pglist_data *pgdat, | ||
121 | enum node_stat_item item) | ||
122 | { | ||
123 | atomic_long_add(x, &pgdat->vm_stat[item]); | ||
124 | atomic_long_add(x, &vm_node_stat[item]); | ||
118 | } | 125 | } |
119 | 126 | ||
120 | static inline unsigned long global_page_state(enum zone_stat_item item) | 127 | static inline unsigned long global_page_state(enum zone_stat_item item) |
121 | { | 128 | { |
122 | long x = atomic_long_read(&vm_stat[item]); | 129 | long x = atomic_long_read(&vm_zone_stat[item]); |
130 | #ifdef CONFIG_SMP | ||
131 | if (x < 0) | ||
132 | x = 0; | ||
133 | #endif | ||
134 | return x; | ||
135 | } | ||
136 | |||
137 | static inline unsigned long global_node_page_state(enum node_stat_item item) | ||
138 | { | ||
139 | long x = atomic_long_read(&vm_node_stat[item]); | ||
123 | #ifdef CONFIG_SMP | 140 | #ifdef CONFIG_SMP |
124 | if (x < 0) | 141 | if (x < 0) |
125 | x = 0; | 142 | x = 0; |
@@ -160,32 +177,61 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, | |||
160 | return x; | 177 | return x; |
161 | } | 178 | } |
162 | 179 | ||
163 | #ifdef CONFIG_NUMA | 180 | static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat, |
181 | enum node_stat_item item) | ||
182 | { | ||
183 | long x = atomic_long_read(&pgdat->vm_stat[item]); | ||
164 | 184 | ||
165 | extern unsigned long node_page_state(int node, enum zone_stat_item item); | 185 | #ifdef CONFIG_SMP |
186 | int cpu; | ||
187 | for_each_online_cpu(cpu) | ||
188 | x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item]; | ||
166 | 189 | ||
167 | #else | 190 | if (x < 0) |
191 | x = 0; | ||
192 | #endif | ||
193 | return x; | ||
194 | } | ||
168 | 195 | ||
169 | #define node_page_state(node, item) global_page_state(item) | ||
170 | 196 | ||
197 | #ifdef CONFIG_NUMA | ||
198 | extern unsigned long sum_zone_node_page_state(int node, | ||
199 | enum zone_stat_item item); | ||
200 | extern unsigned long node_page_state(struct pglist_data *pgdat, | ||
201 | enum node_stat_item item); | ||
202 | #else | ||
203 | #define sum_zone_node_page_state(node, item) global_page_state(item) | ||
204 | #define node_page_state(node, item) global_node_page_state(item) | ||
171 | #endif /* CONFIG_NUMA */ | 205 | #endif /* CONFIG_NUMA */ |
172 | 206 | ||
173 | #define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d) | 207 | #define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d) |
174 | #define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d)) | 208 | #define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d)) |
209 | #define add_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, __d) | ||
210 | #define sub_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, -(__d)) | ||
175 | 211 | ||
176 | #ifdef CONFIG_SMP | 212 | #ifdef CONFIG_SMP |
177 | void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long); | 213 | void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long); |
178 | void __inc_zone_page_state(struct page *, enum zone_stat_item); | 214 | void __inc_zone_page_state(struct page *, enum zone_stat_item); |
179 | void __dec_zone_page_state(struct page *, enum zone_stat_item); | 215 | void __dec_zone_page_state(struct page *, enum zone_stat_item); |
180 | 216 | ||
217 | void __mod_node_page_state(struct pglist_data *, enum node_stat_item item, long); | ||
218 | void __inc_node_page_state(struct page *, enum node_stat_item); | ||
219 | void __dec_node_page_state(struct page *, enum node_stat_item); | ||
220 | |||
181 | void mod_zone_page_state(struct zone *, enum zone_stat_item, long); | 221 | void mod_zone_page_state(struct zone *, enum zone_stat_item, long); |
182 | void inc_zone_page_state(struct page *, enum zone_stat_item); | 222 | void inc_zone_page_state(struct page *, enum zone_stat_item); |
183 | void dec_zone_page_state(struct page *, enum zone_stat_item); | 223 | void dec_zone_page_state(struct page *, enum zone_stat_item); |
184 | 224 | ||
185 | extern void inc_zone_state(struct zone *, enum zone_stat_item); | 225 | void mod_node_page_state(struct pglist_data *, enum node_stat_item, long); |
226 | void inc_node_page_state(struct page *, enum node_stat_item); | ||
227 | void dec_node_page_state(struct page *, enum node_stat_item); | ||
228 | |||
229 | extern void inc_node_state(struct pglist_data *, enum node_stat_item); | ||
186 | extern void __inc_zone_state(struct zone *, enum zone_stat_item); | 230 | extern void __inc_zone_state(struct zone *, enum zone_stat_item); |
231 | extern void __inc_node_state(struct pglist_data *, enum node_stat_item); | ||
187 | extern void dec_zone_state(struct zone *, enum zone_stat_item); | 232 | extern void dec_zone_state(struct zone *, enum zone_stat_item); |
188 | extern void __dec_zone_state(struct zone *, enum zone_stat_item); | 233 | extern void __dec_zone_state(struct zone *, enum zone_stat_item); |
234 | extern void __dec_node_state(struct pglist_data *, enum node_stat_item); | ||
189 | 235 | ||
190 | void quiet_vmstat(void); | 236 | void quiet_vmstat(void); |
191 | void cpu_vm_stats_fold(int cpu); | 237 | void cpu_vm_stats_fold(int cpu); |
@@ -213,16 +259,34 @@ static inline void __mod_zone_page_state(struct zone *zone, | |||
213 | zone_page_state_add(delta, zone, item); | 259 | zone_page_state_add(delta, zone, item); |
214 | } | 260 | } |
215 | 261 | ||
262 | static inline void __mod_node_page_state(struct pglist_data *pgdat, | ||
263 | enum node_stat_item item, int delta) | ||
264 | { | ||
265 | node_page_state_add(delta, pgdat, item); | ||
266 | } | ||
267 | |||
216 | static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item) | 268 | static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item) |
217 | { | 269 | { |
218 | atomic_long_inc(&zone->vm_stat[item]); | 270 | atomic_long_inc(&zone->vm_stat[item]); |
219 | atomic_long_inc(&vm_stat[item]); | 271 | atomic_long_inc(&vm_zone_stat[item]); |
272 | } | ||
273 | |||
274 | static inline void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item) | ||
275 | { | ||
276 | atomic_long_inc(&pgdat->vm_stat[item]); | ||
277 | atomic_long_inc(&vm_node_stat[item]); | ||
220 | } | 278 | } |
221 | 279 | ||
222 | static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item) | 280 | static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item) |
223 | { | 281 | { |
224 | atomic_long_dec(&zone->vm_stat[item]); | 282 | atomic_long_dec(&zone->vm_stat[item]); |
225 | atomic_long_dec(&vm_stat[item]); | 283 | atomic_long_dec(&vm_zone_stat[item]); |
284 | } | ||
285 | |||
286 | static inline void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item) | ||
287 | { | ||
288 | atomic_long_dec(&pgdat->vm_stat[item]); | ||
289 | atomic_long_dec(&vm_node_stat[item]); | ||
226 | } | 290 | } |
227 | 291 | ||
228 | static inline void __inc_zone_page_state(struct page *page, | 292 | static inline void __inc_zone_page_state(struct page *page, |
@@ -231,12 +295,26 @@ static inline void __inc_zone_page_state(struct page *page, | |||
231 | __inc_zone_state(page_zone(page), item); | 295 | __inc_zone_state(page_zone(page), item); |
232 | } | 296 | } |
233 | 297 | ||
298 | static inline void __inc_node_page_state(struct page *page, | ||
299 | enum node_stat_item item) | ||
300 | { | ||
301 | __inc_node_state(page_pgdat(page), item); | ||
302 | } | ||
303 | |||
304 | |||
234 | static inline void __dec_zone_page_state(struct page *page, | 305 | static inline void __dec_zone_page_state(struct page *page, |
235 | enum zone_stat_item item) | 306 | enum zone_stat_item item) |
236 | { | 307 | { |
237 | __dec_zone_state(page_zone(page), item); | 308 | __dec_zone_state(page_zone(page), item); |
238 | } | 309 | } |
239 | 310 | ||
311 | static inline void __dec_node_page_state(struct page *page, | ||
312 | enum node_stat_item item) | ||
313 | { | ||
314 | __dec_node_state(page_pgdat(page), item); | ||
315 | } | ||
316 | |||
317 | |||
240 | /* | 318 | /* |
241 | * We only use atomic operations to update counters. So there is no need to | 319 | * We only use atomic operations to update counters. So there is no need to |
242 | * disable interrupts. | 320 | * disable interrupts. |
@@ -245,7 +323,12 @@ static inline void __dec_zone_page_state(struct page *page, | |||
245 | #define dec_zone_page_state __dec_zone_page_state | 323 | #define dec_zone_page_state __dec_zone_page_state |
246 | #define mod_zone_page_state __mod_zone_page_state | 324 | #define mod_zone_page_state __mod_zone_page_state |
247 | 325 | ||
326 | #define inc_node_page_state __inc_node_page_state | ||
327 | #define dec_node_page_state __dec_node_page_state | ||
328 | #define mod_node_page_state __mod_node_page_state | ||
329 | |||
248 | #define inc_zone_state __inc_zone_state | 330 | #define inc_zone_state __inc_zone_state |
331 | #define inc_node_state __inc_node_state | ||
249 | #define dec_zone_state __dec_zone_state | 332 | #define dec_zone_state __dec_zone_state |
250 | 333 | ||
251 | #define set_pgdat_percpu_threshold(pgdat, callback) { } | 334 | #define set_pgdat_percpu_threshold(pgdat, callback) { } |
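A hedged usage sketch of the new node-wide accounting helpers declared above (whether a particular counter such as NR_FILE_PAGES lives in node_stat_item is decided elsewhere in this series and is assumed here):

    /* Illustrative only: charge a page against its node and read the
     * system-wide total back.  NR_FILE_PAGES is assumed to be a node stat. */
    static void example_account_file_page(struct page *page)
    {
            __inc_node_page_state(page, NR_FILE_PAGES);   /* page -> page_pgdat(page) */
    }

    static unsigned long example_file_pages_total(void)
    {
            return global_node_page_state(NR_FILE_PAGES); /* sum over all nodes */
    }

Note that global_node_page_state() reads the folded atomic total and may lag the per-cpu differentials, which is why node_page_state_snapshot() above walks the online CPUs when a more exact value is needed.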
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 717e6149e753..fc1e16c25a29 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -320,7 +320,7 @@ void laptop_mode_timer_fn(unsigned long data); | |||
320 | static inline void laptop_sync_completion(void) { } | 320 | static inline void laptop_sync_completion(void) { } |
321 | #endif | 321 | #endif |
322 | void throttle_vm_writeout(gfp_t gfp_mask); | 322 | void throttle_vm_writeout(gfp_t gfp_mask); |
323 | bool zone_dirty_ok(struct zone *zone); | 323 | bool node_dirty_ok(struct pglist_data *pgdat); |
324 | int wb_domain_init(struct wb_domain *dom, gfp_t gfp); | 324 | int wb_domain_init(struct wb_domain *dom, gfp_t gfp); |
325 | #ifdef CONFIG_CGROUP_WRITEBACK | 325 | #ifdef CONFIG_CGROUP_WRITEBACK |
326 | void wb_domain_exit(struct wb_domain *dom); | 326 | void wb_domain_exit(struct wb_domain *dom); |
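Finally, a rough sketch (hypothetical caller, not from this patch) of how the renamed node_dirty_ok() is meant to be used on the allocation side: a __GFP_WRITE allocation can skip zones whose node already exceeds its dirty limit:

    /* Illustrative only: avoid placing new page-cache writes on a node
     * that is already over its dirty-throttling limit. */
    static bool example_zone_ok_for_write(struct zone *zone, gfp_t gfp_mask)
    {
            if ((gfp_mask & __GFP_WRITE) && !node_dirty_ok(zone->zone_pgdat))
                    return false;
            return true;
    }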