path: root/include/linux
author		Linus Torvalds <torvalds@linux-foundation.org>	2016-07-28 19:36:48 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-07-28 19:36:48 -0400
commit		1c88e19b0f6a8471ee50d5062721ba30b8fd4ba9 (patch)
tree		6d227487ca2cf391589c73af1c40ec7b7126feec /include/linux
parent		6039b80eb50a893476fea7d56e86ed2d19290054 (diff)
parent		c3486f5376696034d0fcbef8ba70c70cfcb26f51 (diff)
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
 "The rest of MM"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (101 commits)
  mm, compaction: simplify contended compaction handling
  mm, compaction: introduce direct compaction priority
  mm, thp: remove __GFP_NORETRY from khugepaged and madvised allocations
  mm, page_alloc: make THP-specific decisions more generic
  mm, page_alloc: restructure direct compaction handling in slowpath
  mm, page_alloc: don't retry initial attempt in slowpath
  mm, page_alloc: set alloc_flags only once in slowpath
  lib/stackdepot.c: use __GFP_NOWARN for stack allocations
  mm, kasan: switch SLUB to stackdepot, enable memory quarantine for SLUB
  mm, kasan: account for object redzone in SLUB's nearest_obj()
  mm: fix use-after-free if memory allocation failed in vma_adjust()
  zsmalloc: Delete an unnecessary check before the function call "iput"
  mm/memblock.c: fix index adjustment error in __next_mem_range_rev()
  mem-hotplug: alloc new page from a nearest neighbor node when mem-offline
  mm: optimize copy_page_to/from_iter_iovec
  mm: add cond_resched() to generic_swapfile_activate()
  Revert "mm, mempool: only set __GFP_NOMEMALLOC if there are free elements"
  mm, compaction: don't isolate PageWriteback pages in MIGRATE_SYNC_LIGHT mode
  mm: hwpoison: remove incorrect comments
  make __section_nr() more efficient
  ...
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/backing-dev.h	2
-rw-r--r--	include/linux/compaction.h	33
-rw-r--r--	include/linux/gfp.h	14
-rw-r--r--	include/linux/huge_mm.h	2
-rw-r--r--	include/linux/kasan.h	2
-rw-r--r--	include/linux/kdb.h	2
-rw-r--r--	include/linux/memblock.h	1
-rw-r--r--	include/linux/memcontrol.h	70
-rw-r--r--	include/linux/memremap.h	2
-rw-r--r--	include/linux/mm.h	17
-rw-r--r--	include/linux/mm_inline.h	19
-rw-r--r--	include/linux/mm_types.h	2
-rw-r--r--	include/linux/mmzone.h	170
-rw-r--r--	include/linux/oom.h	26
-rw-r--r--	include/linux/sched.h	27
-rw-r--r--	include/linux/slab_def.h	3
-rw-r--r--	include/linux/slub_def.h	14
-rw-r--r--	include/linux/swap.h	23
-rw-r--r--	include/linux/topology.h	2
-rw-r--r--	include/linux/vm_event_item.h	14
-rw-r--r--	include/linux/vmstat.h	111
-rw-r--r--	include/linux/writeback.h	2
22 files changed, 370 insertions, 188 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index c82794f20110..491a91717788 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -197,7 +197,7 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits)
 }
 
 long congestion_wait(int sync, long timeout);
-long wait_iff_congested(struct zone *zone, int sync, long timeout);
+long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout);
 int pdflush_proc_obsolete(struct ctl_table *table, int write,
 		void __user *buffer, size_t *lenp, loff_t *ppos);
 
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 1a02dab16646..d4e106b5dc27 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,6 +1,18 @@
 #ifndef _LINUX_COMPACTION_H
 #define _LINUX_COMPACTION_H
 
+/*
+ * Determines how hard direct compaction should try to succeed.
+ * Lower value means higher priority, analogically to reclaim priority.
+ */
+enum compact_priority {
+	COMPACT_PRIO_SYNC_LIGHT,
+	MIN_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
+	DEF_COMPACT_PRIORITY = COMPACT_PRIO_SYNC_LIGHT,
+	COMPACT_PRIO_ASYNC,
+	INIT_COMPACT_PRIORITY = COMPACT_PRIO_ASYNC
+};
+
 /* Return values for compact_zone() and try_to_compact_pages() */
 /* When adding new states, please adjust include/trace/events/compaction.h */
 enum compact_result {
@@ -43,14 +55,6 @@ enum compact_result {
 	COMPACT_PARTIAL,
 };
 
-/* Used to signal whether compaction detected need_sched() or lock contention */
-/* No contention detected */
-#define COMPACT_CONTENDED_NONE	0
-/* Either need_sched() was true or fatal signal pending */
-#define COMPACT_CONTENDED_SCHED	1
-/* Zone lock or lru_lock was contended in async compaction */
-#define COMPACT_CONTENDED_LOCK	2
-
 struct alloc_context; /* in mm/internal.h */
 
 #ifdef CONFIG_COMPACTION
@@ -64,9 +68,8 @@ extern int sysctl_compact_unevictable_allowed;
 
 extern int fragmentation_index(struct zone *zone, unsigned int order);
 extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
-		unsigned int order,
-		unsigned int alloc_flags, const struct alloc_context *ac,
-		enum migrate_mode mode, int *contended);
+		unsigned int order, unsigned int alloc_flags,
+		const struct alloc_context *ac, enum compact_priority prio);
 extern void compact_pgdat(pg_data_t *pgdat, int order);
 extern void reset_isolation_suitable(pg_data_t *pgdat);
 extern enum compact_result compaction_suitable(struct zone *zone, int order,
@@ -151,14 +154,6 @@ extern void kcompactd_stop(int nid);
 extern void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx);
 
 #else
-static inline enum compact_result try_to_compact_pages(gfp_t gfp_mask,
-			unsigned int order, int alloc_flags,
-			const struct alloc_context *ac,
-			enum migrate_mode mode, int *contended)
-{
-	return COMPACT_CONTINUE;
-}
-
 static inline void compact_pgdat(pg_data_t *pgdat, int order)
 {
 }
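
For orientation only (this note and sketch are not part of the commit): callers of try_to_compact_pages() now pass an enum compact_priority instead of a migrate_mode plus a *contended output. A minimal hedged sketch of how a caller might escalate priority on failure; the helper name is made up and the real retry policy in mm/page_alloc.c is more involved.

/* Illustrative sketch only; example_compact_until_progress() is hypothetical. */
static enum compact_result
example_compact_until_progress(gfp_t gfp_mask, unsigned int order,
			       unsigned int alloc_flags,
			       const struct alloc_context *ac)
{
	enum compact_priority prio = INIT_COMPACT_PRIORITY;	/* cheapest: async */
	enum compact_result result;

	for (;;) {
		result = try_to_compact_pages(gfp_mask, order, alloc_flags,
					      ac, prio);
		/* lower numeric value means higher priority; stop at the floor */
		if (result == COMPACT_PARTIAL || prio == MIN_COMPACT_PRIORITY)
			return result;
		prio--;
	}
}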
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c29e9d347bc6..f8041f9de31e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -237,9 +237,11 @@ struct vm_area_struct;
  * are expected to be movable via page reclaim or page migration. Typically,
  * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE.
  *
- * GFP_TRANSHUGE is used for THP allocations. They are compound allocations
- * that will fail quickly if memory is not available and will not wake
- * kswapd on failure.
+ * GFP_TRANSHUGE and GFP_TRANSHUGE_LIGHT are used for THP allocations. They are
+ * compound allocations that will generally fail quickly if memory is not
+ * available and will not wake kswapd/kcompactd on failure. The _LIGHT
+ * version does not attempt reclaim/compaction at all and is by default used
+ * in page fault path, while the non-light is used by khugepaged.
  */
 #define GFP_ATOMIC	(__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
 #define GFP_KERNEL	(__GFP_RECLAIM | __GFP_IO | __GFP_FS)
@@ -254,9 +256,9 @@ struct vm_area_struct;
 #define GFP_DMA32	__GFP_DMA32
 #define GFP_HIGHUSER	(GFP_USER | __GFP_HIGHMEM)
 #define GFP_HIGHUSER_MOVABLE	(GFP_HIGHUSER | __GFP_MOVABLE)
-#define GFP_TRANSHUGE	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
-			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \
-			 ~__GFP_RECLAIM)
+#define GFP_TRANSHUGE_LIGHT	((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+			 __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
+#define GFP_TRANSHUGE	(GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
 
 /* Convert GFP flags to their corresponding migrate type */
 #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
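
As a quick illustration (not part of the patch; the helper name is made up): GFP_TRANSHUGE is now defined as GFP_TRANSHUGE_LIGHT plus __GFP_DIRECT_RECLAIM, so choosing between the two masks is only a question of whether the caller is willing to pay for direct reclaim/compaction.

/* Hypothetical helper, illustrating the two THP masks defined above. */
static inline gfp_t example_thp_gfp_mask(bool may_direct_reclaim)
{
	return may_direct_reclaim ? GFP_TRANSHUGE : GFP_TRANSHUGE_LIGHT;
}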
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 92ce91c03cd0..6f14de45b5ce 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -11,7 +11,7 @@ extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 				unsigned long addr,
 				pmd_t *pmd,
 				unsigned int flags);
-extern int madvise_free_huge_pmd(struct mmu_gather *tlb,
+extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
 			struct vm_area_struct *vma,
 			pmd_t *pmd, unsigned long addr, unsigned long next);
 extern int zap_huge_pmd(struct mmu_gather *tlb,
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index ac4b3c46a84d..c9cf374445d8 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -77,6 +77,7 @@ void kasan_free_shadow(const struct vm_struct *vm);
 
 size_t ksize(const void *);
 static inline void kasan_unpoison_slab(const void *ptr) { ksize(ptr); }
+size_t kasan_metadata_size(struct kmem_cache *cache);
 
 #else /* CONFIG_KASAN */
 
@@ -121,6 +122,7 @@ static inline int kasan_module_alloc(void *addr, size_t size) { return 0; }
 static inline void kasan_free_shadow(const struct vm_struct *vm) {}
 
 static inline void kasan_unpoison_slab(const void *ptr) { }
+static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
 
 #endif /* CONFIG_KASAN */
 
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index a19bcf9e762e..410decacff8f 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -177,7 +177,7 @@ extern int kdb_get_kbd_char(void);
 static inline
 int kdb_process_cpu(const struct task_struct *p)
 {
-	unsigned int cpu = task_thread_info(p)->cpu;
+	unsigned int cpu = task_cpu(p);
 	if (cpu > num_possible_cpus())
 		cpu = 0;
 	return cpu;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 6c14b6179727..2925da23505d 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -332,6 +332,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn);
 phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
 void memblock_enforce_memory_limit(phys_addr_t memory_limit);
+void memblock_mem_limit_remove_map(phys_addr_t limit);
 bool memblock_is_memory(phys_addr_t addr);
 int memblock_is_map_memory(phys_addr_t addr);
 int memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 71aff733a497..5d8ca6e02e39 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -52,7 +52,7 @@ enum mem_cgroup_stat_index {
 	MEM_CGROUP_STAT_SWAP,		/* # of pages, swapped out */
 	MEM_CGROUP_STAT_NSTATS,
 	/* default hierarchy stats */
-	MEMCG_KERNEL_STACK = MEM_CGROUP_STAT_NSTATS,
+	MEMCG_KERNEL_STACK_KB = MEM_CGROUP_STAT_NSTATS,
 	MEMCG_SLAB_RECLAIMABLE,
 	MEMCG_SLAB_UNRECLAIMABLE,
 	MEMCG_SOCK,
@@ -60,7 +60,7 @@ enum mem_cgroup_stat_index {
 };
 
 struct mem_cgroup_reclaim_cookie {
-	struct zone *zone;
+	pg_data_t *pgdat;
 	int priority;
 	unsigned int generation;
 };
@@ -118,7 +118,7 @@ struct mem_cgroup_reclaim_iter {
 /*
  * per-zone information in memory controller.
  */
-struct mem_cgroup_per_zone {
+struct mem_cgroup_per_node {
 	struct lruvec		lruvec;
 	unsigned long		lru_size[NR_LRU_LISTS];
 
@@ -132,10 +132,6 @@ struct mem_cgroup_per_zone {
 	/* use container_of */
 };
 
-struct mem_cgroup_per_node {
-	struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
-};
-
 struct mem_cgroup_threshold {
 	struct eventfd_ctx *eventfd;
 	unsigned long threshold;
@@ -314,8 +310,46 @@ void mem_cgroup_uncharge_list(struct list_head *page_list);
 
 void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
 
-struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *);
-struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *);
+static struct mem_cgroup_per_node *
+mem_cgroup_nodeinfo(struct mem_cgroup *memcg, int nid)
+{
+	return memcg->nodeinfo[nid];
+}
+
+/**
+ * mem_cgroup_lruvec - get the lru list vector for a node or a memcg zone
+ * @node: node of the wanted lruvec
+ * @memcg: memcg of the wanted lruvec
+ *
+ * Returns the lru list vector holding pages for a given @node or a given
+ * @memcg and @zone. This can be the node lruvec, if the memory controller
+ * is disabled.
+ */
+static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
+				struct mem_cgroup *memcg)
+{
+	struct mem_cgroup_per_node *mz;
+	struct lruvec *lruvec;
+
+	if (mem_cgroup_disabled()) {
+		lruvec = node_lruvec(pgdat);
+		goto out;
+	}
+
+	mz = mem_cgroup_nodeinfo(memcg, pgdat->node_id);
+	lruvec = &mz->lruvec;
+out:
+	/*
+	 * Since a node can be onlined after the mem_cgroup was created,
+	 * we have to be prepared to initialize lruvec->pgdat here;
+	 * and if offlined then reonlined, we need to reinitialize it.
+	 */
+	if (unlikely(lruvec->pgdat != pgdat))
+		lruvec->pgdat = pgdat;
+	return lruvec;
+}
+
+struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
 
 bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg);
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
@@ -404,9 +438,9 @@ unsigned long mem_cgroup_node_nr_lru_pages(struct mem_cgroup *memcg,
 static inline
 unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
-	struct mem_cgroup_per_zone *mz;
+	struct mem_cgroup_per_node *mz;
 
-	mz = container_of(lruvec, struct mem_cgroup_per_zone, lruvec);
+	mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
 	return mz->lru_size[lru];
 }
 
@@ -477,7 +511,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 	mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
 
@@ -568,16 +602,16 @@ static inline void mem_cgroup_migrate(struct page *old, struct page *new)
 {
 }
 
-static inline struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
+static inline struct lruvec *mem_cgroup_lruvec(struct pglist_data *pgdat,
 				struct mem_cgroup *memcg)
 {
-	return &zone->lruvec;
+	return node_lruvec(pgdat);
 }
 
 static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
-						    struct zone *zone)
+						    struct pglist_data *pgdat)
 {
-	return &zone->lruvec;
+	return &pgdat->lruvec;
 }
 
 static inline bool mm_match_cgroup(struct mm_struct *mm,
@@ -681,7 +715,7 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,
 }
 
 static inline
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
+unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 					    gfp_t gfp_mask,
 					    unsigned long *total_scanned)
 {
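
A small hedged usage sketch (hypothetical function, not in the patch; it relies on page_pgdat() added to mm.h in this same merge): with the per-zone memcg info gone, a page's LRU vector is now looked up by node, and the same call works whether or not the memory controller is enabled.

/* Illustration only: resolve the lruvec for a page under a given memcg. */
static struct lruvec *example_lruvec_of(struct page *page,
					struct mem_cgroup *memcg)
{
	return mem_cgroup_lruvec(page_pgdat(page), memcg);
}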
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index bcaa634139a9..93416196ba64 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -26,7 +26,7 @@ struct vmem_altmap {
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
 
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_ZONE_DEVICE)
+#ifdef CONFIG_ZONE_DEVICE
 struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
 #else
 static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 192c1bbe5fcd..08ed53eeedd5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -933,6 +933,11 @@ static inline struct zone *page_zone(const struct page *page)
 	return &NODE_DATA(page_to_nid(page))->node_zones[page_zonenum(page)];
 }
 
+static inline pg_data_t *page_pgdat(const struct page *page)
+{
+	return NODE_DATA(page_to_nid(page));
+}
+
 #ifdef SECTION_IN_PAGE_FLAGS
 static inline void set_page_section(struct page *page, unsigned long section)
 {
@@ -973,11 +978,21 @@ static inline struct mem_cgroup *page_memcg(struct page *page)
 {
 	return page->mem_cgroup;
 }
+static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return READ_ONCE(page->mem_cgroup);
+}
 #else
 static inline struct mem_cgroup *page_memcg(struct page *page)
 {
 	return NULL;
 }
+static inline struct mem_cgroup *page_memcg_rcu(struct page *page)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return NULL;
+}
 #endif
 
 /*
@@ -2284,6 +2299,8 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
 }
 #endif	/* __HAVE_ARCH_GATE_AREA */
 
+extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
+
 #ifdef CONFIG_SYSCTL
 extern int sysctl_drop_caches;
 int drop_caches_sysctl_handler(struct ctl_table *, int,
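
A brief hedged example of the new page_memcg_rcu() contract (hypothetical caller, not from the patch): it must run under rcu_read_lock(), which the WARN_ON_ONCE() above enforces.

/* Illustration only: query a page's memcg without taking a reference. */
static bool example_page_is_charged(struct page *page)
{
	bool charged;

	rcu_read_lock();
	charged = page_memcg_rcu(page) != NULL;
	rcu_read_unlock();

	return charged;
}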
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 5bd29ba4f174..71613e8a720f 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -23,25 +23,30 @@ static inline int page_is_file_cache(struct page *page)
 }
 
 static __always_inline void __update_lru_size(struct lruvec *lruvec,
-				enum lru_list lru, int nr_pages)
+				enum lru_list lru, enum zone_type zid,
+				int nr_pages)
 {
-	__mod_zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru, nr_pages);
+	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
+
+	__mod_node_page_state(pgdat, NR_LRU_BASE + lru, nr_pages);
+	__mod_zone_page_state(&pgdat->node_zones[zid],
+				NR_ZONE_LRU_BASE + lru, nr_pages);
 }
 
 static __always_inline void update_lru_size(struct lruvec *lruvec,
-				enum lru_list lru, int nr_pages)
+				enum lru_list lru, enum zone_type zid,
+				int nr_pages)
 {
+	__update_lru_size(lruvec, lru, zid, nr_pages);
 #ifdef CONFIG_MEMCG
 	mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
-#else
-	__update_lru_size(lruvec, lru, nr_pages);
 #endif
 }
 
 static __always_inline void add_page_to_lru_list(struct page *page,
 				struct lruvec *lruvec, enum lru_list lru)
 {
-	update_lru_size(lruvec, lru, hpage_nr_pages(page));
+	update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page));
 	list_add(&page->lru, &lruvec->lists[lru]);
 }
 
@@ -49,7 +54,7 @@ static __always_inline void del_page_from_lru_list(struct page *page,
 				struct lruvec *lruvec, enum lru_list lru)
 {
 	list_del(&page->lru);
-	update_lru_size(lruvec, lru, -hpage_nr_pages(page));
+	update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page));
 }
 
 /**
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 79472b22d23f..903200f4ec41 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -118,7 +118,7 @@ struct page {
 	 */
 	union {
 		struct list_head lru;	/* Pageout list, eg. active_list
-					 * protected by zone->lru_lock !
+					 * protected by zone_lru_lock !
 					 * Can be used as a generic list
 					 * by the page owner.
 					 */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 19425e988bdc..f2e4e90621ec 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -93,7 +93,7 @@ struct free_area {
 struct pglist_data;
 
 /*
- * zone->lock and zone->lru_lock are two of the hottest locks in the kernel.
+ * zone->lock and the zone lru_lock are two of the hottest locks in the kernel.
  * So add a wild amount of padding here to ensure that they fall into separate
  * cachelines.  There are very few zone structures in the machine, so space
  * consumption is not a concern here.
@@ -110,36 +110,20 @@ struct zone_padding {
 enum zone_stat_item {
 	/* First 128 byte cacheline (assuming 64 bit words) */
 	NR_FREE_PAGES,
-	NR_ALLOC_BATCH,
-	NR_LRU_BASE,
-	NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
-	NR_ACTIVE_ANON,		/*  "     "     "   "       "         */
-	NR_INACTIVE_FILE,	/*  "     "     "   "       "         */
-	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
-	NR_UNEVICTABLE,		/*  "     "     "   "       "         */
+	NR_ZONE_LRU_BASE, /* Used only for compaction and reclaim retry */
+	NR_ZONE_INACTIVE_ANON = NR_ZONE_LRU_BASE,
+	NR_ZONE_ACTIVE_ANON,
+	NR_ZONE_INACTIVE_FILE,
+	NR_ZONE_ACTIVE_FILE,
+	NR_ZONE_UNEVICTABLE,
+	NR_ZONE_WRITE_PENDING,	/* Count of dirty, writeback and unstable pages */
 	NR_MLOCK,		/* mlock()ed pages found and moved off LRU */
-	NR_ANON_PAGES,	/* Mapped anonymous pages */
-	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
-			   only modified from process context */
-	NR_FILE_PAGES,
-	NR_FILE_DIRTY,
-	NR_WRITEBACK,
 	NR_SLAB_RECLAIMABLE,
 	NR_SLAB_UNRECLAIMABLE,
 	NR_PAGETABLE,		/* used for pagetables */
-	NR_KERNEL_STACK,
+	NR_KERNEL_STACK_KB,	/* measured in KiB */
 	/* Second 128 byte cacheline */
-	NR_UNSTABLE_NFS,	/* NFS unstable pages */
 	NR_BOUNCE,
-	NR_VMSCAN_WRITE,
-	NR_VMSCAN_IMMEDIATE,	/* Prioritise for reclaim when writeback ends */
-	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
-	NR_ISOLATED_ANON,	/* Temporary isolated pages from anon lru */
-	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
-	NR_SHMEM,		/* shmem pages (included tmpfs/GEM pages) */
-	NR_DIRTIED,		/* page dirtyings since bootup */
-	NR_WRITTEN,		/* page writings since bootup */
-	NR_PAGES_SCANNED,	/* pages scanned since last reclaim */
 #if IS_ENABLED(CONFIG_ZSMALLOC)
 	NR_ZSPAGES,		/* allocated in zsmalloc */
 #endif
@@ -151,14 +135,40 @@ enum zone_stat_item {
 	NUMA_LOCAL,		/* allocation from local node */
 	NUMA_OTHER,		/* allocation from other node */
 #endif
+	NR_FREE_CMA_PAGES,
+	NR_VM_ZONE_STAT_ITEMS };
+
+enum node_stat_item {
+	NR_LRU_BASE,
+	NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
+	NR_ACTIVE_ANON,		/*  "     "     "   "       "         */
+	NR_INACTIVE_FILE,	/*  "     "     "   "       "         */
+	NR_ACTIVE_FILE,		/*  "     "     "   "       "         */
+	NR_UNEVICTABLE,		/*  "     "     "   "       "         */
+	NR_ISOLATED_ANON,	/* Temporary isolated pages from anon lru */
+	NR_ISOLATED_FILE,	/* Temporary isolated pages from file lru */
+	NR_PAGES_SCANNED,	/* pages scanned since last reclaim */
 	WORKINGSET_REFAULT,
 	WORKINGSET_ACTIVATE,
 	WORKINGSET_NODERECLAIM,
-	NR_ANON_THPS,
+	NR_ANON_MAPPED,	/* Mapped anonymous pages */
+	NR_FILE_MAPPED,	/* pagecache pages mapped into pagetables.
+			   only modified from process context */
+	NR_FILE_PAGES,
+	NR_FILE_DIRTY,
+	NR_WRITEBACK,
+	NR_WRITEBACK_TEMP,	/* Writeback using temporary buffers */
+	NR_SHMEM,		/* shmem pages (included tmpfs/GEM pages) */
 	NR_SHMEM_THPS,
 	NR_SHMEM_PMDMAPPED,
-	NR_FREE_CMA_PAGES,
-	NR_VM_ZONE_STAT_ITEMS };
+	NR_ANON_THPS,
+	NR_UNSTABLE_NFS,	/* NFS unstable pages */
+	NR_VMSCAN_WRITE,
+	NR_VMSCAN_IMMEDIATE,	/* Prioritise for reclaim when writeback ends */
+	NR_DIRTIED,		/* page dirtyings since bootup */
+	NR_WRITTEN,		/* page writings since bootup */
+	NR_VM_NODE_STAT_ITEMS
+};
 
 /*
  * We do arithmetic on the LRU lists in various places in the code,
@@ -215,7 +225,7 @@ struct lruvec {
 	/* Evictions & activations on the inactive file list */
 	atomic_long_t			inactive_age;
 #ifdef CONFIG_MEMCG
-	struct zone			*zone;
+	struct pglist_data *pgdat;
 #endif
 };
 
@@ -267,6 +277,11 @@ struct per_cpu_pageset {
 #endif
 };
 
+struct per_cpu_nodestat {
+	s8 stat_threshold;
+	s8 vm_node_stat_diff[NR_VM_NODE_STAT_ITEMS];
+};
+
 #endif /* !__GENERATING_BOUNDS.H */
 
 enum zone_type {
@@ -348,22 +363,9 @@ struct zone {
 #ifdef CONFIG_NUMA
 	int node;
 #endif
-
-	/*
-	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
-	 * this zone's LRU.  Maintained by the pageout code.
-	 */
-	unsigned int inactive_ratio;
-
 	struct pglist_data	*zone_pgdat;
 	struct per_cpu_pageset __percpu *pageset;
 
-	/*
-	 * This is a per-zone reserve of pages that are not available
-	 * to userspace allocations.
-	 */
-	unsigned long		totalreserve_pages;
-
 #ifndef CONFIG_SPARSEMEM
 	/*
 	 * Flags for a pageblock_nr_pages block. See pageblock-flags.h.
@@ -372,14 +374,6 @@ struct zone {
 	unsigned long		*pageblock_flags;
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_NUMA
-	/*
-	 * zone reclaim becomes active if more unmapped pages exist.
-	 */
-	unsigned long		min_unmapped_pages;
-	unsigned long		min_slab_pages;
-#endif /* CONFIG_NUMA */
-
 	/* zone_start_pfn == zone_start_paddr >> PAGE_SHIFT */
 	unsigned long		zone_start_pfn;
 
@@ -472,24 +466,21 @@ struct zone {
 	unsigned long		wait_table_hash_nr_entries;
 	unsigned long		wait_table_bits;
 
+	/* Write-intensive fields used from the page allocator */
 	ZONE_PADDING(_pad1_)
+
 	/* free areas of different sizes */
 	struct free_area	free_area[MAX_ORDER];
 
 	/* zone flags, see below */
 	unsigned long		flags;
 
-	/* Write-intensive fields used from the page allocator */
+	/* Primarily protects free_area */
 	spinlock_t		lock;
 
+	/* Write-intensive fields used by compaction and vmstats. */
 	ZONE_PADDING(_pad2_)
 
-	/* Write-intensive fields used by page reclaim */
-
-	/* Fields commonly accessed by the page reclaim scanner */
-	spinlock_t		lru_lock;
-	struct lruvec		lruvec;
-
 	/*
 	 * When free pages are below this point, additional steps are taken
 	 * when reading the number of free pages to avoid per-cpu counter
@@ -527,19 +518,18 @@ struct zone {
 	atomic_long_t		vm_stat[NR_VM_ZONE_STAT_ITEMS];
 } ____cacheline_internodealigned_in_smp;
 
-enum zone_flags {
-	ZONE_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
-	ZONE_CONGESTED,			/* zone has many dirty pages backed by
+enum pgdat_flags {
+	PGDAT_CONGESTED,		/* pgdat has many dirty pages backed by
 					 * a congested BDI
 					 */
-	ZONE_DIRTY,			/* reclaim scanning has recently found
+	PGDAT_DIRTY,			/* reclaim scanning has recently found
 					 * many dirty file pages at the tail
 					 * of the LRU.
 					 */
-	ZONE_WRITEBACK,			/* reclaim scanning has recently found
+	PGDAT_WRITEBACK,		/* reclaim scanning has recently found
 					 * many pages under writeback
 					 */
-	ZONE_FAIR_DEPLETED,		/* fair zone policy batch depleted */
+	PGDAT_RECLAIM_LOCKED,		/* prevents concurrent reclaim */
 };
 
 static inline unsigned long zone_end_pfn(const struct zone *zone)
@@ -663,8 +653,9 @@ typedef struct pglist_data {
 	wait_queue_head_t pfmemalloc_wait;
 	struct task_struct *kswapd;	/* Protected by
 					   mem_hotplug_begin/end() */
-	int kswapd_max_order;
-	enum zone_type classzone_idx;
+	int kswapd_order;
+	enum zone_type kswapd_classzone_idx;
+
 #ifdef CONFIG_COMPACTION
 	int kcompactd_max_order;
 	enum zone_type kcompactd_classzone_idx;
@@ -681,6 +672,23 @@ typedef struct pglist_data {
 	/* Number of pages migrated during the rate limiting time interval */
 	unsigned long numabalancing_migrate_nr_pages;
 #endif
+	/*
+	 * This is a per-node reserve of pages that are not available
+	 * to userspace allocations.
+	 */
+	unsigned long		totalreserve_pages;
+
+#ifdef CONFIG_NUMA
+	/*
+	 * zone reclaim becomes active if more unmapped pages exist.
+	 */
+	unsigned long		min_unmapped_pages;
+	unsigned long		min_slab_pages;
+#endif /* CONFIG_NUMA */
+
+	/* Write-intensive fields used by page reclaim */
+	ZONE_PADDING(_pad1_)
+	spinlock_t		lru_lock;
 
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 	/*
@@ -695,6 +703,23 @@ typedef struct pglist_data {
 	struct list_head split_queue;
 	unsigned long split_queue_len;
 #endif
+
+	/* Fields commonly accessed by the page reclaim scanner */
+	struct lruvec		lruvec;
+
+	/*
+	 * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on
+	 * this node's LRU.  Maintained by the pageout code.
+	 */
+	unsigned int inactive_ratio;
+
+	unsigned long		flags;
+
+	ZONE_PADDING(_pad2_)
+
+	/* Per-node vmstats */
+	struct per_cpu_nodestat __percpu *per_cpu_nodestats;
+	atomic_long_t		vm_stat[NR_VM_NODE_STAT_ITEMS];
 } pg_data_t;
 
 #define node_present_pages(nid)	(NODE_DATA(nid)->node_present_pages)
@@ -708,6 +733,15 @@ typedef struct pglist_data {
 
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 #define node_end_pfn(nid) pgdat_end_pfn(NODE_DATA(nid))
+static inline spinlock_t *zone_lru_lock(struct zone *zone)
+{
+	return &zone->zone_pgdat->lru_lock;
+}
+
+static inline struct lruvec *node_lruvec(struct pglist_data *pgdat)
+{
+	return &pgdat->lruvec;
+}
 
 static inline unsigned long pgdat_end_pfn(pg_data_t *pgdat)
 {
@@ -760,12 +794,12 @@ extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn,
 
 extern void lruvec_init(struct lruvec *lruvec);
 
-static inline struct zone *lruvec_zone(struct lruvec *lruvec)
+static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
 {
 #ifdef CONFIG_MEMCG
-	return lruvec->zone;
+	return lruvec->pgdat;
 #else
-	return container_of(lruvec, struct zone, lruvec);
+	return container_of(lruvec, struct pglist_data, lruvec);
 #endif
 }
 
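
To see the reorganisation from the caller's side, a minimal hedged sketch (hypothetical function, not part of the commit, assuming the usual mm includes): the LRU lists and the lru_lock now live in pg_data_t, so they are reached through the node helpers added above rather than through the zone.

/* Illustration only: the per-node LRU is locked via the node's lru_lock. */
static bool example_node_lru_empty(struct page *page, enum lru_list lru)
{
	struct pglist_data *pgdat = page_pgdat(page);
	bool empty;

	spin_lock_irq(zone_lru_lock(page_zone(page)));
	empty = list_empty(&node_lruvec(pgdat)->lists[lru]);
	spin_unlock_irq(zone_lru_lock(page_zone(page)));

	return empty;
}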
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 606137b3b778..5bc0457ee3a8 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -73,9 +73,9 @@ static inline bool oom_task_origin(const struct task_struct *p)
 extern void mark_oom_victim(struct task_struct *tsk);
 
 #ifdef CONFIG_MMU
-extern void try_oom_reaper(struct task_struct *tsk);
+extern void wake_oom_reaper(struct task_struct *tsk);
 #else
-static inline void try_oom_reaper(struct task_struct *tsk)
+static inline void wake_oom_reaper(struct task_struct *tsk)
 {
 }
 #endif
@@ -107,27 +107,7 @@ extern void oom_killer_enable(void);
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
-static inline bool task_will_free_mem(struct task_struct *task)
-{
-	struct signal_struct *sig = task->signal;
-
-	/*
-	 * A coredumping process may sleep for an extended period in exit_mm(),
-	 * so the oom killer cannot assume that the process will promptly exit
-	 * and release memory.
-	 */
-	if (sig->flags & SIGNAL_GROUP_COREDUMP)
-		return false;
-
-	if (!(task->flags & PF_EXITING))
-		return false;
-
-	/* Make sure that the whole thread group is going down */
-	if (!thread_group_empty(task) && !(sig->flags & SIGNAL_GROUP_EXIT))
-		return false;
-
-	return true;
-}
+bool task_will_free_mem(struct task_struct *task);
 
 /* sysctls */
 extern int sysctl_oom_dump_tasks;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d99218a1e043..553af2923824 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -523,6 +523,7 @@ static inline int get_dumpable(struct mm_struct *mm)
 #define MMF_HAS_UPROBES		19	/* has uprobes */
 #define MMF_RECALC_UPROBES	20	/* MMF_HAS_UPROBES can be wrong */
 #define MMF_OOM_REAPED		21	/* mm has been already reaped */
+#define MMF_OOM_NOT_REAPABLE	22	/* mm couldn't be reaped */
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
@@ -1949,6 +1950,32 @@ static inline int tsk_nr_cpus_allowed(struct task_struct *p)
 #define TNF_FAULT_LOCAL	0x08
 #define TNF_MIGRATE_FAIL 0x10
 
+static inline bool in_vfork(struct task_struct *tsk)
+{
+	bool ret;
+
+	/*
+	 * need RCU to access ->real_parent if CLONE_VM was used along with
+	 * CLONE_PARENT.
+	 *
+	 * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
+	 * imply CLONE_VM
+	 *
+	 * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
+	 * ->real_parent is not necessarily the task doing vfork(), so in
+	 * theory we can't rely on task_lock() if we want to dereference it.
+	 *
+	 * And in this case we can't trust the real_parent->mm == tsk->mm
+	 * check, it can be false negative. But we do not care, if init or
+	 * another oom-unkillable task does this it should blame itself.
+	 */
+	rcu_read_lock();
+	ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
+	rcu_read_unlock();
+
+	return ret;
+}
+
 #ifdef CONFIG_NUMA_BALANCING
 extern void task_numa_fault(int last_node, int node, int pages, int flags);
 extern pid_t task_numa_group_id(struct task_struct *p);
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 339ba027ade9..4ad2c5a26399 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -88,7 +88,8 @@ struct kmem_cache {
 };
 
 static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
-				void *x) {
+				void *x)
+{
 	void *object = x - (x - page->s_mem) % cache->size;
 	void *last_object = page->s_mem + (cache->num - 1) * cache->size;
 
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5624c1f3eb0a..75f56c2ef2d4 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -104,6 +104,10 @@ struct kmem_cache {
 	unsigned int *random_seq;
 #endif
 
+#ifdef CONFIG_KASAN
+	struct kasan_cache kasan_info;
+#endif
+
 	struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
@@ -119,15 +123,17 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
 void object_err(struct kmem_cache *s, struct page *page,
 			u8 *object, char *reason);
 
+void *fixup_red_left(struct kmem_cache *s, void *p);
+
 static inline void *nearest_obj(struct kmem_cache *cache, struct page *page,
 				void *x) {
 	void *object = x - (x - page_address(page)) % cache->size;
 	void *last_object = page_address(page) +
 		(page->objects - 1) * cache->size;
-	if (unlikely(object > last_object))
-		return last_object;
-	else
-		return object;
+	void *result = (unlikely(object > last_object)) ? last_object : object;
+
+	result = fixup_red_left(cache, result);
+	return result;
 }
 
 #endif /* _LINUX_SLUB_DEF_H */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0af2bb2028fd..b17cc4830fa6 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -157,15 +157,6 @@ enum {
 #define SWAP_CLUSTER_MAX 32UL
 #define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
 
-/*
- * Ratio between zone->managed_pages and the "gap" that above the per-zone
- * "high_wmark". While balancing nodes, We allow kswapd to shrink zones that
- * do not meet the (high_wmark + gap) watermark, even which already met the
- * high_wmark, in order to provide better per-zone lru behavior. We are ok to
- * spend not more than 1% of the memory for this zone balancing "gap".
- */
-#define KSWAPD_ZONE_BALANCE_GAP_RATIO 100
-
 #define SWAP_MAP_MAX	0x3e	/* Max duplication count, in first swap_map */
 #define SWAP_MAP_BAD	0x3f	/* Note pageblock is bad, in first swap_map */
 #define SWAP_HAS_CACHE	0x40	/* Flag page is cached, in first swap_map */
@@ -317,6 +308,7 @@ extern void lru_cache_add_active_or_unevictable(struct page *page,
 
 /* linux/mm/vmscan.c */
 extern unsigned long zone_reclaimable_pages(struct zone *zone);
+extern unsigned long pgdat_reclaimable_pages(struct pglist_data *pgdat);
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 					gfp_t gfp_mask, nodemask_t *mask);
 extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
@@ -324,9 +316,9 @@ extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
 						  gfp_t gfp_mask,
 						  bool may_swap);
-extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
+extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
-						struct zone *zone,
+						pg_data_t *pgdat,
 						unsigned long *nr_scanned);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
@@ -334,13 +326,14 @@ extern int remove_mapping(struct address_space *mapping, struct page *page);
 extern unsigned long vm_total_pages;
 
 #ifdef CONFIG_NUMA
-extern int zone_reclaim_mode;
+extern int node_reclaim_mode;
 extern int sysctl_min_unmapped_ratio;
 extern int sysctl_min_slab_ratio;
-extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
+extern int node_reclaim(struct pglist_data *, gfp_t, unsigned int);
 #else
-#define zone_reclaim_mode 0
-static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
+#define node_reclaim_mode 0
+static inline int node_reclaim(struct pglist_data *pgdat, gfp_t mask,
+				unsigned int order)
 {
 	return 0;
 }
diff --git a/include/linux/topology.h b/include/linux/topology.h
index afce69296ac0..cb0775e1ee4b 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -54,7 +54,7 @@ int arch_update_cpu_topology(void);
 /*
  * If the distance between nodes in a system is larger than RECLAIM_DISTANCE
  * (in whatever arch specific measurement units returned by node_distance())
- * and zone_reclaim_mode is enabled then the VM will only call zone_reclaim()
+ * and node_reclaim_mode is enabled then the VM will only call node_reclaim()
  * on nodes within this distance.
  */
 #define RECLAIM_DISTANCE 30
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 42604173f122..4d6ec58a8d45 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -23,21 +23,23 @@
 
 enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		FOR_ALL_ZONES(PGALLOC),
+		FOR_ALL_ZONES(ALLOCSTALL),
+		FOR_ALL_ZONES(PGSCAN_SKIP),
 		PGFREE, PGACTIVATE, PGDEACTIVATE,
 		PGFAULT, PGMAJFAULT,
 		PGLAZYFREED,
-		FOR_ALL_ZONES(PGREFILL),
-		FOR_ALL_ZONES(PGSTEAL_KSWAPD),
-		FOR_ALL_ZONES(PGSTEAL_DIRECT),
-		FOR_ALL_ZONES(PGSCAN_KSWAPD),
-		FOR_ALL_ZONES(PGSCAN_DIRECT),
+		PGREFILL,
+		PGSTEAL_KSWAPD,
+		PGSTEAL_DIRECT,
+		PGSCAN_KSWAPD,
+		PGSCAN_DIRECT,
 		PGSCAN_DIRECT_THROTTLE,
 #ifdef CONFIG_NUMA
 		PGSCAN_ZONE_RECLAIM_FAILED,
 #endif
 		PGINODESTEAL, SLABS_SCANNED, KSWAPD_INODESTEAL,
 		KSWAPD_LOW_WMARK_HIT_QUICKLY, KSWAPD_HIGH_WMARK_HIT_QUICKLY,
-		PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+		PAGEOUTRUN, PGROTATED,
 		DROP_PAGECACHE, DROP_SLAB,
 #ifdef CONFIG_NUMA_BALANCING
 		NUMA_PTE_UPDATES,
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index d2da8e053210..613771909b6e 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -101,25 +101,42 @@ static inline void vm_events_fold_cpu(int cpu)
 #define count_vm_vmacache_event(x) do {} while (0)
 #endif
 
-#define __count_zone_vm_events(item, zone, delta) \
-		__count_vm_events(item##_NORMAL - ZONE_NORMAL + \
-			zone_idx(zone), delta)
+#define __count_zid_vm_events(item, zid, delta) \
+	__count_vm_events(item##_NORMAL - ZONE_NORMAL + zid, delta)
 
 /*
- * Zone based page accounting with per cpu differentials.
+ * Zone and node-based page accounting with per cpu differentials.
  */
-extern atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
+extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
 
 static inline void zone_page_state_add(long x, struct zone *zone,
 				 enum zone_stat_item item)
 {
 	atomic_long_add(x, &zone->vm_stat[item]);
-	atomic_long_add(x, &vm_stat[item]);
+	atomic_long_add(x, &vm_zone_stat[item]);
+}
+
+static inline void node_page_state_add(long x, struct pglist_data *pgdat,
+				 enum node_stat_item item)
+{
+	atomic_long_add(x, &pgdat->vm_stat[item]);
+	atomic_long_add(x, &vm_node_stat[item]);
 }
 
 static inline unsigned long global_page_state(enum zone_stat_item item)
 {
-	long x = atomic_long_read(&vm_stat[item]);
+	long x = atomic_long_read(&vm_zone_stat[item]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
+static inline unsigned long global_node_page_state(enum node_stat_item item)
+{
+	long x = atomic_long_read(&vm_node_stat[item]);
 #ifdef CONFIG_SMP
 	if (x < 0)
 		x = 0;
@@ -160,32 +177,61 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
 	return x;
 }
 
-#ifdef CONFIG_NUMA
+static inline unsigned long node_page_state_snapshot(pg_data_t *pgdat,
+					enum node_stat_item item)
+{
+	long x = atomic_long_read(&pgdat->vm_stat[item]);
 
-extern unsigned long node_page_state(int node, enum zone_stat_item item);
+#ifdef CONFIG_SMP
+	int cpu;
+	for_each_online_cpu(cpu)
+		x += per_cpu_ptr(pgdat->per_cpu_nodestats, cpu)->vm_node_stat_diff[item];
 
-#else
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
 
-#define node_page_state(node, item) global_page_state(item)
 
+#ifdef CONFIG_NUMA
+extern unsigned long sum_zone_node_page_state(int node,
+				 enum zone_stat_item item);
+extern unsigned long node_page_state(struct pglist_data *pgdat,
+						enum node_stat_item item);
+#else
+#define sum_zone_node_page_state(node, item) global_page_state(item)
+#define node_page_state(node, item) global_node_page_state(item)
 #endif /* CONFIG_NUMA */
 
 #define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)
 #define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d))
+#define add_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, __d)
+#define sub_node_page_state(__p, __i, __d) mod_node_page_state(__p, __i, -(__d))
 
 #ifdef CONFIG_SMP
 void __mod_zone_page_state(struct zone *, enum zone_stat_item item, long);
 void __inc_zone_page_state(struct page *, enum zone_stat_item);
 void __dec_zone_page_state(struct page *, enum zone_stat_item);
 
+void __mod_node_page_state(struct pglist_data *, enum node_stat_item item, long);
+void __inc_node_page_state(struct page *, enum node_stat_item);
+void __dec_node_page_state(struct page *, enum node_stat_item);
+
 void mod_zone_page_state(struct zone *, enum zone_stat_item, long);
 void inc_zone_page_state(struct page *, enum zone_stat_item);
 void dec_zone_page_state(struct page *, enum zone_stat_item);
 
-extern void inc_zone_state(struct zone *, enum zone_stat_item);
+void mod_node_page_state(struct pglist_data *, enum node_stat_item, long);
+void inc_node_page_state(struct page *, enum node_stat_item);
+void dec_node_page_state(struct page *, enum node_stat_item);
+
+extern void inc_node_state(struct pglist_data *, enum node_stat_item);
 extern void __inc_zone_state(struct zone *, enum zone_stat_item);
+extern void __inc_node_state(struct pglist_data *, enum node_stat_item);
 extern void dec_zone_state(struct zone *, enum zone_stat_item);
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
+extern void __dec_node_state(struct pglist_data *, enum node_stat_item);
 
 void quiet_vmstat(void);
 void cpu_vm_stats_fold(int cpu);
@@ -213,16 +259,34 @@ static inline void __mod_zone_page_state(struct zone *zone,
 	zone_page_state_add(delta, zone, item);
 }
 
+static inline void __mod_node_page_state(struct pglist_data *pgdat,
+			enum node_stat_item item, int delta)
+{
+	node_page_state_add(delta, pgdat, item);
+}
+
 static inline void __inc_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	atomic_long_inc(&zone->vm_stat[item]);
-	atomic_long_inc(&vm_stat[item]);
+	atomic_long_inc(&vm_zone_stat[item]);
+}
+
+static inline void __inc_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+{
+	atomic_long_inc(&pgdat->vm_stat[item]);
+	atomic_long_inc(&vm_node_stat[item]);
 }
 
 static inline void __dec_zone_state(struct zone *zone, enum zone_stat_item item)
 {
 	atomic_long_dec(&zone->vm_stat[item]);
-	atomic_long_dec(&vm_stat[item]);
+	atomic_long_dec(&vm_zone_stat[item]);
+}
+
+static inline void __dec_node_state(struct pglist_data *pgdat, enum node_stat_item item)
+{
+	atomic_long_dec(&pgdat->vm_stat[item]);
+	atomic_long_dec(&vm_node_stat[item]);
 }
 
 static inline void __inc_zone_page_state(struct page *page,
@@ -231,12 +295,26 @@ static inline void __inc_zone_page_state(struct page *page,
 	__inc_zone_state(page_zone(page), item);
 }
 
+static inline void __inc_node_page_state(struct page *page,
+			enum node_stat_item item)
+{
+	__inc_node_state(page_pgdat(page), item);
+}
+
+
 static inline void __dec_zone_page_state(struct page *page,
 			enum zone_stat_item item)
 {
 	__dec_zone_state(page_zone(page), item);
 }
 
+static inline void __dec_node_page_state(struct page *page,
+			enum node_stat_item item)
+{
+	__dec_node_state(page_pgdat(page), item);
+}
+
+
 /*
  * We only use atomic operations to update counters. So there is no need to
  * disable interrupts.
@@ -245,7 +323,12 @@ static inline void __dec_zone_page_state(struct page *page,
 #define dec_zone_page_state __dec_zone_page_state
 #define mod_zone_page_state __mod_zone_page_state
 
+#define inc_node_page_state __inc_node_page_state
+#define dec_node_page_state __dec_node_page_state
+#define mod_node_page_state __mod_node_page_state
+
 #define inc_zone_state __inc_zone_state
+#define inc_node_state __inc_node_state
 #define dec_zone_state __dec_zone_state
 
 #define set_pgdat_percpu_threshold(pgdat, callback) { }
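
Finally, a hedged example of picking the right accessor for the split counters (hypothetical helpers, not in the patch): zone_stat_item values read the per-zone array through global_page_state(), node_stat_item values read the new per-node array through global_node_page_state().

/* Illustration only: LRU file pages are node counters, free pages stay zonal. */
static unsigned long example_global_file_lru_pages(void)
{
	return global_node_page_state(NR_INACTIVE_FILE) +
	       global_node_page_state(NR_ACTIVE_FILE);
}

static unsigned long example_global_free_pages(void)
{
	return global_page_state(NR_FREE_PAGES);
}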
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 717e6149e753..fc1e16c25a29 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -320,7 +320,7 @@ void laptop_mode_timer_fn(unsigned long data);
 static inline void laptop_sync_completion(void) { }
 #endif
 void throttle_vm_writeout(gfp_t gfp_mask);
-bool zone_dirty_ok(struct zone *zone);
+bool node_dirty_ok(struct pglist_data *pgdat);
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 #ifdef CONFIG_CGROUP_WRITEBACK
 void wb_domain_exit(struct wb_domain *dom);