Diffstat (limited to 'mm')
-rw-r--r--  mm/gup.c           7
-rw-r--r--  mm/huge_memory.c   9
-rw-r--r--  mm/hugetlb.c       9
-rw-r--r--  mm/khugepaged.c   15
-rw-r--r--  mm/memblock.c     28
-rw-r--r--  mm/mempolicy.c     3
-rw-r--r--  mm/page_alloc.c   17
-rw-r--r--  mm/percpu-km.c     8
-rw-r--r--  mm/percpu-vm.c    18
-rw-r--r--  mm/percpu.c       67
-rw-r--r--  mm/shmem.c        31
-rw-r--r--  mm/vmscan.c       31
12 files changed, 134 insertions, 109 deletions
diff --git a/mm/gup.c b/mm/gup.c
index 1b46e6e74881..6afae32571ca 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -516,7 +516,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
 	}
 
 	if (ret & VM_FAULT_RETRY) {
-		if (nonblocking)
+		if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
 			*nonblocking = 0;
 		return -EBUSY;
 	}
@@ -890,7 +890,10 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
 			break;
 		}
 		if (*locked) {
-			/* VM_FAULT_RETRY didn't trigger */
+			/*
+			 * VM_FAULT_RETRY didn't trigger or it was a
+			 * FOLL_NOWAIT.
+			 */
 			if (!pages_done)
 				pages_done = ret;
 			break;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 87ab9b8f56b5..5a68730eebd6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
 	VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+	if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+				  true)) {
 		put_page(page);
 		count_vm_event(THP_FAULT_FALLBACK);
 		return VM_FAULT_FALLBACK;
@@ -1316,7 +1317,7 @@ alloc:
 	}
 
 	if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
-					huge_gfp, &memcg, true))) {
+				huge_gfp | __GFP_NORETRY, &memcg, true))) {
 		put_page(new_page);
 		split_huge_pmd(vma, vmf->pmd, vmf->address);
 		if (page)
@@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 
 	list_for_each_safe(pos, next, &list) {
 		page = list_entry((void *)pos, struct page, mapping);
-		lock_page(page);
+		if (!trylock_page(page))
+			goto next;
 		/* split_huge_page() removes page from list on success */
 		if (!split_huge_page(page))
 			split++;
 		unlock_page(page);
+next:
 		put_page(page);
 	}
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c204e3d132b..976bbc5646fe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/mmdebug.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
 #include <linux/string_helpers.h>
@@ -1583,7 +1584,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 		page = NULL;
 	} else {
 		h->surplus_huge_pages++;
-		h->nr_huge_pages_node[page_to_nid(page)]++;
+		h->surplus_huge_pages_node[page_to_nid(page)]++;
 	}
 
 out_unlock:
@@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode,
 	struct resv_map *resv_map;
 	long gbl_reserve;
 
+	/* This should never happen */
+	if (from > to) {
+		VM_WARN(1, "%s called with a negative range\n", __func__);
+		return -EINVAL;
+	}
+
 	/*
 	 * Only apply hugepage reservation if asked. At fault time, an
 	 * attempt will be made for VM_NORESERVE to allocate a page
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b7e2268dfc9a..e42568284e06 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
 			goto out;
 		}
 
-		VM_BUG_ON_PAGE(PageCompound(page), page);
+		/* TODO: teach khugepaged to collapse THP mapped with pte */
+		if (PageCompound(page)) {
+			result = SCAN_PAGE_COMPOUND;
+			goto out;
+		}
+
 		VM_BUG_ON_PAGE(!PageAnon(page), page);
 
 		/*
@@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out_nolock;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+	/* Do not oom kill for khugepaged charges */
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+					   &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out_nolock;
 	}
@@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm,
 		goto out;
 	}
 
-	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+	/* Do not oom kill for khugepaged charges */
+	if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+					   &memcg, true))) {
 		result = SCAN_CGROUP_CHARGE_FAIL;
 		goto out;
 	}
diff --git a/mm/memblock.c b/mm/memblock.c
index 5a9ca2a1751b..48376bd33274 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
 		*out_nid = r->nid;
 }
 
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
-						      unsigned long max_pfn)
-{
-	struct memblock_type *type = &memblock.memory;
-	unsigned int right = type->cnt;
-	unsigned int mid, left = 0;
-	phys_addr_t addr = PFN_PHYS(pfn + 1);
-
-	do {
-		mid = (right + left) / 2;
-
-		if (addr < type->regions[mid].base)
-			right = mid;
-		else if (addr >= (type->regions[mid].base +
-				  type->regions[mid].size))
-			left = mid + 1;
-		else {
-			/* addr is within the region, so pfn + 1 is valid */
-			return min(pfn + 1, max_pfn);
-		}
-	} while (left < right);
-
-	if (right == type->cnt)
-		return max_pfn;
-	else
-		return min(PHYS_PFN(type->regions[right].base), max_pfn);
-}
-
 /**
  * memblock_set_node - set node ID on memblock regions
  * @base: base of area to set node ID for
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d879f1d8a44a..32cba0332787 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2124,6 +2124,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
 	case MPOL_INTERLEAVE:
 		return !!nodes_equal(a->v.nodes, b->v.nodes);
 	case MPOL_PREFERRED:
+		/* a's ->flags is the same as b's */
+		if (a->flags & MPOL_F_LOCAL)
+			return true;
 		return a->v.preferred_node == b->v.preferred_node;
 	default:
 		BUG();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cb416723538f..1741dd23e7c1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1910,7 +1910,9 @@ static int move_freepages(struct zone *zone,
 	 * Remove at a later date when no bug reports exist related to
 	 * grouping pages by mobility
 	 */
-	VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
+	VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) &&
+		  pfn_valid(page_to_pfn(end_page)) &&
+		  page_zone(start_page) != page_zone(end_page));
 #endif
 
 	if (num_movable)
@@ -3594,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
 		return false;
 
 	/* this guy won't enter reclaim */
-	if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+	if (current->flags & PF_MEMALLOC)
 		return false;
 
 	/* We're only interested __GFP_FS allocations for now */
@@ -5354,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		if (context != MEMMAP_EARLY)
 			goto not_early;
 
-		if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-			/*
-			 * Skip to the pfn preceding the next valid one (or
-			 * end_pfn), such that we hit a valid pfn (or end_pfn)
-			 * on our next iteration of the loop.
-			 */
-			pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+		if (!early_pfn_valid(pfn))
 			continue;
-		}
 		if (!early_pfn_in_nid(pfn, nid))
 			continue;
 		if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index d2a76642c4ae..38de70ab1a0d 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -34,7 +34,7 @@
 #include <linux/log2.h>
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-			       int page_start, int page_end)
+			       int page_start, int page_end, gfp_t gfp)
 {
 	return 0;
 }
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
 	/* nada */
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
 	const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
 	struct pcpu_chunk *chunk;
 	struct page *pages;
 	int i;
 
-	chunk = pcpu_alloc_chunk();
+	chunk = pcpu_alloc_chunk(gfp);
 	if (!chunk)
 		return NULL;
 
-	pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+	pages = alloc_pages(gfp, order_base_2(nr_pages));
 	if (!pages) {
 		pcpu_free_chunk(chunk);
 		return NULL;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 9158e5a81391..d8078de912de 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
 	lockdep_assert_held(&pcpu_alloc_mutex);
 
 	if (!pages)
-		pages = pcpu_mem_zalloc(pages_size);
+		pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
 	return pages;
 }
 
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
  * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
  * @page_start: page index of the first page to be allocated
  * @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
  *
  * Allocate pages [@page_start,@page_end) into @pages for all units.
  * The allocation is for @chunk.  Percpu core doesn't care about the
  * content of @pages and will pass it verbatim to pcpu_map_pages().
  */
 static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
-			    struct page **pages, int page_start, int page_end)
+			    struct page **pages, int page_start, int page_end,
+			    gfp_t gfp)
 {
-	const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
 	unsigned int cpu, tcpu;
 	int i;
 
+	gfp |= __GFP_HIGHMEM;
+
 	for_each_possible_cpu(cpu) {
 		for (i = page_start; i < page_end; i++) {
 			struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * @chunk: chunk of interest
  * @page_start: the start page
  * @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
  *
  * For each cpu, populate and map pages [@page_start,@page_end) into
  * @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * pcpu_alloc_mutex, does GFP_KERNEL allocation.
  */
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-			       int page_start, int page_end)
+			       int page_start, int page_end, gfp_t gfp)
 {
 	struct page **pages;
 
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
 	if (!pages)
 		return -ENOMEM;
 
-	if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+	if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
 		return -ENOMEM;
 
 	if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
 	pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
 	struct pcpu_chunk *chunk;
 	struct vm_struct **vms;
 
-	chunk = pcpu_alloc_chunk();
+	chunk = pcpu_alloc_chunk(gfp);
 	if (!chunk)
 		return NULL;
 
diff --git a/mm/percpu.c b/mm/percpu.c
index 50e7fdf84055..9297098519a6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
+#include <linux/sched.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
+ * @gfp: allocation flags
  *
  * Allocate @size bytes.  If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used.  The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags.  The
+ * returned memory is always zeroed.
  *
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
 	if (WARN_ON_ONCE(!slab_is_available()))
 		return NULL;
 
 	if (size <= PAGE_SIZE)
-		return kzalloc(size, GFP_KERNEL);
+		return kzalloc(size, gfp);
 	else
-		return vzalloc(size);
+		return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
 
 /**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
 	return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
 	struct pcpu_chunk *chunk;
 	int region_bits;
 
-	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+	chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
 	if (!chunk)
 		return NULL;
 
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
 	region_bits = pcpu_chunk_map_bits(chunk);
 
 	chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
-					   sizeof(chunk->alloc_map[0]));
+					   sizeof(chunk->alloc_map[0]), gfp);
 	if (!chunk->alloc_map)
 		goto alloc_map_fail;
 
 	chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
-					   sizeof(chunk->bound_map[0]));
+					   sizeof(chunk->bound_map[0]), gfp);
 	if (!chunk->bound_map)
 		goto bound_map_fail;
 
 	chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
-					   sizeof(chunk->md_blocks[0]));
+					   sizeof(chunk->md_blocks[0]), gfp);
 	if (!chunk->md_blocks)
 		goto md_blocks_fail;
 
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
  * pcpu_addr_to_page		- translate address to physical address
  * pcpu_verify_alloc_info	- check alloc_info is acceptable during init
  */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+			       int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+				  int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 				 gfp_t gfp)
 {
+	/* whitelisted flags that can be passed to the backing allocators */
+	gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 	bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
 	bool do_warn = !(gfp & __GFP_NOWARN);
 	static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
 		return NULL;
 	}
 
-	if (!is_atomic)
-		mutex_lock(&pcpu_alloc_mutex);
+	if (!is_atomic) {
+		/*
+		 * pcpu_balance_workfn() allocates memory under this mutex,
+		 * and it may wait for memory reclaim. Allow current task
+		 * to become OOM victim, in case of memory pressure.
+		 */
+		if (gfp & __GFP_NOFAIL)
+			mutex_lock(&pcpu_alloc_mutex);
+		else if (mutex_lock_killable(&pcpu_alloc_mutex))
+			return NULL;
+	}
 
 	spin_lock_irqsave(&pcpu_lock, flags);
 
@@ -1421,7 +1434,7 @@ restart:
 	}
 
 	if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-		chunk = pcpu_create_chunk();
+		chunk = pcpu_create_chunk(pcpu_gfp);
 		if (!chunk) {
 			err = "failed to allocate new chunk";
 			goto fail;
@@ -1450,7 +1463,7 @@ area_found:
 					   page_start, page_end) {
 			WARN_ON(chunk->immutable);
 
-			ret = pcpu_populate_chunk(chunk, rs, re);
+			ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
 			spin_lock_irqsave(&pcpu_lock, flags);
 			if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
  * pcpu_balance_workfn - manage the amount of free chunks and populated pages
  * @work: unused
  *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one.  This is also
+ * responsible for maintaining the pool of empty populated pages.  However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered.  We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
  */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+	/* gfp flags passed to underlying allocators */
+	const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
 	LIST_HEAD(to_free);
 	struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
 	struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
 			spin_unlock_irq(&pcpu_lock);
 		}
 		pcpu_destroy_chunk(chunk);
+		cond_resched();
 	}
 
 	/*
@@ -1645,7 +1666,7 @@ retry_pop:
 					     chunk->nr_pages) {
 			int nr = min(re - rs, nr_to_pop);
 
-			ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+			ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
 			if (!ret) {
 				nr_to_pop -= nr;
 				spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
 
 	if (nr_to_pop) {
 		/* ran out of chunks to populate, create a new one and retry */
-		chunk = pcpu_create_chunk();
+		chunk = pcpu_create_chunk(gfp);
 		if (chunk) {
 			spin_lock_irq(&pcpu_lock);
 			pcpu_chunk_relocate(chunk, -1);
diff --git a/mm/shmem.c b/mm/shmem.c
index 1907688b75ee..b85919243399 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -493,36 +493,45 @@ next:
 		info = list_entry(pos, struct shmem_inode_info, shrinklist);
 		inode = &info->vfs_inode;
 
-		if (nr_to_split && split >= nr_to_split) {
-			iput(inode);
-			continue;
-		}
+		if (nr_to_split && split >= nr_to_split)
+			goto leave;
 
-		page = find_lock_page(inode->i_mapping,
+		page = find_get_page(inode->i_mapping,
 				(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
 		if (!page)
 			goto drop;
 
+		/* No huge page at the end of the file: nothing to split */
 		if (!PageTransHuge(page)) {
-			unlock_page(page);
 			put_page(page);
 			goto drop;
 		}
 
+		/*
+		 * Leave the inode on the list if we failed to lock
+		 * the page at this time.
+		 *
+		 * Waiting for the lock may lead to deadlock in the
+		 * reclaim path.
+		 */
+		if (!trylock_page(page)) {
+			put_page(page);
+			goto leave;
+		}
+
 		ret = split_huge_page(page);
 		unlock_page(page);
 		put_page(page);
 
-		if (ret) {
-			/* split failed: leave it on the list */
-			iput(inode);
-			continue;
-		}
+		/* If split failed leave the inode on the list */
+		if (ret)
+			goto leave;
 
 		split++;
 drop:
 		list_del_init(&info->shrinklist);
 		removed++;
+leave:
 		iput(inode);
 	}
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bee53495a829..cd5dc3faaa57 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1780,6 +1780,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 		set_bit(PGDAT_WRITEBACK, &pgdat->flags);
 
 	/*
+	 * If dirty pages are scanned that are not queued for IO, it
+	 * implies that flushers are not doing their job. This can
+	 * happen when memory pressure pushes dirty pages to the end of
+	 * the LRU before the dirty limits are breached and the dirty
+	 * data has expired. It can also happen when the proportion of
+	 * dirty pages grows not through writes but through memory
+	 * pressure reclaiming all the clean cache. And in some cases,
+	 * the flushers simply cannot keep up with the allocation
+	 * rate. Nudge the flusher threads in case they are asleep.
+	 */
+	if (stat.nr_unqueued_dirty == nr_taken)
+		wakeup_flusher_threads(WB_REASON_VMSCAN);
+
+	/*
 	 * Legacy memcg will stall in page writeback so avoid forcibly
 	 * stalling here.
 	 */
@@ -1791,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
 	if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
 		set_bit(PGDAT_CONGESTED, &pgdat->flags);
 
-	/*
-	 * If dirty pages are scanned that are not queued for IO, it
-	 * implies that flushers are not doing their job. This can
-	 * happen when memory pressure pushes dirty pages to the end of
-	 * the LRU before the dirty limits are breached and the dirty
-	 * data has expired. It can also happen when the proportion of
-	 * dirty pages grows not through writes but through memory
-	 * pressure reclaiming all the clean cache. And in some cases,
-	 * the flushers simply cannot keep up with the allocation
-	 * rate. Nudge the flusher threads in case they are asleep, but
-	 * also allow kswapd to start writing pages during reclaim.
-	 */
-	if (stat.nr_unqueued_dirty == nr_taken) {
-		wakeup_flusher_threads(WB_REASON_VMSCAN);
+	/* Allow kswapd to start writing pages during reclaim. */
+	if (stat.nr_unqueued_dirty == nr_taken)
 		set_bit(PGDAT_DIRTY, &pgdat->flags);
-	}
 
 	/*
 	 * If kswapd scans pages marked marked for immediate