author     David S. Miller <davem@davemloft.net>   2018-03-23 11:24:57 -0400
committer  David S. Miller <davem@davemloft.net>   2018-03-23 11:31:58 -0400
commit     03fe2debbb2771fb90881e4ce8109b09cf772a5c (patch)
tree       fbaf8738296b2e9dcba81c6daef2d515b6c4948c /mm
parent     6686c459e1449a3ee5f3fd313b0a559ace7a700e (diff)
parent     f36b7534b83357cf52e747905de6d65b4f7c2512 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

Fun set of conflict resolutions here...

For the mac80211 stuff, these were fortunately just parallel
adds. Trivially resolved.

In drivers/net/phy/phy.c we had a bug fix in 'net' that moved the
function phy_disable_interrupts() earlier in the file, whilst in
'net-next' the phy_error() call from this function was removed.

In net/ipv4/xfrm4_policy.c, David Ahern's changes to remove the
'rt_table_id' member of rtable collided with a bug fix in 'net' that
added a new struct member "rt_mtu_locked" which needs to be copied
over here.
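
For reference, that copy presumably lands in xfrm4_fill_dst(), where the
rtable fields are mirrored into the xfrm_dst. The fragment below is only
a sketch of the shape of the resolution (surrounding code elided, field
list abridged), keeping the 'net' copy of the new member while the
'rt_table_id' copy goes away together with the member itself:

    static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
                              const struct flowi *fl)
    {
            struct rtable *rt = (struct rtable *)xdst->route;
            /* ... existing field copies ... */
            xdst->u.rt.rt_pmtu = rt->rt_pmtu;
            xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked; /* kept from 'net' */
            /* the rt_table_id copy is dropped along with the member */
            /* ... */
            return 0;
    }
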
The mlxsw driver conflict consisted of net-next separating
the span code and definitions into separate files, whilst
a 'net' bug fix made some changes to that moved code.

The mlx5 infiniband conflict resolution was quite non-trivial;
the RDMA tree's merge commit was used as a guide here, and
here are their notes:

====================
Due to bug fixes found by the syzkaller bot and taken into the for-rc
branch after development for the 4.17 merge window had already started
being taken into the for-next branch, there were fairly non-trivial
merge issues that would need to be resolved between the for-rc branch
and the for-next branch. This merge resolves those conflicts and
provides a unified base upon which ongoing development for 4.17 can
be based.

Conflicts:
drivers/infiniband/hw/mlx5/main.c - Commit 42cea83f9524
(IB/mlx5: Fix cleanup order on unload) added to for-rc and
commit b5ca15ad7e61 (IB/mlx5: Add proper representors support)
added as part of the devel cycle both needed to modify the
init/de-init functions used by mlx5. To support the new
representors, the new functions added by the cleanup patch
needed to be made non-static, and the init/de-init list
added by the representors patch needed to be modified to
match the init/de-init list changes made by the cleanup
patch.

Updates:
drivers/infiniband/hw/mlx5/mlx5_ib.h - Update function
prototypes added by representors patch to reflect new function
names as changed by cleanup patch
drivers/infiniband/hw/mlx5/ib_rep.c - Update init/de-init
stage list to match new order from cleanup patch
====================
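
For context, the "init/de-init list" those notes refer to is mlx5's table
of paired init/cleanup stages. A rough sketch of its shape, based on the
in-tree STAGE_CREATE() pattern (the exact stage set and entry names differ
between main.c and ib_rep.c, so treat this as illustrative only):

    static const struct mlx5_ib_profile pf_profile = {
            STAGE_CREATE(MLX5_IB_STAGE_INIT,
                         mlx5_ib_stage_init_init,
                         mlx5_ib_stage_init_cleanup),
            /* ... further stages ... */
    };

The representors profile in ib_rep.c reuses stage functions from main.c,
which is why the cleanup patch's functions had to become non-static and
both lists had to agree on ordering.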

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'mm')

 -rw-r--r--  mm/gup.c          |  7
 -rw-r--r--  mm/huge_memory.c  |  9
 -rw-r--r--  mm/hugetlb.c      |  9
 -rw-r--r--  mm/khugepaged.c   | 15
 -rw-r--r--  mm/memblock.c     | 28
 -rw-r--r--  mm/mempolicy.c    |  3
 -rw-r--r--  mm/page_alloc.c   | 17
 -rw-r--r--  mm/percpu-km.c    |  8
 -rw-r--r--  mm/percpu-vm.c    | 18
 -rw-r--r--  mm/percpu.c       | 67
 -rw-r--r--  mm/shmem.c        | 31
 -rw-r--r--  mm/vmscan.c       | 31

12 files changed, 134 insertions, 109 deletions
diff --git a/mm/gup.c b/mm/gup.c
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -516,7 +516,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
         }
 
         if (ret & VM_FAULT_RETRY) {
-                if (nonblocking)
+                if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
                         *nonblocking = 0;
                 return -EBUSY;
         }
@@ -890,7 +890,10 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
                         break;
                 }
                 if (*locked) {
-                        /* VM_FAULT_RETRY didn't trigger */
+                        /*
+                         * VM_FAULT_RETRY didn't trigger or it was a
+                         * FOLL_NOWAIT.
+                         */
                         if (!pages_done)
                                 pages_done = ret;
                         break;
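
The gup change above matters for callers that pass FOLL_NOWAIT and must
not be told that mmap_sem was dropped when it was not. A minimal sketch
of such a caller follows; the helper itself is hypothetical, only
get_user_pages_unlocked(), FOLL_WRITE and FOLL_NOWAIT are kernel API:

    #include <linux/mm.h>

    /* Try to grab one writable user page without sleeping on a fault.
     * With FOLL_NOWAIT the fault path returns instead of waiting, and
     * with the fix above gup's internal "locked" bookkeeping stays in
     * sync with that, so mmap_sem is released exactly once. */
    static long grab_user_page_nowait(unsigned long addr, struct page **page)
    {
            return get_user_pages_unlocked(addr, 1, page,
                                           FOLL_WRITE | FOLL_NOWAIT);
    }
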
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 87ab9b8f56b5..5a68730eebd6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
         VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-        if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+        if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+                                  true)) {
                 put_page(page);
                 count_vm_event(THP_FAULT_FALLBACK);
                 return VM_FAULT_FALLBACK;
@@ -1316,7 +1317,7 @@ alloc:
         }
 
         if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
-                                        huge_gfp, &memcg, true))) {
+                                huge_gfp | __GFP_NORETRY, &memcg, true))) {
                 put_page(new_page);
                 split_huge_pmd(vma, vmf->pmd, vmf->address);
                 if (page)
@@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 
         list_for_each_safe(pos, next, &list) {
                 page = list_entry((void *)pos, struct page, mapping);
-                lock_page(page);
+                if (!trylock_page(page))
+                        goto next;
                 /* split_huge_page() removes page from list on success */
                 if (!split_huge_page(page))
                         split++;
                 unlock_page(page);
+next:
                 put_page(page);
         }
 
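
The deferred_split_scan() change above, like the mm/shmem.c change further
down, applies the same rule: reclaim-side scanners never sleep on a page
lock, they skip the page instead. A minimal illustration of the pattern
(the helper name is hypothetical; trylock_page()/unlock_page() are real):

    static void scan_one_page(struct page *page)
    {
            if (!trylock_page(page))
                    return;         /* contended: leave it for a later pass */

            /* ... work that requires the page lock ... */

            unlock_page(page);
    }
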
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c204e3d132b..976bbc5646fe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/mmdebug.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
 #include <linux/string_helpers.h>
@@ -1583,7 +1584,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
                 page = NULL;
         } else {
                 h->surplus_huge_pages++;
-                h->nr_huge_pages_node[page_to_nid(page)]++;
+                h->surplus_huge_pages_node[page_to_nid(page)]++;
         }
 
 out_unlock:
@@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode,
         struct resv_map *resv_map;
         long gbl_reserve;
 
+        /* This should never happen */
+        if (from > to) {
+                VM_WARN(1, "%s called with a negative range\n", __func__);
+                return -EINVAL;
+        }
+
         /*
          * Only apply hugepage reservation if asked. At fault time, an
          * attempt will be made for VM_NORESERVE to allocate a page
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b7e2268dfc9a..e42568284e06 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                         goto out;
                 }
 
-                VM_BUG_ON_PAGE(PageCompound(page), page);
+                /* TODO: teach khugepaged to collapse THP mapped with pte */
+                if (PageCompound(page)) {
+                        result = SCAN_PAGE_COMPOUND;
+                        goto out;
+                }
+
                 VM_BUG_ON_PAGE(!PageAnon(page), page);
 
                 /*
@@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm,
                 goto out_nolock;
         }
 
-        if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+        /* Do not oom kill for khugepaged charges */
+        if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+                                           &memcg, true))) {
                 result = SCAN_CGROUP_CHARGE_FAIL;
                 goto out_nolock;
         }
@@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm,
                 goto out;
         }
 
-        if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+        /* Do not oom kill for khugepaged charges */
+        if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+                                           &memcg, true))) {
                 result = SCAN_CGROUP_CHARGE_FAIL;
                 goto out;
         }
diff --git a/mm/memblock.c b/mm/memblock.c
index 5a9ca2a1751b..48376bd33274 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
                 *out_nid = r->nid;
 }
 
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
-                                                      unsigned long max_pfn)
-{
-        struct memblock_type *type = &memblock.memory;
-        unsigned int right = type->cnt;
-        unsigned int mid, left = 0;
-        phys_addr_t addr = PFN_PHYS(pfn + 1);
-
-        do {
-                mid = (right + left) / 2;
-
-                if (addr < type->regions[mid].base)
-                        right = mid;
-                else if (addr >= (type->regions[mid].base +
-                                  type->regions[mid].size))
-                        left = mid + 1;
-                else {
-                        /* addr is within the region, so pfn + 1 is valid */
-                        return min(pfn + 1, max_pfn);
-                }
-        } while (left < right);
-
-        if (right == type->cnt)
-                return max_pfn;
-        else
-                return min(PHYS_PFN(type->regions[right].base), max_pfn);
-}
-
 /**
  * memblock_set_node - set node ID on memblock regions
  * @base: base of area to set node ID for
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d879f1d8a44a..32cba0332787 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2124,6 +2124,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
         case MPOL_INTERLEAVE:
                 return !!nodes_equal(a->v.nodes, b->v.nodes);
         case MPOL_PREFERRED:
+                /* a's ->flags is the same as b's */
+                if (a->flags & MPOL_F_LOCAL)
+                        return true;
                 return a->v.preferred_node == b->v.preferred_node;
         default:
                 BUG();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cb416723538f..1741dd23e7c1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1910,7 +1910,9 @@ static int move_freepages(struct zone *zone,
          * Remove at a later date when no bug reports exist related to
          * grouping pages by mobility
          */
-        VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
+        VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) &&
+                  pfn_valid(page_to_pfn(end_page)) &&
+                  page_zone(start_page) != page_zone(end_page));
 #endif
 
         if (num_movable)
@@ -3594,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
                 return false;
 
         /* this guy won't enter reclaim */
-        if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+        if (current->flags & PF_MEMALLOC)
                 return false;
 
         /* We're only interested __GFP_FS allocations for now */
@@ -5354,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                 if (context != MEMMAP_EARLY)
                         goto not_early;
 
-                if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-                        /*
-                         * Skip to the pfn preceding the next valid one (or
-                         * end_pfn), such that we hit a valid pfn (or end_pfn)
-                         * on our next iteration of the loop.
-                         */
-                        pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+                if (!early_pfn_valid(pfn))
                         continue;
-                }
                 if (!early_pfn_in_nid(pfn, nid))
                         continue;
                 if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index d2a76642c4ae..38de70ab1a0d 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -34,7 +34,7 @@
 #include <linux/log2.h>
 
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-                               int page_start, int page_end)
+                               int page_start, int page_end, gfp_t gfp)
 {
         return 0;
 }
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
         /* nada */
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
         const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
         struct pcpu_chunk *chunk;
         struct page *pages;
         int i;
 
-        chunk = pcpu_alloc_chunk();
+        chunk = pcpu_alloc_chunk(gfp);
         if (!chunk)
                 return NULL;
 
-        pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+        pages = alloc_pages(gfp, order_base_2(nr_pages));
         if (!pages) {
                 pcpu_free_chunk(chunk);
                 return NULL;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 9158e5a81391..d8078de912de 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
         lockdep_assert_held(&pcpu_alloc_mutex);
 
         if (!pages)
-                pages = pcpu_mem_zalloc(pages_size);
+                pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
         return pages;
 }
 
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
  * @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
  * @page_start: page index of the first page to be allocated
  * @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
  *
  * Allocate pages [@page_start,@page_end) into @pages for all units.
  * The allocation is for @chunk. Percpu core doesn't care about the
  * content of @pages and will pass it verbatim to pcpu_map_pages().
  */
 static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
-                            struct page **pages, int page_start, int page_end)
+                            struct page **pages, int page_start, int page_end,
+                            gfp_t gfp)
 {
-        const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
         unsigned int cpu, tcpu;
         int i;
 
+        gfp |= __GFP_HIGHMEM;
+
         for_each_possible_cpu(cpu) {
                 for (i = page_start; i < page_end; i++) {
                         struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
  * @chunk: chunk of interest
  * @page_start: the start page
  * @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
  *
  * For each cpu, populate and map pages [@page_start,@page_end) into
 * @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
 * pcpu_alloc_mutex, does GFP_KERNEL allocation.
 */
 static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
-                               int page_start, int page_end)
+                               int page_start, int page_end, gfp_t gfp)
 {
         struct page **pages;
 
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
         if (!pages)
                 return -ENOMEM;
 
-        if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+        if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
                 return -ENOMEM;
 
         if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
         pcpu_free_pages(chunk, pages, page_start, page_end);
 }
 
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
 {
         struct pcpu_chunk *chunk;
         struct vm_struct **vms;
 
-        chunk = pcpu_alloc_chunk();
+        chunk = pcpu_alloc_chunk(gfp);
         if (!chunk)
                 return NULL;
 
diff --git a/mm/percpu.c b/mm/percpu.c
index 50e7fdf84055..9297098519a6 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
 #include <linux/vmalloc.h>
 #include <linux/workqueue.h>
 #include <linux/kmemleak.h>
+#include <linux/sched.h>
 
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
 /**
  * pcpu_mem_zalloc - allocate memory
  * @size: bytes to allocate
+ * @gfp: allocation flags
  *
  * Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used. The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags. The
+ * returned memory is always zeroed.
  *
  * RETURNS:
  * Pointer to the allocated area on success, NULL on failure.
  */
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
 {
         if (WARN_ON_ONCE(!slab_is_available()))
                 return NULL;
 
         if (size <= PAGE_SIZE)
-                return kzalloc(size, GFP_KERNEL);
+                return kzalloc(size, gfp);
         else
-                return vzalloc(size);
+                return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
 }
 
 /**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
         return chunk;
 }
 
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
 {
         struct pcpu_chunk *chunk;
         int region_bits;
 
-        chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+        chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
         if (!chunk)
                 return NULL;
 
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
         region_bits = pcpu_chunk_map_bits(chunk);
 
         chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
-                                           sizeof(chunk->alloc_map[0]));
+                                           sizeof(chunk->alloc_map[0]), gfp);
         if (!chunk->alloc_map)
                 goto alloc_map_fail;
 
         chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
-                                           sizeof(chunk->bound_map[0]));
+                                           sizeof(chunk->bound_map[0]), gfp);
         if (!chunk->bound_map)
                 goto bound_map_fail;
 
         chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
-                                           sizeof(chunk->md_blocks[0]));
+                                           sizeof(chunk->md_blocks[0]), gfp);
         if (!chunk->md_blocks)
                 goto md_blocks_fail;
 
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
 * pcpu_addr_to_page - translate address to physical address
 * pcpu_verify_alloc_info - check alloc_info is acceptable during init
 */
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+                               int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+                                  int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
 static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
 static struct page *pcpu_addr_to_page(void *addr);
 static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                                  gfp_t gfp)
 {
+        /* whitelisted flags that can be passed to the backing allocators */
+        gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
         bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
         bool do_warn = !(gfp & __GFP_NOWARN);
         static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
                 return NULL;
         }
 
-        if (!is_atomic)
-                mutex_lock(&pcpu_alloc_mutex);
+        if (!is_atomic) {
+                /*
+                 * pcpu_balance_workfn() allocates memory under this mutex,
+                 * and it may wait for memory reclaim. Allow current task
+                 * to become OOM victim, in case of memory pressure.
+                 */
+                if (gfp & __GFP_NOFAIL)
+                        mutex_lock(&pcpu_alloc_mutex);
+                else if (mutex_lock_killable(&pcpu_alloc_mutex))
+                        return NULL;
+        }
 
         spin_lock_irqsave(&pcpu_lock, flags);
 
@@ -1421,7 +1434,7 @@ restart:
         }
 
         if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
-                chunk = pcpu_create_chunk();
+                chunk = pcpu_create_chunk(pcpu_gfp);
                 if (!chunk) {
                         err = "failed to allocate new chunk";
                         goto fail;
@@ -1450,7 +1463,7 @@ area_found:
                                            page_start, page_end) {
                         WARN_ON(chunk->immutable);
 
-                        ret = pcpu_populate_chunk(chunk, rs, re);
+                        ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
 
                         spin_lock_irqsave(&pcpu_lock, flags);
                         if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
 * pcpu_balance_workfn - manage the amount of free chunks and populated pages
 * @work: unused
 *
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one. This is also
+ * responsible for maintaining the pool of empty populated pages. However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
 */
 static void pcpu_balance_workfn(struct work_struct *work)
 {
+        /* gfp flags passed to underlying allocators */
+        const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
         LIST_HEAD(to_free);
         struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
         struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
                         spin_unlock_irq(&pcpu_lock);
                 }
                 pcpu_destroy_chunk(chunk);
+                cond_resched();
         }
 
         /*
@@ -1645,7 +1666,7 @@ retry_pop:
                                              chunk->nr_pages) {
                         int nr = min(re - rs, nr_to_pop);
 
-                        ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+                        ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
                         if (!ret) {
                                 nr_to_pop -= nr;
                                 spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
 
         if (nr_to_pop) {
                 /* ran out of chunks to populate, create a new one and retry */
-                chunk = pcpu_create_chunk();
+                chunk = pcpu_create_chunk(gfp);
                 if (chunk) {
                         spin_lock_irq(&pcpu_lock);
                         pcpu_chunk_relocate(chunk, -1);
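
With the whitelisting above, percpu callers can have __GFP_NORETRY and
__GFP_NOWARN honoured by the backing chunk and page allocations instead
of having those hints dropped. A small usage sketch follows; the struct
and function names are illustrative, alloc_percpu_gfp() is the real API:

    #include <linux/types.h>
    #include <linux/percpu.h>

    struct foo_stats {                      /* illustrative only */
            u64 rx_packets;
            u64 tx_packets;
    };

    static struct foo_stats __percpu *foo_stats_alloc(void)
    {
            /* These hints now reach pcpu_mem_zalloc() and the chunk
             * allocations rather than being silently ignored. */
            return alloc_percpu_gfp(struct foo_stats,
                                    GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
    }
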
diff --git a/mm/shmem.c b/mm/shmem.c
index 1907688b75ee..b85919243399 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -493,36 +493,45 @@ next:
                 info = list_entry(pos, struct shmem_inode_info, shrinklist);
                 inode = &info->vfs_inode;
 
-                if (nr_to_split && split >= nr_to_split) {
-                        iput(inode);
-                        continue;
-                }
+                if (nr_to_split && split >= nr_to_split)
+                        goto leave;
 
-                page = find_lock_page(inode->i_mapping,
+                page = find_get_page(inode->i_mapping,
                                 (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
                 if (!page)
                         goto drop;
 
+                /* No huge page at the end of the file: nothing to split */
                 if (!PageTransHuge(page)) {
-                        unlock_page(page);
                         put_page(page);
                         goto drop;
                 }
 
+                /*
+                 * Leave the inode on the list if we failed to lock
+                 * the page at this time.
+                 *
+                 * Waiting for the lock may lead to deadlock in the
+                 * reclaim path.
+                 */
+                if (!trylock_page(page)) {
+                        put_page(page);
+                        goto leave;
+                }
+
                 ret = split_huge_page(page);
                 unlock_page(page);
                 put_page(page);
 
-                if (ret) {
-                        /* split failed: leave it on the list */
-                        iput(inode);
-                        continue;
-                }
+                /* If split failed leave the inode on the list */
+                if (ret)
+                        goto leave;
 
                 split++;
 drop:
                 list_del_init(&info->shrinklist);
                 removed++;
+leave:
                 iput(inode);
         }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bee53495a829..cd5dc3faaa57 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1780,6 +1780,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
                 set_bit(PGDAT_WRITEBACK, &pgdat->flags);
 
         /*
+         * If dirty pages are scanned that are not queued for IO, it
+         * implies that flushers are not doing their job. This can
+         * happen when memory pressure pushes dirty pages to the end of
+         * the LRU before the dirty limits are breached and the dirty
+         * data has expired. It can also happen when the proportion of
+         * dirty pages grows not through writes but through memory
+         * pressure reclaiming all the clean cache. And in some cases,
+         * the flushers simply cannot keep up with the allocation
+         * rate. Nudge the flusher threads in case they are asleep.
+         */
+        if (stat.nr_unqueued_dirty == nr_taken)
+                wakeup_flusher_threads(WB_REASON_VMSCAN);
+
+        /*
          * Legacy memcg will stall in page writeback so avoid forcibly
          * stalling here.
          */
@@ -1791,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
         if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
                 set_bit(PGDAT_CONGESTED, &pgdat->flags);
 
-        /*
-         * If dirty pages are scanned that are not queued for IO, it
-         * implies that flushers are not doing their job. This can
-         * happen when memory pressure pushes dirty pages to the end of
-         * the LRU before the dirty limits are breached and the dirty
-         * data has expired. It can also happen when the proportion of
-         * dirty pages grows not through writes but through memory
-         * pressure reclaiming all the clean cache. And in some cases,
-         * the flushers simply cannot keep up with the allocation
-         * rate. Nudge the flusher threads in case they are asleep, but
-         * also allow kswapd to start writing pages during reclaim.
-         */
-        if (stat.nr_unqueued_dirty == nr_taken) {
-                wakeup_flusher_threads(WB_REASON_VMSCAN);
+        /* Allow kswapd to start writing pages during reclaim. */
+        if (stat.nr_unqueued_dirty == nr_taken)
                 set_bit(PGDAT_DIRTY, &pgdat->flags);
-        }
 
         /*
          * If kswapd scans pages marked marked for immediate