author    Linus Torvalds <torvalds@linux-foundation.org>    2018-03-22 21:48:43 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2018-03-22 21:48:43 -0400
commit    f36b7534b83357cf52e747905de6d65b4f7c2512
tree      ca52ebdc4aaa738bd464b22a06ed034e41c46acb
parent    8401c72c593d2be8607d2a0a4551ee5c867d6f2f
parent    9d3c3354bb85bab4d865fe95039443f09a4c8394
Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton:
 "13 fixes"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  mm, thp: do not cause memcg oom for thp
  mm/vmscan: wake up flushers for legacy cgroups too
  Revert "mm: page_alloc: skip over regions of invalid pfns where possible"
  mm/shmem: do not wait for lock_page() in shmem_unused_huge_shrink()
  mm/thp: do not wait for lock_page() in deferred_split_scan()
  mm/khugepaged.c: convert VM_BUG_ON() to collapse fail
  x86/mm: implement free pmd/pte page interfaces
  mm/vmalloc: add interfaces to free unmapped page table
  h8300: remove extraneous __BIG_ENDIAN definition
  hugetlbfs: check for pgoff value overflow
  lockdep: fix fs_reclaim warning
  MAINTAINERS: update Mark Fasheh's e-mail
  mm/mempolicy.c: avoid use uninitialized preferred_node
 MAINTAINERS                          |  2
 arch/arm64/mm/mmu.c                  | 10
 arch/h8300/include/asm/byteorder.h   |  1
 arch/x86/mm/pgtable.c                | 48
 fs/hugetlbfs/inode.c                 | 17
 include/asm-generic/pgtable.h        | 10
 include/linux/memblock.h             |  1
 lib/ioremap.c                        |  6
 mm/huge_memory.c                     |  9
 mm/hugetlb.c                         |  7
 mm/khugepaged.c                      | 15
 mm/memblock.c                        | 28
 mm/mempolicy.c                       |  3
 mm/page_alloc.c                      | 13
 mm/shmem.c                           | 31
 mm/vmscan.c                          | 31
 16 files changed, 153 insertions(+), 79 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 4e62756936fa..73c0cdabf755 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10334,7 +10334,7 @@ F: drivers/oprofile/
 F:      include/linux/oprofile.h
 
 ORACLE CLUSTER FILESYSTEM 2 (OCFS2)
-M:      Mark Fasheh <mfasheh@versity.com>
+M:      Mark Fasheh <mark@fasheh.com>
 M:      Joel Becker <jlbec@evilplan.org>
 L:      ocfs2-devel@oss.oracle.com (moderated for non-subscribers)
 W:      http://ocfs2.wiki.kernel.org
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 8c704f1e53c2..2dbb2c9f1ec1 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -972,3 +972,13 @@ int pmd_clear_huge(pmd_t *pmdp)
         pmd_clear(pmdp);
         return 1;
 }
+
+int pud_free_pmd_page(pud_t *pud)
+{
+        return pud_none(*pud);
+}
+
+int pmd_free_pte_page(pmd_t *pmd)
+{
+        return pmd_none(*pmd);
+}
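
The arm64 side is deliberately minimal: these helpers free nothing and only report whether the entry is already empty, so a huge mapping is refused whenever a table page is still installed. A standalone sketch of that contract (illustrative names, not kernel code):

#include <stdio.h>

/*
 * Toy model: like arm64's pud_free_pmd_page()/pmd_free_pte_page(),
 * report "safe to map huge" only when no lower-level table exists.
 * Nothing is freed here; that is the point of the arm64 stubs.
 */
static int entry_free_ok(void *table_page)
{
        return table_page == NULL;      /* 1 = proceed, 0 = refuse huge map */
}

int main(void)
{
        void *empty = NULL;
        void *populated = &empty;       /* pretend a table page is installed */

        printf("empty: %d, populated: %d\n",
               entry_free_ok(empty), entry_free_ok(populated));
        return 0;
}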
diff --git a/arch/h8300/include/asm/byteorder.h b/arch/h8300/include/asm/byteorder.h
index ecff2d1ca5a3..6eaa7ad5fc2c 100644
--- a/arch/h8300/include/asm/byteorder.h
+++ b/arch/h8300/include/asm/byteorder.h
@@ -2,7 +2,6 @@
 #ifndef __H8300_BYTEORDER_H__
 #define __H8300_BYTEORDER_H__
 
-#define __BIG_ENDIAN __ORDER_BIG_ENDIAN__
 #include <linux/byteorder/big_endian.h>
 
 #endif
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 004abf9ebf12..34cda7e0551b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -702,4 +702,52 @@ int pmd_clear_huge(pmd_t *pmd)
 
         return 0;
 }
+
+/**
+ * pud_free_pmd_page - Clear pud entry and free pmd page.
+ * @pud: Pointer to a PUD.
+ *
+ * Context: The pud range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pud_free_pmd_page(pud_t *pud)
+{
+        pmd_t *pmd;
+        int i;
+
+        if (pud_none(*pud))
+                return 1;
+
+        pmd = (pmd_t *)pud_page_vaddr(*pud);
+
+        for (i = 0; i < PTRS_PER_PMD; i++)
+                if (!pmd_free_pte_page(&pmd[i]))
+                        return 0;
+
+        pud_clear(pud);
+        free_page((unsigned long)pmd);
+
+        return 1;
+}
+
+/**
+ * pmd_free_pte_page - Clear pmd entry and free pte page.
+ * @pmd: Pointer to a PMD.
+ *
+ * Context: The pmd range has been unmapped and TLB purged.
+ * Return: 1 if clearing the entry succeeded. 0 otherwise.
+ */
+int pmd_free_pte_page(pmd_t *pmd)
+{
+        pte_t *pte;
+
+        if (pmd_none(*pmd))
+                return 1;
+
+        pte = (pte_t *)pmd_page_vaddr(*pmd);
+        pmd_clear(pmd);
+        free_page((unsigned long)pte);
+
+        return 1;
+}
 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
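
On x86 the teardown is real: every pte page hanging off the pmd level is released before the pud entry is cleared. A userspace model of that ordering, with toy stand-ins for the page-table types (hypothetical, for illustration only):

#include <stdio.h>
#include <stdlib.h>

#define PTRS_PER_PMD 512

/* Toy page-table entries: NULL means pXd_none(). */
typedef struct { void *pte_page; } pmd_entry;
typedef struct { pmd_entry *pmd_page; } pud_entry;

/* Mirrors pmd_free_pte_page(): clear the entry, free the pte page. */
static int model_pmd_free_pte_page(pmd_entry *pmd)
{
        if (!pmd->pte_page)
                return 1;               /* nothing mapped: trivially OK */
        free(pmd->pte_page);
        pmd->pte_page = NULL;
        return 1;
}

/* Mirrors pud_free_pmd_page(): free each pte page, then the pmd page. */
static int model_pud_free_pmd_page(pud_entry *pud)
{
        int i;

        if (!pud->pmd_page)
                return 1;
        for (i = 0; i < PTRS_PER_PMD; i++)
                if (!model_pmd_free_pte_page(&pud->pmd_page[i]))
                        return 0;       /* leave the hierarchy intact on failure */
        free(pud->pmd_page);
        pud->pmd_page = NULL;
        return 1;
}

int main(void)
{
        pud_entry pud = { calloc(PTRS_PER_PMD, sizeof(pmd_entry)) };

        pud.pmd_page[3].pte_page = malloc(4096);  /* one populated pte page */
        printf("freed: %d\n", model_pud_free_pmd_page(&pud));
        return 0;
}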
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 8fe1b0aa2896..b9a254dcc0e7 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -108,6 +108,16 @@ static void huge_pagevec_release(struct pagevec *pvec)
         pagevec_reinit(pvec);
 }
 
+/*
+ * Mask used when checking the page offset value passed in via system
+ * calls. This value will be converted to a loff_t which is signed.
+ * Therefore, we want to check the upper PAGE_SHIFT + 1 bits of the
+ * value. The extra bit (- 1 in the shift value) is to take the sign
+ * bit into account.
+ */
+#define PGOFF_LOFFT_MAX \
+        (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))
+
 static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
         struct inode *inode = file_inode(file);
@@ -127,12 +137,13 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
         vma->vm_ops = &hugetlb_vm_ops;
 
         /*
-         * Offset passed to mmap (before page shift) could have been
-         * negative when represented as a (l)off_t.
+         * page based offset in vm_pgoff could be sufficiently large to
+         * overflow a (l)off_t when converted to byte offset.
          */
-        if (((loff_t)vma->vm_pgoff << PAGE_SHIFT) < 0)
+        if (vma->vm_pgoff & PGOFF_LOFFT_MAX)
                 return -EINVAL;
 
+        /* must be huge page aligned */
         if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
                 return -EINVAL;
 
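
The PGOFF_LOFFT_MAX arithmetic is easy to check in isolation. A minimal sketch, assuming a 64-bit long and 4 KiB pages; note how a pgoff past the sign bit entirely (e.g. bit 52) wraps to a bogus small offset under the old shift-based check, while the mask still catches it:

#include <stdio.h>

#define PAGE_SHIFT      12
#define BITS_PER_LONG   64

/* Same expression as the patch: the top PAGE_SHIFT + 1 bits of a long. */
#define PGOFF_LOFFT_MAX \
        (((1UL << (PAGE_SHIFT + 1)) - 1) << (BITS_PER_LONG - (PAGE_SHIFT + 1)))

int main(void)
{
        /* Largest pgoff whose byte offset still fits in a positive loff_t. */
        unsigned long ok   = (1UL << (BITS_PER_LONG - PAGE_SHIFT - 1)) - 1;
        unsigned long sign = ok + 1;            /* byte offset sets the sign bit */
        unsigned long wrap = 1UL << 52;         /* byte offset wraps to 0 */

        printf("old check: sign=%d wrap=%d\n",
               (long long)(sign << PAGE_SHIFT) < 0,    /* caught */
               (long long)(wrap << PAGE_SHIFT) < 0);   /* missed! */
        printf("new check: ok=%d sign=%d wrap=%d\n",
               !!(ok & PGOFF_LOFFT_MAX),               /* 0: accepted */
               !!(sign & PGOFF_LOFFT_MAX),             /* 1: rejected */
               !!(wrap & PGOFF_LOFFT_MAX));            /* 1: rejected */
        return 0;
}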
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 2cfa3075d148..bfbb44a5ad38 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -983,6 +983,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot);
 int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot);
 int pud_clear_huge(pud_t *pud);
 int pmd_clear_huge(pmd_t *pmd);
+int pud_free_pmd_page(pud_t *pud);
+int pmd_free_pte_page(pmd_t *pmd);
 #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
 static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
 {
@@ -1008,6 +1010,14 @@ static inline int pmd_clear_huge(pmd_t *pmd)
 {
         return 0;
 }
+static inline int pud_free_pmd_page(pud_t *pud)
+{
+        return 0;
+}
+static inline int pmd_free_pte_page(pmd_t *pmd)
+{
+        return 0;
+}
 #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
 
 #ifndef __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 8be5077efb5f..f92ea7783652 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -187,7 +187,6 @@ int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
                             unsigned long *end_pfn);
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
                           unsigned long *out_end_pfn, int *out_nid);
-unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn);
 
 /**
  * for_each_mem_pfn_range - early memory pfn range iterator
diff --git a/lib/ioremap.c b/lib/ioremap.c
index b808a390e4c3..54e5bbaa3200 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 
                 if (ioremap_pmd_enabled() &&
                     ((next - addr) == PMD_SIZE) &&
-                    IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
+                    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
+                    pmd_free_pte_page(pmd)) {
                         if (pmd_set_huge(pmd, phys_addr + addr, prot))
                                 continue;
                 }
@@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
 
                 if (ioremap_pud_enabled() &&
                     ((next - addr) == PUD_SIZE) &&
-                    IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
+                    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
+                    pud_free_pmd_page(pud)) {
                         if (pud_set_huge(pud, phys_addr + addr, prot))
                                 continue;
                 }
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 87ab9b8f56b5..5a68730eebd6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
 
         VM_BUG_ON_PAGE(!PageCompound(page), page);
 
-        if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+        if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+                                  true)) {
                 put_page(page);
                 count_vm_event(THP_FAULT_FALLBACK);
                 return VM_FAULT_FALLBACK;
@@ -1316,7 +1317,7 @@ alloc:
         }
 
         if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
-                                        huge_gfp, &memcg, true))) {
+                                huge_gfp | __GFP_NORETRY, &memcg, true))) {
                 put_page(new_page);
                 split_huge_pmd(vma, vmf->pmd, vmf->address);
                 if (page)
@@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
 
         list_for_each_safe(pos, next, &list) {
                 page = list_entry((void *)pos, struct page, mapping);
-                lock_page(page);
+                if (!trylock_page(page))
+                        goto next;
                 /* split_huge_page() removes page from list on success */
                 if (!split_huge_page(page))
                         split++;
                 unlock_page(page);
+next:
                 put_page(page);
         }
 
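
The deferred_split_scan() change follows a common shrinker rule: never sleep on a contended lock while in reclaim context; skip the entry and let a later pass handle it. A userspace analogue of the trylock-and-skip loop, with pthread mutexes standing in for page locks (illustrative only; build with -pthread):

#include <pthread.h>
#include <stdio.h>

struct item {
        pthread_mutex_t lock;
        int split;
};

static void scan(struct item *items, int n)
{
        for (int i = 0; i < n; i++) {
                if (pthread_mutex_trylock(&items[i].lock) != 0)
                        continue;       /* contended: skip, do not block */
                items[i].split = 1;     /* stand-in for split_huge_page() */
                pthread_mutex_unlock(&items[i].lock);
        }
}

int main(void)
{
        struct item items[3] = {
                { PTHREAD_MUTEX_INITIALIZER, 0 },
                { PTHREAD_MUTEX_INITIALIZER, 0 },
                { PTHREAD_MUTEX_INITIALIZER, 0 },
        };

        pthread_mutex_lock(&items[1].lock);     /* simulate a contended page */
        scan(items, 3);
        printf("%d %d %d\n", items[0].split, items[1].split, items[2].split);
        return 0;
}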
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a963f2034dfc..976bbc5646fe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
 #include <linux/slab.h>
+#include <linux/mmdebug.h>
 #include <linux/sched/signal.h>
 #include <linux/rmap.h>
 #include <linux/string_helpers.h>
@@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode,
         struct resv_map *resv_map;
         long gbl_reserve;
 
+        /* This should never happen */
+        if (from > to) {
+                VM_WARN(1, "%s called with a negative range\n", __func__);
+                return -EINVAL;
+        }
+
         /*
          * Only apply hugepage reservation if asked. At fault time, an
          * attempt will be made for VM_NORESERVE to allocate a page
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b7e2268dfc9a..e42568284e06 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
                 goto out;
         }
 
-        VM_BUG_ON_PAGE(PageCompound(page), page);
+        /* TODO: teach khugepaged to collapse THP mapped with pte */
+        if (PageCompound(page)) {
+                result = SCAN_PAGE_COMPOUND;
+                goto out;
+        }
+
         VM_BUG_ON_PAGE(!PageAnon(page), page);
 
         /*
@@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm,
                 goto out_nolock;
         }
 
-        if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+        /* Do not oom kill for khugepaged charges */
+        if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+                                           &memcg, true))) {
                 result = SCAN_CGROUP_CHARGE_FAIL;
                 goto out_nolock;
         }
@@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm,
                 goto out;
         }
 
-        if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+        /* Do not oom kill for khugepaged charges */
+        if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+                                           &memcg, true))) {
                 result = SCAN_CGROUP_CHARGE_FAIL;
                 goto out;
         }
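
The __GFP_NORETRY additions here and in mm/huge_memory.c encode one policy: THP charges are opportunistic, so a memcg over its limit should fail the charge instead of invoking the OOM killer; khugepaged records the failure and moves on. A hedged sketch of that decision flow (names illustrative, not kernel API):

#include <stdio.h>

enum scan_result { SCAN_SUCCEED, SCAN_CGROUP_CHARGE_FAIL };

/* Stand-in for mem_cgroup_try_charge(): with "noretry" the charge
 * reports failure instead of escalating to the OOM killer. */
static int try_charge(int over_limit, int noretry)
{
        if (!over_limit)
                return 0;
        return noretry ? -1 : 0;        /* without noretry: "wins" via OOM kill */
}

int main(void)
{
        /* New behaviour: memcg at its limit, charge backs off. */
        enum scan_result r = try_charge(1, 1) ?
                        SCAN_CGROUP_CHARGE_FAIL : SCAN_SUCCEED;

        printf("%s\n", r ? "SCAN_CGROUP_CHARGE_FAIL" : "SCAN_SUCCEED");
        return 0;
}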
diff --git a/mm/memblock.c b/mm/memblock.c
index b6ba6b7adadc..48376bd33274 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
         *out_nid = r->nid;
 }
 
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
-                                                      unsigned long max_pfn)
-{
-        struct memblock_type *type = &memblock.memory;
-        unsigned int right = type->cnt;
-        unsigned int mid, left = 0;
-        phys_addr_t addr = PFN_PHYS(++pfn);
-
-        do {
-                mid = (right + left) / 2;
-
-                if (addr < type->regions[mid].base)
-                        right = mid;
-                else if (addr >= (type->regions[mid].base +
-                                  type->regions[mid].size))
-                        left = mid + 1;
-                else {
-                        /* addr is within the region, so pfn is valid */
-                        return pfn;
-                }
-        } while (left < right);
-
-        if (right == type->cnt)
-                return -1UL;
-        else
-                return PHYS_PFN(type->regions[right].base);
-}
-
 /**
  * memblock_set_node - set node ID on memblock regions
  * @base: base of area to set node ID for
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d879f1d8a44a..32cba0332787 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2124,6 +2124,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
         case MPOL_INTERLEAVE:
                 return !!nodes_equal(a->v.nodes, b->v.nodes);
         case MPOL_PREFERRED:
+                /* a's ->flags is the same as b's */
+                if (a->flags & MPOL_F_LOCAL)
+                        return true;
                 return a->v.preferred_node == b->v.preferred_node;
         default:
                 BUG();
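
The underlying bug is a classic flag-gated field: for MPOL_PREFERRED with MPOL_F_LOCAL set, v.preferred_node is never initialized, so comparing it reads garbage. A reduced standalone model of the corrected comparison (hypothetical types):

#include <stdbool.h>
#include <stdio.h>

#define F_LOCAL 0x1

/*
 * When a flag says a field is not in use, equality must be decided by
 * the flag alone; otherwise the comparison reads a field nobody ever
 * initialized.
 */
struct policy {
        unsigned flags;
        int preferred_node;     /* only meaningful when !(flags & F_LOCAL) */
};

static bool policy_equal(const struct policy *a, const struct policy *b)
{
        if (a->flags != b->flags)
                return false;
        if (a->flags & F_LOCAL)         /* preferred_node unused: ignore it */
                return true;
        return a->preferred_node == b->preferred_node;
}

int main(void)
{
        struct policy a = { .flags = F_LOCAL, .preferred_node = 7 };
        struct policy b = { .flags = F_LOCAL, .preferred_node = 42 };

        printf("%d\n", policy_equal(&a, &b));   /* 1: same policy */
        return 0;
}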
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 635d7dd29d7f..1741dd23e7c1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3596,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
                 return false;
 
         /* this guy won't enter reclaim */
-        if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+        if (current->flags & PF_MEMALLOC)
                 return false;
 
         /* We're only interested __GFP_FS allocations for now */
@@ -5356,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
         if (context != MEMMAP_EARLY)
                 goto not_early;
 
-        if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-                /*
-                 * Skip to the pfn preceding the next valid one (or
-                 * end_pfn), such that we hit a valid pfn (or end_pfn)
-                 * on our next iteration of the loop.
-                 */
-                pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+        if (!early_pfn_valid(pfn))
                 continue;
-        }
         if (!early_pfn_in_nid(pfn, nid))
                 continue;
         if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
diff --git a/mm/shmem.c b/mm/shmem.c
index 1907688b75ee..b85919243399 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -493,36 +493,45 @@ next:
                 info = list_entry(pos, struct shmem_inode_info, shrinklist);
                 inode = &info->vfs_inode;
 
-                if (nr_to_split && split >= nr_to_split) {
-                        iput(inode);
-                        continue;
-                }
+                if (nr_to_split && split >= nr_to_split)
+                        goto leave;
 
-                page = find_lock_page(inode->i_mapping,
+                page = find_get_page(inode->i_mapping,
                                 (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
                 if (!page)
                         goto drop;
 
+                /* No huge page at the end of the file: nothing to split */
                 if (!PageTransHuge(page)) {
-                        unlock_page(page);
                         put_page(page);
                         goto drop;
                 }
 
+                /*
+                 * Leave the inode on the list if we failed to lock
+                 * the page at this time.
+                 *
+                 * Waiting for the lock may lead to deadlock in the
+                 * reclaim path.
+                 */
+                if (!trylock_page(page)) {
+                        put_page(page);
+                        goto leave;
+                }
+
                 ret = split_huge_page(page);
                 unlock_page(page);
                 put_page(page);
 
-                if (ret) {
-                        /* split failed: leave it on the list */
-                        iput(inode);
-                        continue;
-                }
+                /* If split failed leave the inode on the list */
+                if (ret)
+                        goto leave;
 
                 split++;
 drop:
                 list_del_init(&info->shrinklist);
                 removed++;
+leave:
                 iput(inode);
         }
 
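
After the rework, each inode on the shrink list gets one of three dispositions per pass, and only "drop" removes it; lock contention or a failed split leaves it for a later pass instead of stalling reclaim. A compact model of the new control flow (illustrative, not the kernel function):

#include <stdio.h>

enum outcome { DROP, SPLIT, LEAVE };

static const char *shrink_one(int has_huge_tail, int lock_free, int split_ok)
{
        if (!has_huge_tail)
                return "drop";          /* nothing to split: off the list */
        if (!lock_free)
                return "leave";         /* contended: retry on a later pass */
        if (!split_ok)
                return "leave";         /* split failed: retry later */
        return "drop";                  /* split done: off the list */
}

int main(void)
{
        printf("%s %s %s %s\n",
               shrink_one(0, 1, 1),     /* no huge page at EOF */
               shrink_one(1, 0, 1),     /* page lock contended */
               shrink_one(1, 1, 0),     /* split_huge_page() failed */
               shrink_one(1, 1, 1));    /* split succeeded */
        return 0;
}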
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bee53495a829..cd5dc3faaa57 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1780,6 +1780,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
                 set_bit(PGDAT_WRITEBACK, &pgdat->flags);
 
         /*
+         * If dirty pages are scanned that are not queued for IO, it
+         * implies that flushers are not doing their job. This can
+         * happen when memory pressure pushes dirty pages to the end of
+         * the LRU before the dirty limits are breached and the dirty
+         * data has expired. It can also happen when the proportion of
+         * dirty pages grows not through writes but through memory
+         * pressure reclaiming all the clean cache. And in some cases,
+         * the flushers simply cannot keep up with the allocation
+         * rate. Nudge the flusher threads in case they are asleep.
+         */
+        if (stat.nr_unqueued_dirty == nr_taken)
+                wakeup_flusher_threads(WB_REASON_VMSCAN);
+
+        /*
          * Legacy memcg will stall in page writeback so avoid forcibly
          * stalling here.
          */
@@ -1791,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
         if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
                 set_bit(PGDAT_CONGESTED, &pgdat->flags);
 
-        /*
-         * If dirty pages are scanned that are not queued for IO, it
-         * implies that flushers are not doing their job. This can
-         * happen when memory pressure pushes dirty pages to the end of
-         * the LRU before the dirty limits are breached and the dirty
-         * data has expired. It can also happen when the proportion of
-         * dirty pages grows not through writes but through memory
-         * pressure reclaiming all the clean cache. And in some cases,
-         * the flushers simply cannot keep up with the allocation
-         * rate. Nudge the flusher threads in case they are asleep, but
-         * also allow kswapd to start writing pages during reclaim.
-         */
-        if (stat.nr_unqueued_dirty == nr_taken) {
-                wakeup_flusher_threads(WB_REASON_VMSCAN);
+        /* Allow kswapd to start writing pages during reclaim. */
+        if (stat.nr_unqueued_dirty == nr_taken)
                 set_bit(PGDAT_DIRTY, &pgdat->flags);
-        }
 
         /*
          * If kswapd scans pages marked for immediate
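
The effect of the move is subtle: the old wakeup sat inside a block that legacy (cgroup v1) memcg reclaim never reached, so its dirty pages piled up with no flusher ever nudged. Hoisting the wakeup above that guard makes every reclaim context nudge the flushers, while the PGDAT_DIRTY bit stays behind the guard. A reduced model (hypothetical flags and names, standing in for the sane_reclaim() split):

#include <stdio.h>

static int woke_flushers, kswapd_may_write;

/* Reduced model: "sane" is false for legacy (cgroup v1) memcg reclaim. */
static void shrink(int nr_unqueued_dirty, int nr_taken, int sane)
{
        woke_flushers = kswapd_may_write = 0;

        /* Hoisted wakeup: runs for every reclaim context now. */
        if (nr_unqueued_dirty == nr_taken)
                woke_flushers = 1;

        if (sane && nr_unqueued_dirty == nr_taken)
                kswapd_may_write = 1;   /* stand-in for set_bit(PGDAT_DIRTY) */
}

int main(void)
{
        shrink(32, 32, 0);      /* legacy memcg: previously woke nothing */
        printf("legacy: woke=%d dirty_bit=%d\n", woke_flushers, kswapd_may_write);
        shrink(32, 32, 1);      /* global/kswapd reclaim */
        printf("sane:   woke=%d dirty_bit=%d\n", woke_flushers, kswapd_may_write);
        return 0;
}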