Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c     44
-rw-r--r--  mm/memblock.c         2
-rw-r--r--  mm/memcontrol.c      67
-rw-r--r--  mm/memory-failure.c  94
-rw-r--r--  mm/memory.c          32
-rw-r--r--  mm/migrate.c          7
-rw-r--r--  mm/mlock.c            7
-rw-r--r--  mm/vmscan.c           4
8 files changed, 169 insertions(+), 88 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e187454d82f6..3e29781ee762 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1162,7 +1162,12 @@ static void __split_huge_page_refcount(struct page *page)
                /* after clearing PageTail the gup refcount can be released */
                smp_mb();
 
-               page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+               /*
+                * Retain the hwpoison flag of the poisoned tail page:
+                * fixes memory-failure killing the wrong process on the
+                * guest machine (KVM).
+                */
+               page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
                page_tail->flags |= (page->flags &
                                     ((1L << PG_referenced) |
                                      (1L << PG_swapbacked) |
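
The combined mask above works by C operator precedence: `~` binds tighter than `|`, so the right-hand side is `(~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON)`, and the `&=` clears every prep-checked bit except the poison bit. A standalone sketch of the arithmetic; the bit positions below are made up for illustration, only the fact that the poison bit lies inside the prep-checked mask matters:

    #include <assert.h>
    #include <stdio.h>

    /* Hypothetical bit positions standing in for the real page-flag layout. */
    #define PG_REFERENCED            (1UL << 2)
    #define PG_HWPOISON              (1UL << 5)
    #define PAGE_FLAGS_CHECK_AT_PREP ((1UL << 7) - 1)  /* includes PG_HWPOISON */

    int main(void)
    {
        unsigned long tail_flags = PG_REFERENCED | PG_HWPOISON;

        /* ~ binds tighter than |: keep everything outside the prep-checked
         * bits, plus the poison bit itself. */
        tail_flags &= ~PAGE_FLAGS_CHECK_AT_PREP | PG_HWPOISON;

        assert(tail_flags & PG_HWPOISON);       /* poison survives the split */
        assert(!(tail_flags & PG_REFERENCED));  /* other prep bits are cleared */
        printf("tail flags: %#lx\n", tail_flags);
        return 0;
    }
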
@@ -1806,6 +1811,8 @@ static void collapse_huge_page(struct mm_struct *mm,
        /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
        if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
                goto out;
+       if (is_vma_temporary_stack(vma))
+               goto out;
        VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
        pgd = pgd_offset(mm, address);
@@ -1847,7 +1854,6 @@ static void collapse_huge_page(struct mm_struct *mm,
                set_pmd_at(mm, address, pmd, _pmd);
                spin_unlock(&mm->page_table_lock);
                anon_vma_unlock(vma->anon_vma);
-               mem_cgroup_uncharge_page(new_page);
                goto out;
        }
 
@@ -1893,6 +1899,7 @@ out_up_write:
        return;
 
 out:
+       mem_cgroup_uncharge_page(new_page);
 #ifdef CONFIG_NUMA
        put_page(new_page);
 #endif
@@ -2027,32 +2034,27 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
                if ((!(vma->vm_flags & VM_HUGEPAGE) &&
                     !khugepaged_always()) ||
                    (vma->vm_flags & VM_NOHUGEPAGE)) {
+               skip:
                        progress++;
                        continue;
                }
-
                /* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-               if (!vma->anon_vma || vma->vm_ops || vma->vm_file) {
-                       khugepaged_scan.address = vma->vm_end;
-                       progress++;
-                       continue;
-               }
+               if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+                       goto skip;
+               if (is_vma_temporary_stack(vma))
+                       goto skip;
+
                VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
                hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
                hend = vma->vm_end & HPAGE_PMD_MASK;
-               if (hstart >= hend) {
-                       progress++;
-                       continue;
-               }
+               if (hstart >= hend)
+                       goto skip;
+               if (khugepaged_scan.address > hend)
+                       goto skip;
                if (khugepaged_scan.address < hstart)
                        khugepaged_scan.address = hstart;
-               if (khugepaged_scan.address > hend) {
-                       khugepaged_scan.address = hend + HPAGE_PMD_SIZE;
-                       progress++;
-                       continue;
-               }
-               BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
+               VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
 
                while (khugepaged_scan.address < hend) {
                        int ret;
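
Note the rounding idiom in this loop: `~HPAGE_PMD_MASK` equals `HPAGE_PMD_SIZE - 1`, so `hstart` is the VMA start rounded up to a huge-page boundary and `hend` is the end rounded down; a VMA too small to cover one aligned huge page yields `hstart >= hend` and falls out through the shared `skip:` label. A minimal sketch of the arithmetic, assuming 2 MiB huge pages:

    #include <stdio.h>

    #define HPAGE_PMD_SIZE (2UL << 20)             /* assume 2 MiB huge pages */
    #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1))

    int main(void)
    {
        unsigned long vm_start = 0x00301000, vm_end = 0x00a02000;

        /* round up: add ~MASK (== SIZE - 1) before masking */
        unsigned long hstart = (vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
        /* round down */
        unsigned long hend = vm_end & HPAGE_PMD_MASK;

        printf("scan [%#lx, %#lx)%s\n", hstart, hend,
               hstart >= hend ? " -- nothing to do" : "");
        return 0;
    }
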
@@ -2081,7 +2083,7 @@ breakouterloop:
 breakouterloop_mmap_sem:
 
        spin_lock(&khugepaged_mm_lock);
-       BUG_ON(khugepaged_scan.mm_slot != mm_slot);
+       VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot);
        /*
         * Release the current mm_slot if this mm is about to die, or
         * if we scanned all vmas of this mm.
@@ -2236,9 +2238,9 @@ static int khugepaged(void *none)
 
        for (;;) {
                mutex_unlock(&khugepaged_mutex);
-               BUG_ON(khugepaged_thread != current);
+               VM_BUG_ON(khugepaged_thread != current);
                khugepaged_loop();
-               BUG_ON(khugepaged_thread != current);
+               VM_BUG_ON(khugepaged_thread != current);
 
                mutex_lock(&khugepaged_mutex);
                if (!khugepaged_enabled())
diff --git a/mm/memblock.c b/mm/memblock.c
index bdba245d8afd..4618fda975a0 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -137,8 +137,6 @@ static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
 
        BUG_ON(0 == size);
 
-       size = memblock_align_up(size, align);
-
        /* Pump up max_addr */
        if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
                end = memblock.current_limit;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3878cfe399dc..da53a252b259 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -612,8 +612,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
        /* pagein of a big page is an event. So, ignore page size */
        if (nr_pages > 0)
                __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
-       else
+       else {
                __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
+               nr_pages = -nr_pages; /* for event */
+       }
 
        __this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
 
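
`nr_pages` is positive on charge and negative on uncharge, so without the negation the MEM_CGROUP_EVENTS counter would be decremented by pageouts and the periodic event check would fire too rarely. A toy model of the fixed accounting, with the per-CPU counters reduced to plain longs:

    #include <stdio.h>

    static long pgpgin, pgpgout, events;

    /* Sketch: the event counter must advance for both directions. */
    static void charge_statistics(long nr_pages)
    {
        if (nr_pages > 0)
            pgpgin++;
        else {
            pgpgout++;
            nr_pages = -nr_pages; /* for event */
        }
        events += nr_pages;
    }

    int main(void)
    {
        charge_statistics(512);  /* charge 512 pages */
        charge_statistics(-512); /* uncharge them again */
        printf("events = %ld (would be 0 without the negation)\n", events);
        return 0;
    }
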
@@ -1111,6 +1113,23 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
        return false;
 }
 
+/**
+ * mem_cgroup_check_margin - check if the memory cgroup allows charging
+ * @mem: memory cgroup to check
+ * @bytes: the number of bytes the caller intends to charge
+ *
+ * Returns a boolean value on whether @mem can be charged @bytes or
+ * whether this would exceed the limit.
+ */
+static bool mem_cgroup_check_margin(struct mem_cgroup *mem, unsigned long bytes)
+{
+       if (!res_counter_check_margin(&mem->res, bytes))
+               return false;
+       if (do_swap_account && !res_counter_check_margin(&mem->memsw, bytes))
+               return false;
+       return true;
+}
+
 static unsigned int get_swappiness(struct mem_cgroup *memcg)
 {
        struct cgroup *cgrp = memcg->css.cgroup;
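
The helper succeeds only if both the memory counter and, when swap accounting is enabled, the memory+swap counter have room for the requested charge. A userspace sketch of the same margin test, with a deliberately simplified res_counter stand-in (the real one takes a lock and tracks more fields):

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified stand-in for the kernel's res_counter. */
    struct res_counter { unsigned long usage, limit; };

    static bool res_counter_check_margin(struct res_counter *c, unsigned long bytes)
    {
        return c->limit - c->usage >= bytes;
    }

    static bool check_margin(struct res_counter *res, struct res_counter *memsw,
                             bool do_swap_account, unsigned long bytes)
    {
        if (!res_counter_check_margin(res, bytes))
            return false;
        if (do_swap_account && !res_counter_check_margin(memsw, bytes))
            return false;
        return true;
    }

    int main(void)
    {
        struct res_counter res   = { .usage = 90, .limit = 100 };
        struct res_counter memsw = { .usage = 99, .limit = 100 };

        /* 8 bytes fit under res but not under memsw */
        printf("%d\n", check_margin(&res, &memsw, true, 8));  /* 0 */
        printf("%d\n", check_margin(&res, &memsw, false, 8)); /* 1 */
        return 0;
    }
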
@@ -1837,23 +1856,34 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
                        flags |= MEM_CGROUP_RECLAIM_NOSWAP;
        } else
                mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
-
-       if (csize > PAGE_SIZE) /* change csize and retry */
+       /*
+        * csize can be either a huge page (HPAGE_SIZE), a batch of
+        * regular pages (CHARGE_SIZE), or a single regular page
+        * (PAGE_SIZE).
+        *
+        * Never reclaim on behalf of optional batching; retry with a
+        * single page instead.
+        */
+       if (csize == CHARGE_SIZE)
                return CHARGE_RETRY;
 
        if (!(gfp_mask & __GFP_WAIT))
                return CHARGE_WOULDBLOCK;
 
        ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
                                              gfp_mask, flags);
+       if (mem_cgroup_check_margin(mem_over_limit, csize))
+               return CHARGE_RETRY;
        /*
-        * try_to_free_mem_cgroup_pages() might not give us a full
-        * picture of reclaim. Some pages are reclaimed and might be
-        * moved to swap cache or just unmapped from the cgroup.
-        * Check the limit again to see if the reclaim reduced the
-        * current usage of the cgroup before giving up
+        * Even though the limit is exceeded at this point, reclaim
+        * may have been able to free some pages. Retry the charge
+        * before killing the task.
+        *
+        * Only for regular pages, though: huge pages are rather
+        * unlikely to succeed so close to the limit, and we fall back
+        * to regular pages anyway in case of failure.
         */
-       if (ret || mem_cgroup_check_under_limit(mem_over_limit))
+       if (csize == PAGE_SIZE && ret)
                return CHARGE_RETRY;
 
        /*
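
The retry policy after a failed charge is now: never reclaim for the optional batch charge (drop back to a single page instead); give up immediately when the caller cannot sleep; retry if reclaim opened enough margin; and, for a single regular page only, retry once more whenever reclaim made any progress. A compilable sketch of that decision ladder; the parameter names are placeholders for kernel state, not kernel API:

    #include <stdio.h>

    enum charge_outcome { RETRY, WOULDBLOCK, FALLTHROUGH_TO_OOM };

    /* Sketch of the reworked policy in __mem_cgroup_do_charge(): the flags
     * stand in for csize == CHARGE_SIZE / PAGE_SIZE, __GFP_WAIT,
     * mem_cgroup_check_margin() and the reclaim result. */
    static enum charge_outcome after_limit_hit(int is_batch, int can_wait,
                                               int margin_ok, int is_single_page,
                                               int reclaim_made_progress)
    {
        if (is_batch)           /* never reclaim for optional batching... */
            return RETRY;       /* ...retry with a single page instead */
        if (!can_wait)
            return WOULDBLOCK;
        /* hierarchical reclaim runs here */
        if (margin_ok)          /* reclaim opened enough margin */
            return RETRY;
        if (is_single_page && reclaim_made_progress)
            return RETRY;       /* one more try before the OOM path */
        return FALLTHROUGH_TO_OOM;  /* huge pages fall back to 4k anyway */
    }

    int main(void)
    {
        /* huge page, reclaim made progress, but no margin: no retry */
        printf("%d\n", after_limit_hit(0, 1, 0, 0, 1)); /* 2: OOM path */
        return 0;
    }
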
@@ -2323,13 +2353,19 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
                                gfp_t gfp_mask, enum charge_type ctype)
 {
        struct mem_cgroup *mem = NULL;
+       int page_size = PAGE_SIZE;
        struct page_cgroup *pc;
+       bool oom = true;
        int ret;
-       int page_size = PAGE_SIZE;
 
        if (PageTransHuge(page)) {
                page_size <<= compound_order(page);
                VM_BUG_ON(!PageTransHuge(page));
+               /*
+                * Never OOM-kill a process for a huge page. The
+                * fault handler will fall back to regular pages.
+                */
+               oom = false;
        }
 
        pc = lookup_page_cgroup(page);
@@ -2338,7 +2374,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
                return 0;
        prefetchw(pc);
 
-       ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
+       ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, oom, page_size);
        if (ret || !mem)
                return ret;
 
@@ -5024,9 +5060,9 @@ struct cgroup_subsys mem_cgroup_subsys = {
 static int __init enable_swap_account(char *s)
 {
        /* consider enabled if no parameter or 1 is given */
-       if (!s || !strcmp(s, "1"))
+       if (!(*s) || !strcmp(s, "=1"))
                really_do_swap_account = 1;
-       else if (!strcmp(s, "0"))
+       else if (!strcmp(s, "=0"))
                really_do_swap_account = 0;
        return 1;
 }
@@ -5034,7 +5070,8 @@ __setup("swapaccount", enable_swap_account);
 
 static int __init disable_swap_account(char *s)
 {
-       enable_swap_account("0");
+       printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n");
+       enable_swap_account("=0");
        return 1;
 }
 __setup("noswapaccount", disable_swap_account);
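
The parsing fix reflects how __setup() hands arguments to the handler: for __setup("swapaccount", ...) the handler receives the command-line text after the matched prefix, so booting with "swapaccount=1" passes the string "=1" and a bare "swapaccount" passes an empty string. The old comparisons against "1" and "0" could therefore never match. A quick userspace check of the fixed comparisons:

    #include <stdio.h>
    #include <string.h>

    static int really_do_swap_account;

    /* s is what follows "swapaccount" on the command line: "", "=0" or "=1". */
    static int enable_swap_account(char *s)
    {
        if (!(*s) || !strcmp(s, "=1"))
            really_do_swap_account = 1;
        else if (!strcmp(s, "=0"))
            really_do_swap_account = 0;
        return 1;
    }

    int main(void)
    {
        enable_swap_account("=0");
        printf("swapaccount=0 -> %d\n", really_do_swap_account); /* 0 */
        enable_swap_account("");
        printf("swapaccount   -> %d\n", really_do_swap_account); /* 1 */
        return 0;
    }
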
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 548fbd70f026..0207c2f6f8bd 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -233,8 +233,8 @@ void shake_page(struct page *p, int access)
        }
 
        /*
-        * Only all shrink_slab here (which would also
-        * shrink other caches) if access is not potentially fatal.
+        * Only call shrink_slab here (which would also shrink other caches) if
+        * access is not potentially fatal.
         */
        if (access) {
                int nr;
@@ -386,8 +386,6 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
        struct task_struct *tsk;
        struct anon_vma *av;
 
-       if (!PageHuge(page) && unlikely(split_huge_page(page)))
-               return;
        read_lock(&tasklist_lock);
        av = page_lock_anon_vma(page);
        if (av == NULL) /* Not actually mapped anymore */
@@ -856,6 +854,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
        int ret;
        int kill = 1;
        struct page *hpage = compound_head(p);
+       struct page *ppage;
 
        if (PageReserved(p) || PageSlab(p))
                return SWAP_SUCCESS;
@@ -897,6 +896,44 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
        }
 
        /*
+        * ppage: poisoned page
+        *   if p is a regular page (4k page),
+        *     ppage == the real poisoned page;
+        *   else p is hugetlb or THP, and ppage == the head page.
+        */
+       ppage = hpage;
+
+       if (PageTransHuge(hpage)) {
+               /*
+                * Verify that this isn't a hugetlbfs head page; the check
+                * for PageAnon is just to avoid tripping a split_huge_page
+                * internal debug check, as split_huge_page refuses to deal
+                * with anything that isn't an anon page. PageAnon can't go
+                * away from under us because we hold a refcount on the
+                * hpage; without a refcount on the hpage, split_huge_page
+                * can't be safely called in the first place, and having a
+                * refcount on the tail isn't enough to be safe.
+                */
+               if (!PageHuge(hpage) && PageAnon(hpage)) {
+                       if (unlikely(split_huge_page(hpage))) {
+                               /*
+                                * FIXME: if splitting the THP fails, it would
+                                * be better to stop the following operation
+                                * rather than cause a panic by unmapping. The
+                                * system might survive if the page is freed
+                                * later.
+                                */
+                               printk(KERN_INFO
+                                       "MCE %#lx: failed to split THP\n", pfn);
+
+                               BUG_ON(!PageHWPoison(p));
+                               return SWAP_FAIL;
+                       }
+                       /* THP is split, so ppage is the real poisoned page. */
+                       ppage = p;
+               }
+       }
+
+       /*
         * First collect all the processes that have the page
         * mapped in dirty form. This has to be done before try_to_unmap,
         * because ttu takes the rmap data structures down.
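
The point of ppage: once split_huge_page() succeeds, the poisoned subpage p is an ordinary 4k page again, and the rest of the function (collect_procs, try_to_unmap, the kill) must target it rather than the stale head page. A toy model of the selection, with boolean fields standing in for the real page-flag tests:

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-ins for PageHuge()/PageTransHuge()/PageAnon(). */
    struct page { bool hugetlb, thp, anon; };

    /* Pretend the split always succeeds in this sketch. */
    static int split_huge_page(struct page *hpage) { hpage->thp = false; return 0; }

    static struct page *poisoned_target(struct page *p, struct page *hpage)
    {
        struct page *ppage = hpage;     /* default: operate on the head page */

        if (hpage->thp && !hpage->hugetlb && hpage->anon) {
            if (split_huge_page(hpage))
                return NULL;            /* caller returns SWAP_FAIL */
            ppage = p;                  /* after the split, p is a plain 4k page */
        }
        return ppage;
    }

    int main(void)
    {
        struct page head = { .thp = true, .anon = true }, tail = { 0 };
        printf("target is %s\n",
               poisoned_target(&tail, &head) == &tail ? "the 4k page"
                                                      : "the head page");
        return 0;
    }
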
@@ -905,12 +942,18 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
         * there's nothing that can be done.
         */
        if (kill)
-               collect_procs(hpage, &tokill);
+               collect_procs(ppage, &tokill);
+
+       if (hpage != ppage)
+               lock_page_nosync(ppage);
 
-       ret = try_to_unmap(hpage, ttu);
+       ret = try_to_unmap(ppage, ttu);
        if (ret != SWAP_SUCCESS)
                printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
-                               pfn, page_mapcount(hpage));
+                               pfn, page_mapcount(ppage));
+
+       if (hpage != ppage)
+               unlock_page(ppage);
 
        /*
         * Now that the dirty bit has been propagated to the
@@ -1022,19 +1065,22 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
         * The check (unnecessarily) ignores LRU pages being isolated and
         * walked by the page reclaim code, however that's not a big loss.
         */
-       if (!PageLRU(p) && !PageHuge(p))
-               shake_page(p, 0);
-       if (!PageLRU(p) && !PageHuge(p)) {
-               /*
-                * shake_page could have turned it free.
-                */
-               if (is_free_buddy_page(p)) {
-                       action_result(pfn, "free buddy, 2nd try", DELAYED);
-                       return 0;
+       if (!PageHuge(p) && !PageTransCompound(p)) {
+               if (!PageLRU(p))
+                       shake_page(p, 0);
+               if (!PageLRU(p)) {
+                       /*
+                        * shake_page could have turned it free.
+                        */
+                       if (is_free_buddy_page(p)) {
+                               action_result(pfn, "free buddy, 2nd try",
+                                       DELAYED);
+                               return 0;
+                       }
+                       action_result(pfn, "non LRU", IGNORED);
+                       put_page(p);
+                       return -EBUSY;
                }
-               action_result(pfn, "non LRU", IGNORED);
-               put_page(p);
-               return -EBUSY;
        }
 
        /*
@@ -1064,7 +1110,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
         * For error on the tail page, we should set PG_hwpoison
         * on the head page to show that the hugepage is hwpoisoned
         */
-       if (PageTail(p) && TestSetPageHWPoison(hpage)) {
+       if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
                action_result(pfn, "hugepage already hardware poisoned",
                                IGNORED);
                unlock_page(hpage);
@@ -1295,7 +1341,10 @@ static int soft_offline_huge_page(struct page *page, int flags)
        ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0,
                                 true);
        if (ret) {
-               putback_lru_pages(&pagelist);
+               struct page *page1, *page2;
+               list_for_each_entry_safe(page1, page2, &pagelist, lru)
+                       put_page(page1);
+
                pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
                        pfn, ret, page->flags);
                if (ret > 0)
@@ -1419,6 +1468,7 @@ int soft_offline_page(struct page *page, int flags)
        ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
                                0, true);
        if (ret) {
+               putback_lru_pages(&pagelist);
                pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
                        pfn, ret, page->flags);
                if (ret > 0)
diff --git a/mm/memory.c b/mm/memory.c
index 31250faff390..8e8c18324863 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2219,7 +2219,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                 &ptl);
        if (!pte_same(*page_table, orig_pte)) {
                unlock_page(old_page);
-               page_cache_release(old_page);
                goto unlock;
        }
        page_cache_release(old_page);
@@ -2289,7 +2288,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                                 &ptl);
        if (!pte_same(*page_table, orig_pte)) {
                unlock_page(old_page);
-               page_cache_release(old_page);
                goto unlock;
        }
 
@@ -2367,16 +2365,6 @@ gotten:
        }
        __SetPageUptodate(new_page);
 
-       /*
-        * Don't let another task, with possibly unlocked vma,
-        * keep the mlocked page.
-        */
-       if ((vma->vm_flags & VM_LOCKED) && old_page) {
-               lock_page(old_page);    /* for LRU manipulation */
-               clear_page_mlock(old_page);
-               unlock_page(old_page);
-       }
-
        if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
                goto oom_free_new;
 
@@ -2444,10 +2432,20 @@ gotten:
 
        if (new_page)
                page_cache_release(new_page);
-       if (old_page)
-               page_cache_release(old_page);
 unlock:
        pte_unmap_unlock(page_table, ptl);
+       if (old_page) {
+               /*
+                * Don't let another task, with possibly unlocked vma,
+                * keep the mlocked page.
+                */
+               if ((ret & VM_FAULT_WRITE) && (vma->vm_flags & VM_LOCKED)) {
+                       lock_page(old_page);    /* LRU manipulation */
+                       munlock_vma_page(old_page);
+                       unlock_page(old_page);
+               }
+               page_cache_release(old_page);
+       }
        return ret;
 oom_free_new:
        page_cache_release(new_page);
@@ -3053,12 +3051,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
                        goto out;
                }
                charged = 1;
-               /*
-                * Don't let another task, with possibly unlocked vma,
-                * keep the mlocked page.
-                */
-               if (vma->vm_flags & VM_LOCKED)
-                       clear_page_mlock(vmf.page);
                copy_user_highpage(page, vmf.page, address, vma);
                __SetPageUptodate(page);
        } else {
diff --git a/mm/migrate.c b/mm/migrate.c
index 9f29a3b7aac2..766115253807 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -772,6 +772,7 @@ uncharge:
 unlock:
        unlock_page(page);
 
+move_newpage:
        if (rc != -EAGAIN) {
                /*
                 * A page that has been migrated has all references
@@ -785,8 +786,6 @@ unlock:
                putback_lru_page(page);
        }
 
-move_newpage:
-
        /*
         * Move the new page to the LRU. If migration was not successful
         * then this will free the page.
@@ -981,10 +980,6 @@ int migrate_huge_pages(struct list_head *from,
        }
        rc = 0;
 out:
-
-       list_for_each_entry_safe(page, page2, from, lru)
-               put_page(page);
-
        if (rc)
                return rc;
 
diff --git a/mm/mlock.c b/mm/mlock.c
index 13e81ee8be9d..c3924c7f00be 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -178,6 +178,13 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
        if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
                gup_flags |= FOLL_WRITE;
 
+       /*
+        * We want mlock to succeed for regions that have any permissions
+        * other than PROT_NONE.
+        */
+       if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
+               gup_flags |= FOLL_FORCE;
+
        if (vma->vm_flags & VM_LOCKED)
                gup_flags |= FOLL_MLOCK;
 
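
FOLL_FORCE is added so get_user_pages() can fault in pages of, say, a write-only or exec-only region that mlock is allowed to populate; a genuine PROT_NONE region sets none of VM_READ/VM_WRITE/VM_EXEC and still fails. A sketch of the flag composition, with placeholder values for the flag bits:

    #include <stdio.h>

    /* Stand-in flag values; the relationships, not the numbers, matter. */
    #define VM_READ    0x1
    #define VM_WRITE   0x2
    #define VM_EXEC    0x4
    #define VM_SHARED  0x8
    #define VM_LOCKED  0x10

    #define FOLL_WRITE 0x01
    #define FOLL_FORCE 0x02
    #define FOLL_MLOCK 0x04

    static int gup_flags_for(unsigned long vm_flags)
    {
        int gup_flags = 0;

        if ((vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
            gup_flags |= FOLL_WRITE;
        /* any permission other than PROT_NONE: override the access check */
        if (vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
            gup_flags |= FOLL_FORCE;
        if (vm_flags & VM_LOCKED)
            gup_flags |= FOLL_MLOCK;
        return gup_flags;
    }

    int main(void)
    {
        printf("PROT_WRITE|locked -> %#x\n", gup_flags_for(VM_WRITE | VM_LOCKED));
        printf("PROT_NONE |locked -> %#x\n", gup_flags_for(VM_LOCKED));
        return 0;
    }
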
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 148c6e630df2..17497d0cd8b9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1882,12 +1882,12 @@ static void shrink_zone(int priority, struct zone *zone,
        unsigned long nr[NR_LRU_LISTS];
        unsigned long nr_to_scan;
        enum lru_list l;
-       unsigned long nr_reclaimed;
+       unsigned long nr_reclaimed, nr_scanned;
        unsigned long nr_to_reclaim = sc->nr_to_reclaim;
-       unsigned long nr_scanned = sc->nr_scanned;
 
 restart:
        nr_reclaimed = 0;
+       nr_scanned = sc->nr_scanned;
        get_scan_count(zone, sc, nr, priority);
 
        while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
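
Taking the nr_scanned snapshot under the restart label matters because shrink_zone() can loop back to it: a baseline captured only once at function entry would make a later pass appear to have scanned work done by the earlier one. A toy model of the snapshot-per-pass pattern:

    #include <stdio.h>

    /* scan_pass() stands in for one shrink pass over the zone. */
    struct scan_control { unsigned long nr_scanned; };

    static unsigned long scan_pass(struct scan_control *sc)
    {
        sc->nr_scanned += 100;  /* pretend we scanned 100 pages */
        return 10;              /* and reclaimed 10 */
    }

    int main(void)
    {
        struct scan_control sc = { 0 };
        unsigned long nr_reclaimed, nr_scanned;
        int passes = 2;

    restart:
        nr_reclaimed = 0;
        nr_scanned = sc.nr_scanned;     /* fresh baseline for this pass */
        nr_reclaimed += scan_pass(&sc);
        printf("pass scanned %lu, reclaimed %lu\n",
               sc.nr_scanned - nr_scanned, nr_reclaimed);
        if (--passes)
            goto restart;       /* second pass still reports 100, not 200 */
        return 0;
    }
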