Diffstat (limited to 'mm')
-rw-r--r--  mm/huge_memory.c    | 44
-rw-r--r--  mm/memblock.c       |  2
-rw-r--r--  mm/memcontrol.c     | 67
-rw-r--r--  mm/memory-failure.c | 94
-rw-r--r--  mm/memory.c         | 32
-rw-r--r--  mm/migrate.c        |  7
-rw-r--r--  mm/mlock.c          |  7
-rw-r--r--  mm/vmscan.c         |  4
8 files changed, 169 insertions, 88 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e187454d82f6..3e29781ee762 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1162,7 +1162,12 @@ static void __split_huge_page_refcount(struct page *page)
 		/* after clearing PageTail the gup refcount can be released */
 		smp_mb();
 
-		page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
+		/*
+		 * retain the hwpoison flag of the poisoned tail page:
+		 * fixes the wrong process being killed in a KVM guest
+		 * by memory-failure.
+		 */
+		page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON;
 		page_tail->flags |= (page->flags &
 				     ((1L << PG_referenced) |
 				      (1L << PG_swapbacked) |
@@ -1806,6 +1811,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
 	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
 		goto out;
+	if (is_vma_temporary_stack(vma))
+		goto out;
 	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
 	pgd = pgd_offset(mm, address);
@@ -1847,7 +1854,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 		set_pmd_at(mm, address, pmd, _pmd);
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock(vma->anon_vma);
-		mem_cgroup_uncharge_page(new_page);
 		goto out;
 	}
 
@@ -1893,6 +1899,7 @@ out_up_write:
 	return;
 
 out:
+	mem_cgroup_uncharge_page(new_page);
 #ifdef CONFIG_NUMA
 	put_page(new_page);
 #endif
@@ -2027,32 +2034,27 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 		if ((!(vma->vm_flags & VM_HUGEPAGE) &&
 		     !khugepaged_always()) ||
 		    (vma->vm_flags & VM_NOHUGEPAGE)) {
+		skip:
 			progress++;
 			continue;
 		}
-
 		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-		if (!vma->anon_vma || vma->vm_ops || vma->vm_file) {
-			khugepaged_scan.address = vma->vm_end;
-			progress++;
-			continue;
-		}
+		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+			goto skip;
+		if (is_vma_temporary_stack(vma))
+			goto skip;
+
 		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 		hend = vma->vm_end & HPAGE_PMD_MASK;
-		if (hstart >= hend) {
-			progress++;
-			continue;
-		}
+		if (hstart >= hend)
+			goto skip;
+		if (khugepaged_scan.address > hend)
+			goto skip;
 		if (khugepaged_scan.address < hstart)
 			khugepaged_scan.address = hstart;
-		if (khugepaged_scan.address > hend) {
-			khugepaged_scan.address = hend + HPAGE_PMD_SIZE;
-			progress++;
-			continue;
-		}
-		BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
+		VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
 
 		while (khugepaged_scan.address < hend) {
 			int ret;
@@ -2081,7 +2083,7 @@ breakouterloop:
 breakouterloop_mmap_sem:
 
 	spin_lock(&khugepaged_mm_lock);
-	BUG_ON(khugepaged_scan.mm_slot != mm_slot);
+	VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot);
 	/*
 	 * Release the current mm_slot if this mm is about to die, or
 	 * if we scanned all vmas of this mm.
@@ -2236,9 +2238,9 @@ static int khugepaged(void *none)
 
 	for (;;) {
 		mutex_unlock(&khugepaged_mutex);
-		BUG_ON(khugepaged_thread != current);
+		VM_BUG_ON(khugepaged_thread != current);
 		khugepaged_loop();
-		BUG_ON(khugepaged_thread != current);
+		VM_BUG_ON(khugepaged_thread != current);
 
 		mutex_lock(&khugepaged_mutex);
 		if (!khugepaged_enabled())
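
A note on the tail-flags line added above: unary ~ binds tighter than |, so the
right-hand side is (~PAGE_FLAGS_CHECK_AT_PREP) | __PG_HWPOISON, i.e. the usual
clear-mask widened by one bit so that PG_hwpoison survives the split. A minimal
userspace sketch of the effect, using made-up bit positions rather than the
kernel's real masks:

	#include <stdio.h>

	/* stand-in bit masks; the real kernel values differ */
	#define PG_HWPOISON_BIT      (1UL << 3)
	#define FLAGS_CHECK_AT_PREP  ((1UL << 1) | (1UL << 2) | PG_HWPOISON_BIT)

	int main(void)
	{
		unsigned long tail_flags = (1UL << 1) | PG_HWPOISON_BIT;

		/* same expression shape as the patch */
		tail_flags &= ~FLAGS_CHECK_AT_PREP | PG_HWPOISON_BIT;

		printf("hwpoison retained: %s\n",
		       (tail_flags & PG_HWPOISON_BIT) ? "yes" : "no");
		return 0;
	}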
diff --git a/mm/memblock.c b/mm/memblock.c
index bdba245d8afd..4618fda975a0 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -137,8 +137,6 @@ static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size,
 
 	BUG_ON(0 == size);
 
-	size = memblock_align_up(size, align);
-
 	/* Pump up max_addr */
 	if (end == MEMBLOCK_ALLOC_ACCESSIBLE)
 		end = memblock.current_limit;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3878cfe399dc..da53a252b259 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -612,8 +612,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem,
 	/* pagein of a big page is an event. So, ignore page size */
 	if (nr_pages > 0)
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]);
-	else
+	else {
 		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]);
+		nr_pages = -nr_pages; /* for event */
+	}
 
 	__this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages);
 
@@ -1111,6 +1113,23 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem)
 	return false;
 }
 
+/**
+ * mem_cgroup_check_margin - check if the memory cgroup allows charging
+ * @mem: memory cgroup to check
+ * @bytes: the number of bytes the caller intends to charge
+ *
+ * Returns a boolean value on whether @mem can be charged @bytes or
+ * whether this would exceed the limit.
+ */
+static bool mem_cgroup_check_margin(struct mem_cgroup *mem, unsigned long bytes)
+{
+	if (!res_counter_check_margin(&mem->res, bytes))
+		return false;
+	if (do_swap_account && !res_counter_check_margin(&mem->memsw, bytes))
+		return false;
+	return true;
+}
+
 static unsigned int get_swappiness(struct mem_cgroup *memcg)
 {
 	struct cgroup *cgrp = memcg->css.cgroup;
@@ -1837,23 +1856,34 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask,
 		flags |= MEM_CGROUP_RECLAIM_NOSWAP;
 	} else
 		mem_over_limit = mem_cgroup_from_res_counter(fail_res, res);
-
-	if (csize > PAGE_SIZE) /* change csize and retry */
+	/*
+	 * csize can be either a huge page (HPAGE_SIZE), a batch of
+	 * regular pages (CHARGE_SIZE), or a single regular page
+	 * (PAGE_SIZE).
+	 *
+	 * Never reclaim on behalf of optional batching, retry with a
+	 * single page instead.
+	 */
+	if (csize == CHARGE_SIZE)
 		return CHARGE_RETRY;
 
 	if (!(gfp_mask & __GFP_WAIT))
 		return CHARGE_WOULDBLOCK;
 
 	ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
 					      gfp_mask, flags);
+	if (mem_cgroup_check_margin(mem_over_limit, csize))
+		return CHARGE_RETRY;
 	/*
-	 * try_to_free_mem_cgroup_pages() might not give us a full
-	 * picture of reclaim. Some pages are reclaimed and might be
-	 * moved to swap cache or just unmapped from the cgroup.
-	 * Check the limit again to see if the reclaim reduced the
-	 * current usage of the cgroup before giving up
+	 * Even though the limit is exceeded at this point, reclaim
+	 * may have been able to free some pages. Retry the charge
+	 * before killing the task.
+	 *
+	 * Only for regular pages, though: huge pages are rather
+	 * unlikely to succeed so close to the limit, and we fall back
+	 * to regular pages anyway in case of failure.
 	 */
-	if (ret || mem_cgroup_check_under_limit(mem_over_limit))
+	if (csize == PAGE_SIZE && ret)
 		return CHARGE_RETRY;
 
 	/*
@@ -2323,13 +2353,19 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 				gfp_t gfp_mask, enum charge_type ctype)
 {
 	struct mem_cgroup *mem = NULL;
+	int page_size = PAGE_SIZE;
 	struct page_cgroup *pc;
+	bool oom = true;
 	int ret;
-	int page_size = PAGE_SIZE;
 
 	if (PageTransHuge(page)) {
 		page_size <<= compound_order(page);
 		VM_BUG_ON(!PageTransHuge(page));
+		/*
+		 * Never OOM-kill a process for a huge page. The
+		 * fault handler will fall back to regular pages.
+		 */
+		oom = false;
 	}
 
 	pc = lookup_page_cgroup(page);
@@ -2338,7 +2374,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
 		return 0;
 	prefetchw(pc);
 
-	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size);
+	ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, oom, page_size);
 	if (ret || !mem)
 		return ret;
 
@@ -5024,9 +5060,9 @@ struct cgroup_subsys mem_cgroup_subsys = {
 static int __init enable_swap_account(char *s)
 {
 	/* consider enabled if no parameter or 1 is given */
-	if (!s || !strcmp(s, "1"))
+	if (!(*s) || !strcmp(s, "=1"))
 		really_do_swap_account = 1;
-	else if (!strcmp(s, "0"))
+	else if (!strcmp(s, "=0"))
 		really_do_swap_account = 0;
 	return 1;
 }
@@ -5034,7 +5070,8 @@ __setup("swapaccount", enable_swap_account);
 
 static int __init disable_swap_account(char *s)
 {
-	enable_swap_account("0");
+	printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n");
+	enable_swap_account("=0");
 	return 1;
 }
 __setup("noswapaccount", disable_swap_account);
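
Why the string literals changed: with __setup("swapaccount", fn) -- no trailing
'=' in the pattern -- the kernel hands the handler everything that follows the
literal "swapaccount" on the command line, so swapaccount=1 arrives as "=1",
swapaccount=0 as "=0", and a bare swapaccount as "" (which is why !s became
!(*s)). A stand-alone sketch of the patched parsing, outside the kernel:

	#include <stdio.h>
	#include <string.h>

	static int really_do_swap_account = 1;

	static int enable_swap_account(char *s)
	{
		/* consider enabled if no parameter or 1 is given */
		if (!(*s) || !strcmp(s, "=1"))
			really_do_swap_account = 1;
		else if (!strcmp(s, "=0"))
			really_do_swap_account = 0;
		return 1;
	}

	int main(void)
	{
		enable_swap_account("=0");	/* boot arg: swapaccount=0 */
		printf("swapaccount=0 -> %d\n", really_do_swap_account);
		enable_swap_account("");	/* boot arg: swapaccount */
		printf("swapaccount   -> %d\n", really_do_swap_account);
		return 0;
	}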
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 548fbd70f026..0207c2f6f8bd 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -233,8 +233,8 @@ void shake_page(struct page *p, int access)
 	}
 
 	/*
-	 * Only all shrink_slab here (which would also
-	 * shrink other caches) if access is not potentially fatal.
+	 * Only call shrink_slab here (which would also shrink other caches) if
+	 * access is not potentially fatal.
 	 */
 	if (access) {
 		int nr;
@@ -386,8 +386,6 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	struct task_struct *tsk;
 	struct anon_vma *av;
 
-	if (!PageHuge(page) && unlikely(split_huge_page(page)))
-		return;
 	read_lock(&tasklist_lock);
 	av = page_lock_anon_vma(page);
 	if (av == NULL)	/* Not actually mapped anymore */
@@ -856,6 +854,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	int ret;
 	int kill = 1;
 	struct page *hpage = compound_head(p);
+	struct page *ppage;
 
 	if (PageReserved(p) || PageSlab(p))
 		return SWAP_SUCCESS;
@@ -897,6 +896,44 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	}
 
 	/*
+	 * ppage: the poisoned page to operate on.
+	 *   If p is a regular (4k) page,
+	 *        ppage == the real poisoned page;
+	 *   else p is hugetlb or THP, and ppage == the head page.
+	 */
+	ppage = hpage;
+
+	if (PageTransHuge(hpage)) {
+		/*
+		 * Verify that this isn't a hugetlbfs head page. The check
+		 * for PageAnon is just to avoid tripping a split_huge_page
+		 * internal debug check, as split_huge_page refuses to deal
+		 * with anything that isn't an anon page. PageAnon can't go
+		 * away from under us because we hold a refcount on the
+		 * hpage; without a refcount on the hpage, split_huge_page
+		 * can't be safely called in the first place, and having a
+		 * refcount on the tail isn't enough to be safe.
+		 */
+		if (!PageHuge(hpage) && PageAnon(hpage)) {
+			if (unlikely(split_huge_page(hpage))) {
+				/*
+				 * FIXME: if splitting THP fails, it is
+				 * better to stop the following operation
+				 * rather than causing a panic by unmapping.
+				 * The system might survive if the page is
+				 * freed later.
+				 */
+				printk(KERN_INFO
+					"MCE %#lx: failed to split THP\n", pfn);
+
+				BUG_ON(!PageHWPoison(p));
+				return SWAP_FAIL;
+			}
+			/* THP is split, so ppage should be the real poisoned page. */
+			ppage = p;
+		}
+	}
+
+	/*
 	 * First collect all the processes that have the page
 	 * mapped in dirty form. This has to be done before try_to_unmap,
 	 * because ttu takes the rmap data structures down.
@@ -905,12 +942,18 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * there's nothing that can be done.
 	 */
 	if (kill)
-		collect_procs(hpage, &tokill);
+		collect_procs(ppage, &tokill);
+
+	if (hpage != ppage)
+		lock_page_nosync(ppage);
 
-	ret = try_to_unmap(hpage, ttu);
+	ret = try_to_unmap(ppage, ttu);
 	if (ret != SWAP_SUCCESS)
 		printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n",
-				pfn, page_mapcount(hpage));
+				pfn, page_mapcount(ppage));
+
+	if (hpage != ppage)
+		unlock_page(ppage);
 
 	/*
 	 * Now that the dirty bit has been propagated to the
@@ -921,7 +964,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 * use a more forceful uncatchable kill to prevent
 	 * any accesses to the poisoned memory.
 	 */
-	kill_procs_ao(&tokill, !!PageDirty(hpage), trapno,
+	kill_procs_ao(&tokill, !!PageDirty(ppage), trapno,
 		      ret != SWAP_SUCCESS, p, pfn);
 
 	return ret;
@@ -1022,19 +1065,22 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * The check (unnecessarily) ignores LRU pages being isolated and
 	 * walked by the page reclaim code, however that's not a big loss.
 	 */
-	if (!PageLRU(p) && !PageHuge(p))
-		shake_page(p, 0);
-	if (!PageLRU(p) && !PageHuge(p)) {
-		/*
-		 * shake_page could have turned it free.
-		 */
-		if (is_free_buddy_page(p)) {
-			action_result(pfn, "free buddy, 2nd try", DELAYED);
-			return 0;
-		}
-		action_result(pfn, "non LRU", IGNORED);
-		put_page(p);
-		return -EBUSY;
+	if (!PageHuge(p) && !PageTransCompound(p)) {
+		if (!PageLRU(p))
+			shake_page(p, 0);
+		if (!PageLRU(p)) {
+			/*
+			 * shake_page could have turned it free.
+			 */
+			if (is_free_buddy_page(p)) {
+				action_result(pfn, "free buddy, 2nd try",
+						DELAYED);
+				return 0;
+			}
+			action_result(pfn, "non LRU", IGNORED);
+			put_page(p);
+			return -EBUSY;
+		}
 	}
 
 	/*
@@ -1064,7 +1110,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
 	 * For error on the tail page, we should set PG_hwpoison
 	 * on the head page to show that the hugepage is hwpoisoned
 	 */
-	if (PageTail(p) && TestSetPageHWPoison(hpage)) {
+	if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) {
 		action_result(pfn, "hugepage already hardware poisoned",
 			IGNORED);
 		unlock_page(hpage);
@@ -1295,7 +1341,10 @@ static int soft_offline_huge_page(struct page *page, int flags)
 	ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0,
 				true);
 	if (ret) {
-		putback_lru_pages(&pagelist);
+		struct page *page1, *page2;
+		list_for_each_entry_safe(page1, page2, &pagelist, lru)
+			put_page(page1);
+
 		pr_debug("soft offline: %#lx: migration failed %d, type %lx\n",
 			pfn, ret, page->flags);
 		if (ret > 0)
@@ -1419,6 +1468,7 @@ int soft_offline_page(struct page *page, int flags)
 		ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
 							0, true);
 		if (ret) {
+			putback_lru_pages(&pagelist);
 			pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
 				pfn, ret, page->flags);
 			if (ret > 0)
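
Taken together, the hwpoison_user_mappings() changes select a "ppage" to
collect, lock, and unmap: the page itself for regular 4k pages and for THP
that was successfully split, the compound head for hugetlb, and a hard
SWAP_FAIL when an anonymous THP cannot be split. A compact decision-table
sketch of that selection (hypothetical enum and helper, not kernel code):

	#include <stdio.h>

	enum page_kind { PAGE_4K, PAGE_THP, PAGE_HUGETLB };

	static const char *poison_target(enum page_kind kind, int split_ok)
	{
		switch (kind) {
		case PAGE_4K:
			return "p itself";
		case PAGE_THP:
			return split_ok ? "p (after split_huge_page)"
					: "none: unmap fails with SWAP_FAIL";
		case PAGE_HUGETLB:
			return "compound head page";
		}
		return "?";
	}

	int main(void)
	{
		printf("4k page:       %s\n", poison_target(PAGE_4K, 0));
		printf("THP, split ok: %s\n", poison_target(PAGE_THP, 1));
		printf("THP, no split: %s\n", poison_target(PAGE_THP, 0));
		printf("hugetlb:       %s\n", poison_target(PAGE_HUGETLB, 0));
		return 0;
	}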
diff --git a/mm/memory.c b/mm/memory.c
index 31250faff390..8e8c18324863 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2219,7 +2219,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				 &ptl);
 		if (!pte_same(*page_table, orig_pte)) {
 			unlock_page(old_page);
-			page_cache_release(old_page);
 			goto unlock;
 		}
 		page_cache_release(old_page);
@@ -2289,7 +2288,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				 &ptl);
 		if (!pte_same(*page_table, orig_pte)) {
 			unlock_page(old_page);
-			page_cache_release(old_page);
 			goto unlock;
 		}
 
@@ -2367,16 +2365,6 @@ gotten:
 	}
 	__SetPageUptodate(new_page);
 
-	/*
-	 * Don't let another task, with possibly unlocked vma,
-	 * keep the mlocked page.
-	 */
-	if ((vma->vm_flags & VM_LOCKED) && old_page) {
-		lock_page(old_page);	/* for LRU manipulation */
-		clear_page_mlock(old_page);
-		unlock_page(old_page);
-	}
-
 	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
 		goto oom_free_new;
 
@@ -2444,10 +2432,20 @@ gotten:
 
 	if (new_page)
 		page_cache_release(new_page);
-	if (old_page)
-		page_cache_release(old_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (old_page) {
+		/*
+		 * Don't let another task, with possibly unlocked vma,
+		 * keep the mlocked page.
+		 */
+		if ((ret & VM_FAULT_WRITE) && (vma->vm_flags & VM_LOCKED)) {
+			lock_page(old_page);	/* LRU manipulation */
+			munlock_vma_page(old_page);
+			unlock_page(old_page);
+		}
+		page_cache_release(old_page);
+	}
 	return ret;
 oom_free_new:
 	page_cache_release(new_page);
@@ -3053,12 +3051,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 				goto out;
 			}
 			charged = 1;
-			/*
-			 * Don't let another task, with possibly unlocked vma,
-			 * keep the mlocked page.
-			 */
-			if (vma->vm_flags & VM_LOCKED)
-				clear_page_mlock(vmf.page);
 			copy_user_highpage(page, vmf.page, address, vma);
 			__SetPageUptodate(page);
 		} else {
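
The common thread in the memory.c hunks: the speculative clear_page_mlock()
calls are gone, and do_wp_page() now munlocks the old page only after the
page-table lock has been dropped and only when the fault actually installed
the new page (VM_FAULT_WRITE set), so a racing task cannot re-mlock a page
that is about to be freed. A skeleton of that ordering, with no-op stand-ins
for the kernel's locking primitives (hypothetical names, not the real APIs):

	#include <stdbool.h>
	#include <stdio.h>

	#define VM_FAULT_WRITE	0x1
	#define VM_LOCKED	0x2

	/* no-op stand-ins for the kernel's primitives */
	static void pte_unmap_unlock_stub(void) { puts("drop PTE lock"); }
	static void lock_page_stub(void)        { puts("lock old page"); }
	static void munlock_page_stub(void)     { puts("munlock old page"); }
	static void unlock_page_stub(void)      { puts("unlock old page"); }
	static void release_page_stub(void)     { puts("drop final reference"); }

	static void wp_page_tail(int ret, unsigned long vm_flags, bool old_page)
	{
		pte_unmap_unlock_stub();	/* PTE lock dropped first */
		if (old_page) {
			if ((ret & VM_FAULT_WRITE) && (vm_flags & VM_LOCKED)) {
				lock_page_stub();	/* LRU manipulation */
				munlock_page_stub();
				unlock_page_stub();
			}
			release_page_stub();
		}
	}

	int main(void)
	{
		wp_page_tail(VM_FAULT_WRITE, VM_LOCKED, true);
		return 0;
	}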
diff --git a/mm/migrate.c b/mm/migrate.c
index 9f29a3b7aac2..766115253807 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -772,6 +772,7 @@ uncharge:
 unlock:
 	unlock_page(page);
 
+move_newpage:
 	if (rc != -EAGAIN) {
 		/*
 		 * A page that has been migrated has all references
@@ -785,8 +786,6 @@ unlock:
 		putback_lru_page(page);
 	}
 
-move_newpage:
-
 	/*
 	 * Move the new page to the LRU. If migration was not successful
 	 * then this will free the page.
@@ -981,10 +980,6 @@ int migrate_huge_pages(struct list_head *from,
 	}
 	rc = 0;
 out:
-
-	list_for_each_entry_safe(page, page2, from, lru)
-		put_page(page);
-
 	if (rc)
 		return rc;
 
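
This is the counterpart of the soft_offline_huge_page() hunk in
memory-failure.c above: migrate_huge_pages() no longer drops the source-page
references on failure; the caller that took them is now responsible for
dropping them. A toy model of that ownership rule (plain C, hypothetical
names, not the kernel's list or refcount primitives):

	#include <stdio.h>

	struct ref { int count; };

	static void get_ref(struct ref *r) { r->count++; }
	static void put_ref(struct ref *r) { r->count--; }

	/* callee may fail, but never drops the caller's references */
	static int migrate_model(struct ref *page)
	{
		(void)page;
		return -1;	/* pretend the migration failed */
	}

	int main(void)
	{
		struct ref page = { 0 };

		get_ref(&page);			/* caller takes the ref... */
		if (migrate_model(&page))
			put_ref(&page);		/* ...and drops it on failure */

		printf("refcount balanced: %s\n",
		       page.count == 0 ? "yes" : "no");
		return 0;
	}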
diff --git a/mm/mlock.c b/mm/mlock.c
index 13e81ee8be9d..c3924c7f00be 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -178,6 +178,13 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma,
 	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
 		gup_flags |= FOLL_WRITE;
 
+	/*
+	 * We want mlock to succeed for regions that have any permissions
+	 * other than PROT_NONE.
+	 */
+	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
+		gup_flags |= FOLL_FORCE;
+
 	if (vma->vm_flags & VM_LOCKED)
 		gup_flags |= FOLL_MLOCK;
 
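
The effect of the new FOLL_FORCE branch is easiest to see as a pure function
of the VMA flags: any region with at least one of read/write/exec permission
can now be faulted in for mlock, and only PROT_NONE regions are left out. A
sketch with stand-in flag values (the real kernel constants differ):

	#include <stdio.h>

	#define VM_READ		0x01
	#define VM_WRITE	0x02
	#define VM_EXEC		0x04
	#define VM_SHARED	0x08
	#define VM_LOCKED	0x10

	#define FOLL_WRITE	0x1
	#define FOLL_FORCE	0x2
	#define FOLL_MLOCK	0x4

	/* mirrors the gup_flags computation in __mlock_vma_pages_range() */
	static unsigned int mlock_gup_flags(unsigned long vm_flags)
	{
		unsigned int gup_flags = 0;

		if ((vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
			gup_flags |= FOLL_WRITE;
		/* any permission other than PROT_NONE allows mlock */
		if (vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
			gup_flags |= FOLL_FORCE;
		if (vm_flags & VM_LOCKED)
			gup_flags |= FOLL_MLOCK;
		return gup_flags;
	}

	int main(void)
	{
		printf("PROT_NONE -> %#x\n", mlock_gup_flags(VM_LOCKED));
		printf("PROT_READ -> %#x\n",
		       mlock_gup_flags(VM_READ | VM_LOCKED));
		return 0;
	}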
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 148c6e630df2..17497d0cd8b9 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1882,12 +1882,12 @@ static void shrink_zone(int priority, struct zone *zone,
 	unsigned long nr[NR_LRU_LISTS];
 	unsigned long nr_to_scan;
 	enum lru_list l;
-	unsigned long nr_reclaimed;
+	unsigned long nr_reclaimed, nr_scanned;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
-	unsigned long nr_scanned = sc->nr_scanned;
 
 restart:
 	nr_reclaimed = 0;
+	nr_scanned = sc->nr_scanned;
 	get_scan_count(zone, sc, nr, priority);
 
 	while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
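
The point of moving the assignment under the restart label: "pages scanned in
this pass" is later computed as a delta against the snapshot, so the baseline
must be retaken whenever shrink_zone() restarts, or the second pass gets
charged with the first pass's scanning. A minimal demonstration:

	#include <stdio.h>

	struct scan_control { unsigned long nr_scanned; };

	int main(void)
	{
		struct scan_control sc = { 0 };
		unsigned long stale, fresh;

		stale = sc.nr_scanned;		/* taken once: old code */
		sc.nr_scanned += 100;		/* first pass scans 100 */

		/* restart: */
		fresh = sc.nr_scanned;		/* retaken: new code */
		sc.nr_scanned += 40;		/* second pass scans 40 */

		printf("stale baseline: %lu scanned this pass (wrong)\n",
		       sc.nr_scanned - stale);	/* 140 */
		printf("fresh baseline: %lu scanned this pass (right)\n",
		       sc.nr_scanned - fresh);	/* 40 */
		return 0;
	}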