Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig             2
-rw-r--r--   mm/huge_memory.c      78
-rw-r--r--   mm/kmemleak-test.c     6
-rw-r--r--   mm/kmemleak.c         13
-rw-r--r--   mm/memblock.c          2
-rw-r--r--   mm/memcontrol.c       98
-rw-r--r--   mm/memory-failure.c   94
-rw-r--r--   mm/memory.c           34
-rw-r--r--   mm/mempolicy.c        16
-rw-r--r--   mm/migrate.c          15
-rw-r--r--   mm/mlock.c             7
-rw-r--r--   mm/mremap.c            4
-rw-r--r--   mm/page_alloc.c       23
-rw-r--r--   mm/pgtable-generic.c   1
-rw-r--r--   mm/rmap.c             54
-rw-r--r--   mm/shmem.c             4
-rw-r--r--   mm/swapfile.c          2
-rw-r--r--   mm/truncate.c          2
-rw-r--r--   mm/vmscan.c           39
19 files changed, 323 insertions, 171 deletions
diff --git a/mm/Kconfig b/mm/Kconfig index 3ad483bdf505..e9c0c61f2ddd 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -179,7 +179,7 @@ config SPLIT_PTLOCK_CPUS | |||
179 | config COMPACTION | 179 | config COMPACTION |
180 | bool "Allow for memory compaction" | 180 | bool "Allow for memory compaction" |
181 | select MIGRATION | 181 | select MIGRATION |
182 | depends on EXPERIMENTAL && HUGETLB_PAGE && MMU | 182 | depends on MMU |
183 | help | 183 | help |
184 | Allows the compaction of memory for the allocation of huge pages. | 184 | Allows the compaction of memory for the allocation of huge pages. |
185 | 185 | ||
diff --git a/mm/huge_memory.c b/mm/huge_memory.c index e187454d82f6..113e35c47502 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c | |||
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag) | |||
650 | 650 | ||
651 | static inline struct page *alloc_hugepage_vma(int defrag, | 651 | static inline struct page *alloc_hugepage_vma(int defrag, |
652 | struct vm_area_struct *vma, | 652 | struct vm_area_struct *vma, |
653 | unsigned long haddr) | 653 | unsigned long haddr, int nd) |
654 | { | 654 | { |
655 | return alloc_pages_vma(alloc_hugepage_gfpmask(defrag), | 655 | return alloc_pages_vma(alloc_hugepage_gfpmask(defrag), |
656 | HPAGE_PMD_ORDER, vma, haddr); | 656 | HPAGE_PMD_ORDER, vma, haddr, nd); |
657 | } | 657 | } |
658 | 658 | ||
659 | #ifndef CONFIG_NUMA | 659 | #ifndef CONFIG_NUMA |
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
678 | if (unlikely(khugepaged_enter(vma))) | 678 | if (unlikely(khugepaged_enter(vma))) |
679 | return VM_FAULT_OOM; | 679 | return VM_FAULT_OOM; |
680 | page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), | 680 | page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), |
681 | vma, haddr); | 681 | vma, haddr, numa_node_id()); |
682 | if (unlikely(!page)) | 682 | if (unlikely(!page)) |
683 | goto out; | 683 | goto out; |
684 | if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { | 684 | if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) { |
@@ -799,8 +799,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | |||
799 | } | 799 | } |
800 | 800 | ||
801 | for (i = 0; i < HPAGE_PMD_NR; i++) { | 801 | for (i = 0; i < HPAGE_PMD_NR; i++) { |
802 | pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE, | 802 | pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE, |
803 | vma, address); | 803 | vma, address, page_to_nid(page)); |
804 | if (unlikely(!pages[i] || | 804 | if (unlikely(!pages[i] || |
805 | mem_cgroup_newpage_charge(pages[i], mm, | 805 | mem_cgroup_newpage_charge(pages[i], mm, |
806 | GFP_KERNEL))) { | 806 | GFP_KERNEL))) { |
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
902 | if (transparent_hugepage_enabled(vma) && | 902 | if (transparent_hugepage_enabled(vma) && |
903 | !transparent_hugepage_debug_cow()) | 903 | !transparent_hugepage_debug_cow()) |
904 | new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), | 904 | new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma), |
905 | vma, haddr); | 905 | vma, haddr, numa_node_id()); |
906 | else | 906 | else |
907 | new_page = NULL; | 907 | new_page = NULL; |
908 | 908 | ||
@@ -1162,7 +1162,12 @@ static void __split_huge_page_refcount(struct page *page) | |||
1162 | /* after clearing PageTail the gup refcount can be released */ | 1162 | /* after clearing PageTail the gup refcount can be released */ |
1163 | smp_mb(); | 1163 | smp_mb(); |
1164 | 1164 | ||
1165 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; | 1165 | /* |
1166 | * retain hwpoison flag of the poisoned tail page: | ||
1167 | * fix for the unsuitable process killed on Guest Machine(KVM) | ||
1168 | * by the memory-failure. | ||
1169 | */ | ||
1170 | page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP | __PG_HWPOISON; | ||
1166 | page_tail->flags |= (page->flags & | 1171 | page_tail->flags |= (page->flags & |
1167 | ((1L << PG_referenced) | | 1172 | ((1L << PG_referenced) | |
1168 | (1L << PG_swapbacked) | | 1173 | (1L << PG_swapbacked) | |
@@ -1740,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, | |||
1740 | static void collapse_huge_page(struct mm_struct *mm, | 1745 | static void collapse_huge_page(struct mm_struct *mm, |
1741 | unsigned long address, | 1746 | unsigned long address, |
1742 | struct page **hpage, | 1747 | struct page **hpage, |
1743 | struct vm_area_struct *vma) | 1748 | struct vm_area_struct *vma, |
1749 | int node) | ||
1744 | { | 1750 | { |
1745 | pgd_t *pgd; | 1751 | pgd_t *pgd; |
1746 | pud_t *pud; | 1752 | pud_t *pud; |
@@ -1756,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1756 | #ifndef CONFIG_NUMA | 1762 | #ifndef CONFIG_NUMA |
1757 | VM_BUG_ON(!*hpage); | 1763 | VM_BUG_ON(!*hpage); |
1758 | new_page = *hpage; | 1764 | new_page = *hpage; |
1765 | if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { | ||
1766 | up_read(&mm->mmap_sem); | ||
1767 | return; | ||
1768 | } | ||
1759 | #else | 1769 | #else |
1760 | VM_BUG_ON(*hpage); | 1770 | VM_BUG_ON(*hpage); |
1761 | /* | 1771 | /* |
@@ -1768,18 +1778,19 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1768 | * mmap_sem in read mode is good idea also to allow greater | 1778 | * mmap_sem in read mode is good idea also to allow greater |
1769 | * scalability. | 1779 | * scalability. |
1770 | */ | 1780 | */ |
1771 | new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address); | 1781 | new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address, |
1782 | node); | ||
1772 | if (unlikely(!new_page)) { | 1783 | if (unlikely(!new_page)) { |
1773 | up_read(&mm->mmap_sem); | 1784 | up_read(&mm->mmap_sem); |
1774 | *hpage = ERR_PTR(-ENOMEM); | 1785 | *hpage = ERR_PTR(-ENOMEM); |
1775 | return; | 1786 | return; |
1776 | } | 1787 | } |
1777 | #endif | ||
1778 | if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { | 1788 | if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { |
1779 | up_read(&mm->mmap_sem); | 1789 | up_read(&mm->mmap_sem); |
1780 | put_page(new_page); | 1790 | put_page(new_page); |
1781 | return; | 1791 | return; |
1782 | } | 1792 | } |
1793 | #endif | ||
1783 | 1794 | ||
1784 | /* after allocating the hugepage upgrade to mmap_sem write mode */ | 1795 | /* after allocating the hugepage upgrade to mmap_sem write mode */ |
1785 | up_read(&mm->mmap_sem); | 1796 | up_read(&mm->mmap_sem); |
@@ -1806,6 +1817,8 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1806 | /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ | 1817 | /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ |
1807 | if (!vma->anon_vma || vma->vm_ops || vma->vm_file) | 1818 | if (!vma->anon_vma || vma->vm_ops || vma->vm_file) |
1808 | goto out; | 1819 | goto out; |
1820 | if (is_vma_temporary_stack(vma)) | ||
1821 | goto out; | ||
1809 | VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); | 1822 | VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); |
1810 | 1823 | ||
1811 | pgd = pgd_offset(mm, address); | 1824 | pgd = pgd_offset(mm, address); |
@@ -1847,7 +1860,6 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1847 | set_pmd_at(mm, address, pmd, _pmd); | 1860 | set_pmd_at(mm, address, pmd, _pmd); |
1848 | spin_unlock(&mm->page_table_lock); | 1861 | spin_unlock(&mm->page_table_lock); |
1849 | anon_vma_unlock(vma->anon_vma); | 1862 | anon_vma_unlock(vma->anon_vma); |
1850 | mem_cgroup_uncharge_page(new_page); | ||
1851 | goto out; | 1863 | goto out; |
1852 | } | 1864 | } |
1853 | 1865 | ||
@@ -1893,6 +1905,7 @@ out_up_write: | |||
1893 | return; | 1905 | return; |
1894 | 1906 | ||
1895 | out: | 1907 | out: |
1908 | mem_cgroup_uncharge_page(new_page); | ||
1896 | #ifdef CONFIG_NUMA | 1909 | #ifdef CONFIG_NUMA |
1897 | put_page(new_page); | 1910 | put_page(new_page); |
1898 | #endif | 1911 | #endif |
@@ -1912,6 +1925,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
1912 | struct page *page; | 1925 | struct page *page; |
1913 | unsigned long _address; | 1926 | unsigned long _address; |
1914 | spinlock_t *ptl; | 1927 | spinlock_t *ptl; |
1928 | int node = -1; | ||
1915 | 1929 | ||
1916 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | 1930 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); |
1917 | 1931 | ||
@@ -1942,6 +1956,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm, | |||
1942 | page = vm_normal_page(vma, _address, pteval); | 1956 | page = vm_normal_page(vma, _address, pteval); |
1943 | if (unlikely(!page)) | 1957 | if (unlikely(!page)) |
1944 | goto out_unmap; | 1958 | goto out_unmap; |
1959 | /* | ||
1960 | * Chose the node of the first page. This could | ||
1961 | * be more sophisticated and look at more pages, | ||
1962 | * but isn't for now. | ||
1963 | */ | ||
1964 | if (node == -1) | ||
1965 | node = page_to_nid(page); | ||
1945 | VM_BUG_ON(PageCompound(page)); | 1966 | VM_BUG_ON(PageCompound(page)); |
1946 | if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) | 1967 | if (!PageLRU(page) || PageLocked(page) || !PageAnon(page)) |
1947 | goto out_unmap; | 1968 | goto out_unmap; |
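The hunk above introduces a node variable in khugepaged_scan_pmd() and records the NUMA node of the first mapped page, so that collapse_huge_page() can allocate the replacement huge page on that node. Below is a minimal userspace model of that heuristic, assuming a flat array of stand-in pages; page_to_nid() and the real kernel page structures are not used.

#include <stdio.h>

/* Stand-in for a page's NUMA node, as page_to_nid() would report it. */
struct fake_page { int nid; };

/*
 * Model of the khugepaged heuristic added above: walk the pages that
 * back a PMD range and remember the node of the first one seen.  The
 * patch's own comment notes this could be smarter (look at more pages),
 * but it simply takes the first page's node.
 */
static int pick_target_node(const struct fake_page *pages, int nr_pages)
{
    int node = -1;  /* -1 == no node chosen yet */
    int i;

    for (i = 0; i < nr_pages; i++) {
        if (node == -1)
            node = pages[i].nid;
        /* the real scan keeps going to validate the other PTEs */
    }
    return node;
}

int main(void)
{
    struct fake_page range[4] = { {1}, {1}, {0}, {1} };

    printf("collapse target node: %d\n",
           pick_target_node(range, 4));  /* prints 1 */
    return 0;
}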
@@ -1958,7 +1979,7 @@ out_unmap: | |||
1958 | pte_unmap_unlock(pte, ptl); | 1979 | pte_unmap_unlock(pte, ptl); |
1959 | if (ret) | 1980 | if (ret) |
1960 | /* collapse_huge_page will return with the mmap_sem released */ | 1981 | /* collapse_huge_page will return with the mmap_sem released */ |
1961 | collapse_huge_page(mm, address, hpage, vma); | 1982 | collapse_huge_page(mm, address, hpage, vma, node); |
1962 | out: | 1983 | out: |
1963 | return ret; | 1984 | return ret; |
1964 | } | 1985 | } |
@@ -2027,32 +2048,27 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, | |||
2027 | if ((!(vma->vm_flags & VM_HUGEPAGE) && | 2048 | if ((!(vma->vm_flags & VM_HUGEPAGE) && |
2028 | !khugepaged_always()) || | 2049 | !khugepaged_always()) || |
2029 | (vma->vm_flags & VM_NOHUGEPAGE)) { | 2050 | (vma->vm_flags & VM_NOHUGEPAGE)) { |
2051 | skip: | ||
2030 | progress++; | 2052 | progress++; |
2031 | continue; | 2053 | continue; |
2032 | } | 2054 | } |
2033 | |||
2034 | /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ | 2055 | /* VM_PFNMAP vmas may have vm_ops null but vm_file set */ |
2035 | if (!vma->anon_vma || vma->vm_ops || vma->vm_file) { | 2056 | if (!vma->anon_vma || vma->vm_ops || vma->vm_file) |
2036 | khugepaged_scan.address = vma->vm_end; | 2057 | goto skip; |
2037 | progress++; | 2058 | if (is_vma_temporary_stack(vma)) |
2038 | continue; | 2059 | goto skip; |
2039 | } | 2060 | |
2040 | VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); | 2061 | VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma)); |
2041 | 2062 | ||
2042 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 2063 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
2043 | hend = vma->vm_end & HPAGE_PMD_MASK; | 2064 | hend = vma->vm_end & HPAGE_PMD_MASK; |
2044 | if (hstart >= hend) { | 2065 | if (hstart >= hend) |
2045 | progress++; | 2066 | goto skip; |
2046 | continue; | 2067 | if (khugepaged_scan.address > hend) |
2047 | } | 2068 | goto skip; |
2048 | if (khugepaged_scan.address < hstart) | 2069 | if (khugepaged_scan.address < hstart) |
2049 | khugepaged_scan.address = hstart; | 2070 | khugepaged_scan.address = hstart; |
2050 | if (khugepaged_scan.address > hend) { | 2071 | VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK); |
2051 | khugepaged_scan.address = hend + HPAGE_PMD_SIZE; | ||
2052 | progress++; | ||
2053 | continue; | ||
2054 | } | ||
2055 | BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK); | ||
2056 | 2072 | ||
2057 | while (khugepaged_scan.address < hend) { | 2073 | while (khugepaged_scan.address < hend) { |
2058 | int ret; | 2074 | int ret; |
@@ -2081,7 +2097,7 @@ breakouterloop: | |||
2081 | breakouterloop_mmap_sem: | 2097 | breakouterloop_mmap_sem: |
2082 | 2098 | ||
2083 | spin_lock(&khugepaged_mm_lock); | 2099 | spin_lock(&khugepaged_mm_lock); |
2084 | BUG_ON(khugepaged_scan.mm_slot != mm_slot); | 2100 | VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot); |
2085 | /* | 2101 | /* |
2086 | * Release the current mm_slot if this mm is about to die, or | 2102 | * Release the current mm_slot if this mm is about to die, or |
2087 | * if we scanned all vmas of this mm. | 2103 | * if we scanned all vmas of this mm. |
@@ -2236,9 +2252,9 @@ static int khugepaged(void *none) | |||
2236 | 2252 | ||
2237 | for (;;) { | 2253 | for (;;) { |
2238 | mutex_unlock(&khugepaged_mutex); | 2254 | mutex_unlock(&khugepaged_mutex); |
2239 | BUG_ON(khugepaged_thread != current); | 2255 | VM_BUG_ON(khugepaged_thread != current); |
2240 | khugepaged_loop(); | 2256 | khugepaged_loop(); |
2241 | BUG_ON(khugepaged_thread != current); | 2257 | VM_BUG_ON(khugepaged_thread != current); |
2242 | 2258 | ||
2243 | mutex_lock(&khugepaged_mutex); | 2259 | mutex_lock(&khugepaged_mutex); |
2244 | if (!khugepaged_enabled()) | 2260 | if (!khugepaged_enabled()) |
diff --git a/mm/kmemleak-test.c b/mm/kmemleak-test.c index 177a5169bbde..ff0d9779cec8 100644 --- a/mm/kmemleak-test.c +++ b/mm/kmemleak-test.c | |||
@@ -75,13 +75,11 @@ static int __init kmemleak_test_init(void) | |||
75 | * after the module is removed. | 75 | * after the module is removed. |
76 | */ | 76 | */ |
77 | for (i = 0; i < 10; i++) { | 77 | for (i = 0; i < 10; i++) { |
78 | elem = kmalloc(sizeof(*elem), GFP_KERNEL); | 78 | elem = kzalloc(sizeof(*elem), GFP_KERNEL); |
79 | pr_info("kmemleak: kmalloc(sizeof(*elem)) = %p\n", elem); | 79 | pr_info("kmemleak: kzalloc(sizeof(*elem)) = %p\n", elem); |
80 | if (!elem) | 80 | if (!elem) |
81 | return -ENOMEM; | 81 | return -ENOMEM; |
82 | memset(elem, 0, sizeof(*elem)); | ||
83 | INIT_LIST_HEAD(&elem->list); | 82 | INIT_LIST_HEAD(&elem->list); |
84 | |||
85 | list_add_tail(&elem->list, &test_list); | 83 | list_add_tail(&elem->list, &test_list); |
86 | } | 84 | } |
87 | 85 | ||
diff --git a/mm/kmemleak.c b/mm/kmemleak.c index bd9bc214091b..84225f3b7190 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c | |||
@@ -113,7 +113,9 @@ | |||
113 | #define BYTES_PER_POINTER sizeof(void *) | 113 | #define BYTES_PER_POINTER sizeof(void *) |
114 | 114 | ||
115 | /* GFP bitmask for kmemleak internal allocations */ | 115 | /* GFP bitmask for kmemleak internal allocations */ |
116 | #define GFP_KMEMLEAK_MASK (GFP_KERNEL | GFP_ATOMIC) | 116 | #define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \ |
117 | __GFP_NORETRY | __GFP_NOMEMALLOC | \ | ||
118 | __GFP_NOWARN) | ||
117 | 119 | ||
118 | /* scanning area inside a memory block */ | 120 | /* scanning area inside a memory block */ |
119 | struct kmemleak_scan_area { | 121 | struct kmemleak_scan_area { |
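The old GFP_KMEMLEAK_MASK only narrowed the caller's flags to GFP_KERNEL | GFP_ATOMIC; the new gfp_kmemleak_mask() additionally forces __GFP_NORETRY, __GFP_NOMEMALLOC and __GFP_NOWARN so that kmemleak's metadata allocations fail fast and quietly instead of pressuring the allocator. The sketch below models the flag arithmetic with made-up bit values; the real constants live in gfp.h.

#include <stdio.h>

/* Illustrative bit values only, not the real gfp.h definitions. */
#define GFP_KERNEL        0x01u
#define GFP_ATOMIC        0x02u
#define __GFP_NORETRY     0x10u
#define __GFP_NOMEMALLOC  0x20u
#define __GFP_NOWARN      0x40u

/*
 * Same shape as the macro added above: keep only the caller's
 * KERNEL/ATOMIC bits, then force the "fail fast and quietly" bits on.
 */
#define gfp_kmemleak_mask(gfp) (((gfp) & (GFP_KERNEL | GFP_ATOMIC)) | \
                                __GFP_NORETRY | __GFP_NOMEMALLOC |    \
                                __GFP_NOWARN)

int main(void)
{
    unsigned caller = GFP_ATOMIC | 0x80u;  /* 0x80: some unrelated flag */

    /* the unrelated flag is stripped, the no-pressure bits are added */
    printf("masked gfp = %#x\n", gfp_kmemleak_mask(caller));
    return 0;
}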
@@ -511,9 +513,10 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, | |||
511 | struct kmemleak_object *object; | 513 | struct kmemleak_object *object; |
512 | struct prio_tree_node *node; | 514 | struct prio_tree_node *node; |
513 | 515 | ||
514 | object = kmem_cache_alloc(object_cache, gfp & GFP_KMEMLEAK_MASK); | 516 | object = kmem_cache_alloc(object_cache, gfp_kmemleak_mask(gfp)); |
515 | if (!object) { | 517 | if (!object) { |
516 | kmemleak_stop("Cannot allocate a kmemleak_object structure\n"); | 518 | pr_warning("Cannot allocate a kmemleak_object structure\n"); |
519 | kmemleak_disable(); | ||
517 | return NULL; | 520 | return NULL; |
518 | } | 521 | } |
519 | 522 | ||
@@ -734,9 +737,9 @@ static void add_scan_area(unsigned long ptr, size_t size, gfp_t gfp) | |||
734 | return; | 737 | return; |
735 | } | 738 | } |
736 | 739 | ||
737 | area = kmem_cache_alloc(scan_area_cache, gfp & GFP_KMEMLEAK_MASK); | 740 | area = kmem_cache_alloc(scan_area_cache, gfp_kmemleak_mask(gfp)); |
738 | if (!area) { | 741 | if (!area) { |
739 | kmemleak_warn("Cannot allocate a scan area\n"); | 742 | pr_warning("Cannot allocate a scan area\n"); |
740 | goto out; | 743 | goto out; |
741 | } | 744 | } |
742 | 745 | ||
diff --git a/mm/memblock.c b/mm/memblock.c index bdba245d8afd..4618fda975a0 100644 --- a/mm/memblock.c +++ b/mm/memblock.c | |||
@@ -137,8 +137,6 @@ static phys_addr_t __init_memblock memblock_find_base(phys_addr_t size, | |||
137 | 137 | ||
138 | BUG_ON(0 == size); | 138 | BUG_ON(0 == size); |
139 | 139 | ||
140 | size = memblock_align_up(size, align); | ||
141 | |||
142 | /* Pump up max_addr */ | 140 | /* Pump up max_addr */ |
143 | if (end == MEMBLOCK_ALLOC_ACCESSIBLE) | 141 | if (end == MEMBLOCK_ALLOC_ACCESSIBLE) |
144 | end = memblock.current_limit; | 142 | end = memblock.current_limit; |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index db76ef726293..da53a252b259 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -612,8 +612,10 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, | |||
612 | /* pagein of a big page is an event. So, ignore page size */ | 612 | /* pagein of a big page is an event. So, ignore page size */ |
613 | if (nr_pages > 0) | 613 | if (nr_pages > 0) |
614 | __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]); | 614 | __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]); |
615 | else | 615 | else { |
616 | __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]); | 616 | __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]); |
617 | nr_pages = -nr_pages; /* for event */ | ||
618 | } | ||
617 | 619 | ||
618 | __this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages); | 620 | __this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages); |
619 | 621 | ||
@@ -1111,6 +1113,23 @@ static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) | |||
1111 | return false; | 1113 | return false; |
1112 | } | 1114 | } |
1113 | 1115 | ||
1116 | /** | ||
1117 | * mem_cgroup_check_margin - check if the memory cgroup allows charging | ||
1118 | * @mem: memory cgroup to check | ||
1119 | * @bytes: the number of bytes the caller intends to charge | ||
1120 | * | ||
1121 | * Returns a boolean value on whether @mem can be charged @bytes or | ||
1122 | * whether this would exceed the limit. | ||
1123 | */ | ||
1124 | static bool mem_cgroup_check_margin(struct mem_cgroup *mem, unsigned long bytes) | ||
1125 | { | ||
1126 | if (!res_counter_check_margin(&mem->res, bytes)) | ||
1127 | return false; | ||
1128 | if (do_swap_account && !res_counter_check_margin(&mem->memsw, bytes)) | ||
1129 | return false; | ||
1130 | return true; | ||
1131 | } | ||
1132 | |||
1114 | static unsigned int get_swappiness(struct mem_cgroup *memcg) | 1133 | static unsigned int get_swappiness(struct mem_cgroup *memcg) |
1115 | { | 1134 | { |
1116 | struct cgroup *cgrp = memcg->css.cgroup; | 1135 | struct cgroup *cgrp = memcg->css.cgroup; |
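mem_cgroup_check_margin() reports whether both the memory counter and, when swap accounting is on, the memsw counter still have room for the pending charge; the reworked charge path uses it to decide whether retrying after reclaim is worthwhile. The following is a simplified model with a plain limit/usage pair standing in for res_counter, not the kernel implementation.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-in for struct res_counter (limit/usage only). */
struct counter {
    unsigned long limit;
    unsigned long usage;
};

static bool counter_check_margin(const struct counter *c, unsigned long bytes)
{
    /* does the room left between usage and limit cover the charge? */
    return c->limit - c->usage >= bytes;
}

static bool do_swap_account = true;

/* Mirrors the logic of the helper added above. */
static bool check_margin(const struct counter *res,
                         const struct counter *memsw, unsigned long bytes)
{
    if (!counter_check_margin(res, bytes))
        return false;
    if (do_swap_account && !counter_check_margin(memsw, bytes))
        return false;
    return true;
}

int main(void)
{
    struct counter res   = { .limit = 1 << 20, .usage = (1 << 20) - 8192 };
    struct counter memsw = { .limit = 2 << 20, .usage = 1 << 20 };

    printf("can charge 4K:  %d\n", check_margin(&res, &memsw, 4096));   /* 1 */
    printf("can charge 16K: %d\n", check_margin(&res, &memsw, 16384));  /* 0 */
    return 0;
}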
@@ -1832,27 +1851,39 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | |||
1832 | if (likely(!ret)) | 1851 | if (likely(!ret)) |
1833 | return CHARGE_OK; | 1852 | return CHARGE_OK; |
1834 | 1853 | ||
1854 | res_counter_uncharge(&mem->res, csize); | ||
1835 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); | 1855 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, memsw); |
1836 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; | 1856 | flags |= MEM_CGROUP_RECLAIM_NOSWAP; |
1837 | } else | 1857 | } else |
1838 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); | 1858 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); |
1839 | 1859 | /* | |
1840 | if (csize > PAGE_SIZE) /* change csize and retry */ | 1860 | * csize can be either a huge page (HPAGE_SIZE), a batch of |
1861 | * regular pages (CHARGE_SIZE), or a single regular page | ||
1862 | * (PAGE_SIZE). | ||
1863 | * | ||
1864 | * Never reclaim on behalf of optional batching, retry with a | ||
1865 | * single page instead. | ||
1866 | */ | ||
1867 | if (csize == CHARGE_SIZE) | ||
1841 | return CHARGE_RETRY; | 1868 | return CHARGE_RETRY; |
1842 | 1869 | ||
1843 | if (!(gfp_mask & __GFP_WAIT)) | 1870 | if (!(gfp_mask & __GFP_WAIT)) |
1844 | return CHARGE_WOULDBLOCK; | 1871 | return CHARGE_WOULDBLOCK; |
1845 | 1872 | ||
1846 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, | 1873 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, |
1847 | gfp_mask, flags); | 1874 | gfp_mask, flags); |
1875 | if (mem_cgroup_check_margin(mem_over_limit, csize)) | ||
1876 | return CHARGE_RETRY; | ||
1848 | /* | 1877 | /* |
1849 | * try_to_free_mem_cgroup_pages() might not give us a full | 1878 | * Even though the limit is exceeded at this point, reclaim |
1850 | * picture of reclaim. Some pages are reclaimed and might be | 1879 | * may have been able to free some pages. Retry the charge |
1851 | * moved to swap cache or just unmapped from the cgroup. | 1880 | * before killing the task. |
1852 | * Check the limit again to see if the reclaim reduced the | 1881 | * |
1853 | * current usage of the cgroup before giving up | 1882 | * Only for regular pages, though: huge pages are rather |
1883 | * unlikely to succeed so close to the limit, and we fall back | ||
1884 | * to regular pages anyway in case of failure. | ||
1854 | */ | 1885 | */ |
1855 | if (ret || mem_cgroup_check_under_limit(mem_over_limit)) | 1886 | if (csize == PAGE_SIZE && ret) |
1856 | return CHARGE_RETRY; | 1887 | return CHARGE_RETRY; |
1857 | 1888 | ||
1858 | /* | 1889 | /* |
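The rewritten failure path in __mem_cgroup_do_charge() distinguishes a batched charge, a huge page and a single page: batching never triggers reclaim and simply retries with one page, huge pages give up unless the margin check passes, and single pages also retry on any reclaim progress. A sketch of that decision follows; the CHARGE_* sizes are illustrative, and reclaim progress and margin are passed in as booleans rather than computed.

#include <stdbool.h>
#include <stdio.h>

enum charge_result { CHARGE_RETRY, CHARGE_WOULDBLOCK, CHARGE_NOMEM };

#define PAGE_SZ   4096UL
#define BATCH_SZ  (32 * PAGE_SZ)   /* stands in for CHARGE_SIZE */
#define HUGE_SZ   (512 * PAGE_SZ)  /* stands in for HPAGE_SIZE */

/*
 * Decision taken once the counter charge has failed, mirroring the
 * flow in the hunk above (the success path and OOM handling are left out).
 */
static enum charge_result charge_failed(unsigned long csize, bool can_wait,
                                        bool margin_after_reclaim,
                                        bool reclaimed_some)
{
    /* never reclaim on behalf of optional batching: retry one page */
    if (csize == BATCH_SZ)
        return CHARGE_RETRY;

    if (!can_wait)  /* models !(gfp_mask & __GFP_WAIT) */
        return CHARGE_WOULDBLOCK;

    /* reclaim ran; is there room for the full charge now? */
    if (margin_after_reclaim)
        return CHARGE_RETRY;

    /* single pages also retry on any reclaim progress, huge pages don't */
    if (csize == PAGE_SZ && reclaimed_some)
        return CHARGE_RETRY;

    return CHARGE_NOMEM;  /* caller may then OOM or give up */
}

int main(void)
{
    printf("%d\n", charge_failed(BATCH_SZ, true, false, false));  /* RETRY */
    printf("%d\n", charge_failed(HUGE_SZ, true, false, true));    /* NOMEM */
    printf("%d\n", charge_failed(PAGE_SZ, true, false, true));    /* RETRY */
    return 0;
}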
@@ -2144,6 +2175,8 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) | |||
2144 | struct page_cgroup *tail_pc = lookup_page_cgroup(tail); | 2175 | struct page_cgroup *tail_pc = lookup_page_cgroup(tail); |
2145 | unsigned long flags; | 2176 | unsigned long flags; |
2146 | 2177 | ||
2178 | if (mem_cgroup_disabled()) | ||
2179 | return; | ||
2147 | /* | 2180 | /* |
2148 | * We have no races with charge/uncharge but will have races with | 2181 | * We have no races with charge/uncharge but will have races with |
2149 | * page state accounting. | 2182 | * page state accounting. |
@@ -2233,7 +2266,12 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
2233 | { | 2266 | { |
2234 | int ret = -EINVAL; | 2267 | int ret = -EINVAL; |
2235 | unsigned long flags; | 2268 | unsigned long flags; |
2236 | 2269 | /* | |
2270 | * The page is isolated from LRU. So, collapse function | ||
2271 | * will not handle this page. But page splitting can happen. | ||
2272 | * Do this check under compound_page_lock(). The caller should | ||
2273 | * hold it. | ||
2274 | */ | ||
2237 | if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page)) | 2275 | if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page)) |
2238 | return -EBUSY; | 2276 | return -EBUSY; |
2239 | 2277 | ||
@@ -2265,7 +2303,7 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
2265 | struct cgroup *cg = child->css.cgroup; | 2303 | struct cgroup *cg = child->css.cgroup; |
2266 | struct cgroup *pcg = cg->parent; | 2304 | struct cgroup *pcg = cg->parent; |
2267 | struct mem_cgroup *parent; | 2305 | struct mem_cgroup *parent; |
2268 | int charge = PAGE_SIZE; | 2306 | int page_size = PAGE_SIZE; |
2269 | unsigned long flags; | 2307 | unsigned long flags; |
2270 | int ret; | 2308 | int ret; |
2271 | 2309 | ||
@@ -2278,23 +2316,26 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
2278 | goto out; | 2316 | goto out; |
2279 | if (isolate_lru_page(page)) | 2317 | if (isolate_lru_page(page)) |
2280 | goto put; | 2318 | goto put; |
2281 | /* The page is isolated from LRU and we have no race with splitting */ | 2319 | |
2282 | charge = PAGE_SIZE << compound_order(page); | 2320 | if (PageTransHuge(page)) |
2321 | page_size = HPAGE_SIZE; | ||
2283 | 2322 | ||
2284 | parent = mem_cgroup_from_cont(pcg); | 2323 | parent = mem_cgroup_from_cont(pcg); |
2285 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, &parent, false, charge); | 2324 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, |
2325 | &parent, false, page_size); | ||
2286 | if (ret || !parent) | 2326 | if (ret || !parent) |
2287 | goto put_back; | 2327 | goto put_back; |
2288 | 2328 | ||
2289 | if (charge > PAGE_SIZE) | 2329 | if (page_size > PAGE_SIZE) |
2290 | flags = compound_lock_irqsave(page); | 2330 | flags = compound_lock_irqsave(page); |
2291 | 2331 | ||
2292 | ret = mem_cgroup_move_account(pc, child, parent, true, charge); | 2332 | ret = mem_cgroup_move_account(pc, child, parent, true, page_size); |
2293 | if (ret) | 2333 | if (ret) |
2294 | mem_cgroup_cancel_charge(parent, charge); | 2334 | mem_cgroup_cancel_charge(parent, page_size); |
2295 | put_back: | 2335 | |
2296 | if (charge > PAGE_SIZE) | 2336 | if (page_size > PAGE_SIZE) |
2297 | compound_unlock_irqrestore(page, flags); | 2337 | compound_unlock_irqrestore(page, flags); |
2338 | put_back: | ||
2298 | putback_lru_page(page); | 2339 | putback_lru_page(page); |
2299 | put: | 2340 | put: |
2300 | put_page(page); | 2341 | put_page(page); |
@@ -2312,13 +2353,19 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
2312 | gfp_t gfp_mask, enum charge_type ctype) | 2353 | gfp_t gfp_mask, enum charge_type ctype) |
2313 | { | 2354 | { |
2314 | struct mem_cgroup *mem = NULL; | 2355 | struct mem_cgroup *mem = NULL; |
2356 | int page_size = PAGE_SIZE; | ||
2315 | struct page_cgroup *pc; | 2357 | struct page_cgroup *pc; |
2358 | bool oom = true; | ||
2316 | int ret; | 2359 | int ret; |
2317 | int page_size = PAGE_SIZE; | ||
2318 | 2360 | ||
2319 | if (PageTransHuge(page)) { | 2361 | if (PageTransHuge(page)) { |
2320 | page_size <<= compound_order(page); | 2362 | page_size <<= compound_order(page); |
2321 | VM_BUG_ON(!PageTransHuge(page)); | 2363 | VM_BUG_ON(!PageTransHuge(page)); |
2364 | /* | ||
2365 | * Never OOM-kill a process for a huge page. The | ||
2366 | * fault handler will fall back to regular pages. | ||
2367 | */ | ||
2368 | oom = false; | ||
2322 | } | 2369 | } |
2323 | 2370 | ||
2324 | pc = lookup_page_cgroup(page); | 2371 | pc = lookup_page_cgroup(page); |
@@ -2327,7 +2374,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
2327 | return 0; | 2374 | return 0; |
2328 | prefetchw(pc); | 2375 | prefetchw(pc); |
2329 | 2376 | ||
2330 | ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, true, page_size); | 2377 | ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, oom, page_size); |
2331 | if (ret || !mem) | 2378 | if (ret || !mem) |
2332 | return ret; | 2379 | return ret; |
2333 | 2380 | ||
@@ -5013,9 +5060,9 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
5013 | static int __init enable_swap_account(char *s) | 5060 | static int __init enable_swap_account(char *s) |
5014 | { | 5061 | { |
5015 | /* consider enabled if no parameter or 1 is given */ | 5062 | /* consider enabled if no parameter or 1 is given */ |
5016 | if (!s || !strcmp(s, "1")) | 5063 | if (!(*s) || !strcmp(s, "=1")) |
5017 | really_do_swap_account = 1; | 5064 | really_do_swap_account = 1; |
5018 | else if (!strcmp(s, "0")) | 5065 | else if (!strcmp(s, "=0")) |
5019 | really_do_swap_account = 0; | 5066 | really_do_swap_account = 0; |
5020 | return 1; | 5067 | return 1; |
5021 | } | 5068 | } |
@@ -5023,7 +5070,8 @@ __setup("swapaccount", enable_swap_account); | |||
5023 | 5070 | ||
5024 | static int __init disable_swap_account(char *s) | 5071 | static int __init disable_swap_account(char *s) |
5025 | { | 5072 | { |
5026 | enable_swap_account("0"); | 5073 | printk_once("noswapaccount is deprecated and will be removed in 2.6.40. Use swapaccount=0 instead\n"); |
5074 | enable_swap_account("=0"); | ||
5027 | return 1; | 5075 | return 1; |
5028 | } | 5076 | } |
5029 | __setup("noswapaccount", disable_swap_account); | 5077 | __setup("noswapaccount", disable_swap_account); |
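With the prefix-style __setup() match, the handler receives the raw suffix including the '=' sign, so the comparisons become "=1" and "=0" and an empty suffix keeps meaning enabled, while noswapaccount stays as a deprecated alias. A userspace model of the parsing is sketched below; the __setup() machinery itself is kernel-only.

#include <stdio.h>
#include <string.h>

static int really_do_swap_account;

/*
 * Models enable_swap_account() as changed above: "swapaccount" and
 * "swapaccount=1" enable accounting, "swapaccount=0" disables it.
 */
static int enable_swap_account(const char *s)
{
    if (!*s || !strcmp(s, "=1"))
        really_do_swap_account = 1;
    else if (!strcmp(s, "=0"))
        really_do_swap_account = 0;
    return 1;
}

int main(void)
{
    enable_swap_account("");   /* bare "swapaccount" */
    printf("%d\n", really_do_swap_account);  /* 1 */
    enable_swap_account("=0"); /* "swapaccount=0" */
    printf("%d\n", really_do_swap_account);  /* 0 */
    return 0;
}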
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 548fbd70f026..0207c2f6f8bd 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -233,8 +233,8 @@ void shake_page(struct page *p, int access) | |||
233 | } | 233 | } |
234 | 234 | ||
235 | /* | 235 | /* |
236 | * Only all shrink_slab here (which would also | 236 | * Only call shrink_slab here (which would also shrink other caches) if |
237 | * shrink other caches) if access is not potentially fatal. | 237 | * access is not potentially fatal. |
238 | */ | 238 | */ |
239 | if (access) { | 239 | if (access) { |
240 | int nr; | 240 | int nr; |
@@ -386,8 +386,6 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill, | |||
386 | struct task_struct *tsk; | 386 | struct task_struct *tsk; |
387 | struct anon_vma *av; | 387 | struct anon_vma *av; |
388 | 388 | ||
389 | if (!PageHuge(page) && unlikely(split_huge_page(page))) | ||
390 | return; | ||
391 | read_lock(&tasklist_lock); | 389 | read_lock(&tasklist_lock); |
392 | av = page_lock_anon_vma(page); | 390 | av = page_lock_anon_vma(page); |
393 | if (av == NULL) /* Not actually mapped anymore */ | 391 | if (av == NULL) /* Not actually mapped anymore */ |
@@ -856,6 +854,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
856 | int ret; | 854 | int ret; |
857 | int kill = 1; | 855 | int kill = 1; |
858 | struct page *hpage = compound_head(p); | 856 | struct page *hpage = compound_head(p); |
857 | struct page *ppage; | ||
859 | 858 | ||
860 | if (PageReserved(p) || PageSlab(p)) | 859 | if (PageReserved(p) || PageSlab(p)) |
861 | return SWAP_SUCCESS; | 860 | return SWAP_SUCCESS; |
@@ -897,6 +896,44 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
897 | } | 896 | } |
898 | 897 | ||
899 | /* | 898 | /* |
899 | * ppage: poisoned page | ||
900 | * if p is regular page(4k page) | ||
901 | * ppage == real poisoned page; | ||
902 | * else p is hugetlb or THP, ppage == head page. | ||
903 | */ | ||
904 | ppage = hpage; | ||
905 | |||
906 | if (PageTransHuge(hpage)) { | ||
907 | /* | ||
908 | * Verify that this isn't a hugetlbfs head page, the check for | ||
909 | * PageAnon is just for avoid tripping a split_huge_page | ||
910 | * internal debug check, as split_huge_page refuses to deal with | ||
911 | * anything that isn't an anon page. PageAnon can't go away fro | ||
912 | * under us because we hold a refcount on the hpage, without a | ||
913 | * refcount on the hpage. split_huge_page can't be safely called | ||
914 | * in the first place, having a refcount on the tail isn't | ||
915 | * enough * to be safe. | ||
916 | */ | ||
917 | if (!PageHuge(hpage) && PageAnon(hpage)) { | ||
918 | if (unlikely(split_huge_page(hpage))) { | ||
919 | /* | ||
920 | * FIXME: if splitting THP is failed, it is | ||
921 | * better to stop the following operation rather | ||
922 | * than causing panic by unmapping. System might | ||
923 | * survive if the page is freed later. | ||
924 | */ | ||
925 | printk(KERN_INFO | ||
926 | "MCE %#lx: failed to split THP\n", pfn); | ||
927 | |||
928 | BUG_ON(!PageHWPoison(p)); | ||
929 | return SWAP_FAIL; | ||
930 | } | ||
931 | /* THP is split, so ppage should be the real poisoned page. */ | ||
932 | ppage = p; | ||
933 | } | ||
934 | } | ||
935 | |||
936 | /* | ||
900 | * First collect all the processes that have the page | 937 | * First collect all the processes that have the page |
901 | * mapped in dirty form. This has to be done before try_to_unmap, | 938 | * mapped in dirty form. This has to be done before try_to_unmap, |
902 | * because ttu takes the rmap data structures down. | 939 | * because ttu takes the rmap data structures down. |
@@ -905,12 +942,18 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
905 | * there's nothing that can be done. | 942 | * there's nothing that can be done. |
906 | */ | 943 | */ |
907 | if (kill) | 944 | if (kill) |
908 | collect_procs(hpage, &tokill); | 945 | collect_procs(ppage, &tokill); |
946 | |||
947 | if (hpage != ppage) | ||
948 | lock_page_nosync(ppage); | ||
909 | 949 | ||
910 | ret = try_to_unmap(hpage, ttu); | 950 | ret = try_to_unmap(ppage, ttu); |
911 | if (ret != SWAP_SUCCESS) | 951 | if (ret != SWAP_SUCCESS) |
912 | printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", | 952 | printk(KERN_ERR "MCE %#lx: failed to unmap page (mapcount=%d)\n", |
913 | pfn, page_mapcount(hpage)); | 953 | pfn, page_mapcount(ppage)); |
954 | |||
955 | if (hpage != ppage) | ||
956 | unlock_page(ppage); | ||
914 | 957 | ||
915 | /* | 958 | /* |
916 | * Now that the dirty bit has been propagated to the | 959 | * Now that the dirty bit has been propagated to the |
@@ -921,7 +964,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn, | |||
921 | * use a more force-full uncatchable kill to prevent | 964 | * use a more force-full uncatchable kill to prevent |
922 | * any accesses to the poisoned memory. | 965 | * any accesses to the poisoned memory. |
923 | */ | 966 | */ |
924 | kill_procs_ao(&tokill, !!PageDirty(hpage), trapno, | 967 | kill_procs_ao(&tokill, !!PageDirty(ppage), trapno, |
925 | ret != SWAP_SUCCESS, p, pfn); | 968 | ret != SWAP_SUCCESS, p, pfn); |
926 | 969 | ||
927 | return ret; | 970 | return ret; |
@@ -1022,19 +1065,22 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
1022 | * The check (unnecessarily) ignores LRU pages being isolated and | 1065 | * The check (unnecessarily) ignores LRU pages being isolated and |
1023 | * walked by the page reclaim code, however that's not a big loss. | 1066 | * walked by the page reclaim code, however that's not a big loss. |
1024 | */ | 1067 | */ |
1025 | if (!PageLRU(p) && !PageHuge(p)) | 1068 | if (!PageHuge(p) && !PageTransCompound(p)) { |
1026 | shake_page(p, 0); | 1069 | if (!PageLRU(p)) |
1027 | if (!PageLRU(p) && !PageHuge(p)) { | 1070 | shake_page(p, 0); |
1028 | /* | 1071 | if (!PageLRU(p)) { |
1029 | * shake_page could have turned it free. | 1072 | /* |
1030 | */ | 1073 | * shake_page could have turned it free. |
1031 | if (is_free_buddy_page(p)) { | 1074 | */ |
1032 | action_result(pfn, "free buddy, 2nd try", DELAYED); | 1075 | if (is_free_buddy_page(p)) { |
1033 | return 0; | 1076 | action_result(pfn, "free buddy, 2nd try", |
1077 | DELAYED); | ||
1078 | return 0; | ||
1079 | } | ||
1080 | action_result(pfn, "non LRU", IGNORED); | ||
1081 | put_page(p); | ||
1082 | return -EBUSY; | ||
1034 | } | 1083 | } |
1035 | action_result(pfn, "non LRU", IGNORED); | ||
1036 | put_page(p); | ||
1037 | return -EBUSY; | ||
1038 | } | 1084 | } |
1039 | 1085 | ||
1040 | /* | 1086 | /* |
@@ -1064,7 +1110,7 @@ int __memory_failure(unsigned long pfn, int trapno, int flags) | |||
1064 | * For error on the tail page, we should set PG_hwpoison | 1110 | * For error on the tail page, we should set PG_hwpoison |
1065 | * on the head page to show that the hugepage is hwpoisoned | 1111 | * on the head page to show that the hugepage is hwpoisoned |
1066 | */ | 1112 | */ |
1067 | if (PageTail(p) && TestSetPageHWPoison(hpage)) { | 1113 | if (PageHuge(p) && PageTail(p) && TestSetPageHWPoison(hpage)) { |
1068 | action_result(pfn, "hugepage already hardware poisoned", | 1114 | action_result(pfn, "hugepage already hardware poisoned", |
1069 | IGNORED); | 1115 | IGNORED); |
1070 | unlock_page(hpage); | 1116 | unlock_page(hpage); |
@@ -1295,7 +1341,10 @@ static int soft_offline_huge_page(struct page *page, int flags) | |||
1295 | ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0, | 1341 | ret = migrate_huge_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, 0, |
1296 | true); | 1342 | true); |
1297 | if (ret) { | 1343 | if (ret) { |
1298 | putback_lru_pages(&pagelist); | 1344 | struct page *page1, *page2; |
1345 | list_for_each_entry_safe(page1, page2, &pagelist, lru) | ||
1346 | put_page(page1); | ||
1347 | |||
1299 | pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", | 1348 | pr_debug("soft offline: %#lx: migration failed %d, type %lx\n", |
1300 | pfn, ret, page->flags); | 1349 | pfn, ret, page->flags); |
1301 | if (ret > 0) | 1350 | if (ret > 0) |
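Since migrate_huge_pages() no longer drops the page references itself, a caller whose migration failed has to walk its private list and put each page, which soft_offline_huge_page() does with list_for_each_entry_safe() because put_page() may free the entry being visited. The sketch below shows the same save-the-next-pointer-before-freeing pattern on a plain singly linked list; it does not use the kernel list API.

#include <stdio.h>
#include <stdlib.h>

struct node {
    int pfn;            /* stand-in for the isolated page */
    struct node *next;
};

/*
 * Release every entry on the list.  The next pointer is saved before
 * the current node is freed, which is the same reason the kernel code
 * uses the _safe list iterator before put_page().
 */
static void put_all(struct node *head)
{
    struct node *cur, *next;

    for (cur = head; cur; cur = next) {
        next = cur->next;   /* grab it before freeing */
        printf("putting pfn %d\n", cur->pfn);
        free(cur);          /* analogue of put_page() */
    }
}

int main(void)
{
    struct node *head = NULL;
    int i;

    for (i = 0; i < 3; i++) {
        struct node *n = malloc(sizeof(*n));

        if (!n)
            return 1;
        n->pfn = i;
        n->next = head;
        head = n;
    }
    put_all(head);
    return 0;
}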
@@ -1419,6 +1468,7 @@ int soft_offline_page(struct page *page, int flags) | |||
1419 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, | 1468 | ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL, |
1420 | 0, true); | 1469 | 0, true); |
1421 | if (ret) { | 1470 | if (ret) { |
1471 | putback_lru_pages(&pagelist); | ||
1422 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", | 1472 | pr_info("soft offline: %#lx: migration failed %d, type %lx\n", |
1423 | pfn, ret, page->flags); | 1473 | pfn, ret, page->flags); |
1424 | if (ret > 0) | 1474 | if (ret > 0) |
diff --git a/mm/memory.c b/mm/memory.c index 31250faff390..5823698c2b71 100644 --- a/mm/memory.c +++ b/mm/memory.c | |||
@@ -2219,7 +2219,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2219 | &ptl); | 2219 | &ptl); |
2220 | if (!pte_same(*page_table, orig_pte)) { | 2220 | if (!pte_same(*page_table, orig_pte)) { |
2221 | unlock_page(old_page); | 2221 | unlock_page(old_page); |
2222 | page_cache_release(old_page); | ||
2223 | goto unlock; | 2222 | goto unlock; |
2224 | } | 2223 | } |
2225 | page_cache_release(old_page); | 2224 | page_cache_release(old_page); |
@@ -2289,7 +2288,6 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
2289 | &ptl); | 2288 | &ptl); |
2290 | if (!pte_same(*page_table, orig_pte)) { | 2289 | if (!pte_same(*page_table, orig_pte)) { |
2291 | unlock_page(old_page); | 2290 | unlock_page(old_page); |
2292 | page_cache_release(old_page); | ||
2293 | goto unlock; | 2291 | goto unlock; |
2294 | } | 2292 | } |
2295 | 2293 | ||
@@ -2367,16 +2365,6 @@ gotten: | |||
2367 | } | 2365 | } |
2368 | __SetPageUptodate(new_page); | 2366 | __SetPageUptodate(new_page); |
2369 | 2367 | ||
2370 | /* | ||
2371 | * Don't let another task, with possibly unlocked vma, | ||
2372 | * keep the mlocked page. | ||
2373 | */ | ||
2374 | if ((vma->vm_flags & VM_LOCKED) && old_page) { | ||
2375 | lock_page(old_page); /* for LRU manipulation */ | ||
2376 | clear_page_mlock(old_page); | ||
2377 | unlock_page(old_page); | ||
2378 | } | ||
2379 | |||
2380 | if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) | 2368 | if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) |
2381 | goto oom_free_new; | 2369 | goto oom_free_new; |
2382 | 2370 | ||
@@ -2444,10 +2432,20 @@ gotten: | |||
2444 | 2432 | ||
2445 | if (new_page) | 2433 | if (new_page) |
2446 | page_cache_release(new_page); | 2434 | page_cache_release(new_page); |
2447 | if (old_page) | ||
2448 | page_cache_release(old_page); | ||
2449 | unlock: | 2435 | unlock: |
2450 | pte_unmap_unlock(page_table, ptl); | 2436 | pte_unmap_unlock(page_table, ptl); |
2437 | if (old_page) { | ||
2438 | /* | ||
2439 | * Don't let another task, with possibly unlocked vma, | ||
2440 | * keep the mlocked page. | ||
2441 | */ | ||
2442 | if ((ret & VM_FAULT_WRITE) && (vma->vm_flags & VM_LOCKED)) { | ||
2443 | lock_page(old_page); /* LRU manipulation */ | ||
2444 | munlock_vma_page(old_page); | ||
2445 | unlock_page(old_page); | ||
2446 | } | ||
2447 | page_cache_release(old_page); | ||
2448 | } | ||
2451 | return ret; | 2449 | return ret; |
2452 | oom_free_new: | 2450 | oom_free_new: |
2453 | page_cache_release(new_page); | 2451 | page_cache_release(new_page); |
@@ -2650,6 +2648,7 @@ void unmap_mapping_range(struct address_space *mapping, | |||
2650 | details.last_index = ULONG_MAX; | 2648 | details.last_index = ULONG_MAX; |
2651 | details.i_mmap_lock = &mapping->i_mmap_lock; | 2649 | details.i_mmap_lock = &mapping->i_mmap_lock; |
2652 | 2650 | ||
2651 | mutex_lock(&mapping->unmap_mutex); | ||
2653 | spin_lock(&mapping->i_mmap_lock); | 2652 | spin_lock(&mapping->i_mmap_lock); |
2654 | 2653 | ||
2655 | /* Protect against endless unmapping loops */ | 2654 | /* Protect against endless unmapping loops */ |
@@ -2666,6 +2665,7 @@ void unmap_mapping_range(struct address_space *mapping, | |||
2666 | if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) | 2665 | if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) |
2667 | unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); | 2666 | unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); |
2668 | spin_unlock(&mapping->i_mmap_lock); | 2667 | spin_unlock(&mapping->i_mmap_lock); |
2668 | mutex_unlock(&mapping->unmap_mutex); | ||
2669 | } | 2669 | } |
2670 | EXPORT_SYMBOL(unmap_mapping_range); | 2670 | EXPORT_SYMBOL(unmap_mapping_range); |
2671 | 2671 | ||
@@ -3053,12 +3053,6 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma, | |||
3053 | goto out; | 3053 | goto out; |
3054 | } | 3054 | } |
3055 | charged = 1; | 3055 | charged = 1; |
3056 | /* | ||
3057 | * Don't let another task, with possibly unlocked vma, | ||
3058 | * keep the mlocked page. | ||
3059 | */ | ||
3060 | if (vma->vm_flags & VM_LOCKED) | ||
3061 | clear_page_mlock(vmf.page); | ||
3062 | copy_user_highpage(page, vmf.page, address, vma); | 3056 | copy_user_highpage(page, vmf.page, address, vma); |
3063 | __SetPageUptodate(page); | 3057 | __SetPageUptodate(page); |
3064 | } else { | 3058 | } else { |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 368fc9d23610..b53ec99f1428 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1524,10 +1524,9 @@ static nodemask_t *policy_nodemask(gfp_t gfp, struct mempolicy *policy) | |||
1524 | } | 1524 | } |
1525 | 1525 | ||
1526 | /* Return a zonelist indicated by gfp for node representing a mempolicy */ | 1526 | /* Return a zonelist indicated by gfp for node representing a mempolicy */ |
1527 | static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy) | 1527 | static struct zonelist *policy_zonelist(gfp_t gfp, struct mempolicy *policy, |
1528 | int nd) | ||
1528 | { | 1529 | { |
1529 | int nd = numa_node_id(); | ||
1530 | |||
1531 | switch (policy->mode) { | 1530 | switch (policy->mode) { |
1532 | case MPOL_PREFERRED: | 1531 | case MPOL_PREFERRED: |
1533 | if (!(policy->flags & MPOL_F_LOCAL)) | 1532 | if (!(policy->flags & MPOL_F_LOCAL)) |
@@ -1679,7 +1678,7 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, | |||
1679 | zl = node_zonelist(interleave_nid(*mpol, vma, addr, | 1678 | zl = node_zonelist(interleave_nid(*mpol, vma, addr, |
1680 | huge_page_shift(hstate_vma(vma))), gfp_flags); | 1679 | huge_page_shift(hstate_vma(vma))), gfp_flags); |
1681 | } else { | 1680 | } else { |
1682 | zl = policy_zonelist(gfp_flags, *mpol); | 1681 | zl = policy_zonelist(gfp_flags, *mpol, numa_node_id()); |
1683 | if ((*mpol)->mode == MPOL_BIND) | 1682 | if ((*mpol)->mode == MPOL_BIND) |
1684 | *nodemask = &(*mpol)->v.nodes; | 1683 | *nodemask = &(*mpol)->v.nodes; |
1685 | } | 1684 | } |
@@ -1820,7 +1819,7 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, | |||
1820 | */ | 1819 | */ |
1821 | struct page * | 1820 | struct page * |
1822 | alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, | 1821 | alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, |
1823 | unsigned long addr) | 1822 | unsigned long addr, int node) |
1824 | { | 1823 | { |
1825 | struct mempolicy *pol = get_vma_policy(current, vma, addr); | 1824 | struct mempolicy *pol = get_vma_policy(current, vma, addr); |
1826 | struct zonelist *zl; | 1825 | struct zonelist *zl; |
@@ -1830,13 +1829,13 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma, | |||
1830 | if (unlikely(pol->mode == MPOL_INTERLEAVE)) { | 1829 | if (unlikely(pol->mode == MPOL_INTERLEAVE)) { |
1831 | unsigned nid; | 1830 | unsigned nid; |
1832 | 1831 | ||
1833 | nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); | 1832 | nid = interleave_nid(pol, vma, addr, PAGE_SHIFT + order); |
1834 | mpol_cond_put(pol); | 1833 | mpol_cond_put(pol); |
1835 | page = alloc_page_interleave(gfp, order, nid); | 1834 | page = alloc_page_interleave(gfp, order, nid); |
1836 | put_mems_allowed(); | 1835 | put_mems_allowed(); |
1837 | return page; | 1836 | return page; |
1838 | } | 1837 | } |
1839 | zl = policy_zonelist(gfp, pol); | 1838 | zl = policy_zonelist(gfp, pol, node); |
1840 | if (unlikely(mpol_needs_cond_ref(pol))) { | 1839 | if (unlikely(mpol_needs_cond_ref(pol))) { |
1841 | /* | 1840 | /* |
1842 | * slow path: ref counted shared policy | 1841 | * slow path: ref counted shared policy |
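Passing PAGE_SHIFT + order into interleave_nid() makes MPOL_INTERLEAVE stripe memory across nodes in units of the allocation being made, so a 2M THP counts as one interleave step instead of inheriting the node its first 4K page would have gotten. Below is a much simplified model of mapping an offset to a node at a given granularity; the real code works on VMA offsets and the policy's node mask, which are reduced to plain integers here.

#include <stdio.h>

#define PAGE_SHIFT 12

/*
 * Simplified interleave: ask "which chunk of size 1 << shift is this
 * address in", then spread chunks round-robin over the nodes.
 */
static int interleave_node(unsigned long addr, int shift, int nr_nodes)
{
    return (int)((addr >> shift) % nr_nodes);
}

int main(void)
{
    unsigned long addr = 0x200000;  /* a 2M-aligned address */
    int nr_nodes = 4;
    int order = 9;                  /* 2M huge page on x86 */

    /* per-4K interleave vs. per-allocation interleave */
    printf("order 0 unit: node %d\n",
           interleave_node(addr, PAGE_SHIFT, nr_nodes));
    printf("order 9 unit: node %d\n",
           interleave_node(addr, PAGE_SHIFT + order, nr_nodes));
    return 0;
}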
@@ -1892,7 +1891,8 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) | |||
1892 | page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); | 1891 | page = alloc_page_interleave(gfp, order, interleave_nodes(pol)); |
1893 | else | 1892 | else |
1894 | page = __alloc_pages_nodemask(gfp, order, | 1893 | page = __alloc_pages_nodemask(gfp, order, |
1895 | policy_zonelist(gfp, pol), policy_nodemask(gfp, pol)); | 1894 | policy_zonelist(gfp, pol, numa_node_id()), |
1895 | policy_nodemask(gfp, pol)); | ||
1896 | put_mems_allowed(); | 1896 | put_mems_allowed(); |
1897 | return page; | 1897 | return page; |
1898 | } | 1898 | } |
diff --git a/mm/migrate.c b/mm/migrate.c index 46fe8cc13d67..352de555626c 100644 --- a/mm/migrate.c +++ b/mm/migrate.c | |||
@@ -772,6 +772,7 @@ uncharge: | |||
772 | unlock: | 772 | unlock: |
773 | unlock_page(page); | 773 | unlock_page(page); |
774 | 774 | ||
775 | move_newpage: | ||
775 | if (rc != -EAGAIN) { | 776 | if (rc != -EAGAIN) { |
776 | /* | 777 | /* |
777 | * A page that has been migrated has all references | 778 | * A page that has been migrated has all references |
@@ -785,8 +786,6 @@ unlock: | |||
785 | putback_lru_page(page); | 786 | putback_lru_page(page); |
786 | } | 787 | } |
787 | 788 | ||
788 | move_newpage: | ||
789 | |||
790 | /* | 789 | /* |
791 | * Move the new page to the LRU. If migration was not successful | 790 | * Move the new page to the LRU. If migration was not successful |
792 | * then this will free the page. | 791 | * then this will free the page. |
@@ -888,7 +887,7 @@ out: | |||
888 | * are movable anymore because to has become empty | 887 | * are movable anymore because to has become empty |
889 | * or no retryable pages exist anymore. | 888 | * or no retryable pages exist anymore. |
890 | * Caller should call putback_lru_pages to return pages to the LRU | 889 | * Caller should call putback_lru_pages to return pages to the LRU |
891 | * or free list. | 890 | * or free list only if ret != 0. |
892 | * | 891 | * |
893 | * Return: Number of pages not migrated or error code. | 892 | * Return: Number of pages not migrated or error code. |
894 | */ | 893 | */ |
@@ -981,10 +980,6 @@ int migrate_huge_pages(struct list_head *from, | |||
981 | } | 980 | } |
982 | rc = 0; | 981 | rc = 0; |
983 | out: | 982 | out: |
984 | |||
985 | list_for_each_entry_safe(page, page2, from, lru) | ||
986 | put_page(page); | ||
987 | |||
988 | if (rc) | 983 | if (rc) |
989 | return rc; | 984 | return rc; |
990 | 985 | ||
@@ -1292,14 +1287,14 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, | |||
1292 | return -EPERM; | 1287 | return -EPERM; |
1293 | 1288 | ||
1294 | /* Find the mm_struct */ | 1289 | /* Find the mm_struct */ |
1295 | read_lock(&tasklist_lock); | 1290 | rcu_read_lock(); |
1296 | task = pid ? find_task_by_vpid(pid) : current; | 1291 | task = pid ? find_task_by_vpid(pid) : current; |
1297 | if (!task) { | 1292 | if (!task) { |
1298 | read_unlock(&tasklist_lock); | 1293 | rcu_read_unlock(); |
1299 | return -ESRCH; | 1294 | return -ESRCH; |
1300 | } | 1295 | } |
1301 | mm = get_task_mm(task); | 1296 | mm = get_task_mm(task); |
1302 | read_unlock(&tasklist_lock); | 1297 | rcu_read_unlock(); |
1303 | 1298 | ||
1304 | if (!mm) | 1299 | if (!mm) |
1305 | return -EINVAL; | 1300 | return -EINVAL; |
diff --git a/mm/mlock.c b/mm/mlock.c index 13e81ee8be9d..c3924c7f00be 100644 --- a/mm/mlock.c +++ b/mm/mlock.c | |||
@@ -178,6 +178,13 @@ static long __mlock_vma_pages_range(struct vm_area_struct *vma, | |||
178 | if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) | 178 | if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE) |
179 | gup_flags |= FOLL_WRITE; | 179 | gup_flags |= FOLL_WRITE; |
180 | 180 | ||
181 | /* | ||
182 | * We want mlock to succeed for regions that have any permissions | ||
183 | * other than PROT_NONE. | ||
184 | */ | ||
185 | if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)) | ||
186 | gup_flags |= FOLL_FORCE; | ||
187 | |||
181 | if (vma->vm_flags & VM_LOCKED) | 188 | if (vma->vm_flags & VM_LOCKED) |
182 | gup_flags |= FOLL_MLOCK; | 189 | gup_flags |= FOLL_MLOCK; |
183 | 190 | ||
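__mlock_vma_pages_range() now also sets FOLL_FORCE whenever the VMA has any of the read/write/exec bits, so mlock() can fault in pages of regions whose protections would otherwise reject the probing fault, and only PROT_NONE regions stay excluded. A tiny model of the flag computation follows; the bit values are arbitrary stand-ins for the FOLL_* and VM_* constants.

#include <stdio.h>

/* Illustrative bit values, not the kernel's definitions. */
#define VM_READ    0x01u
#define VM_WRITE   0x02u
#define VM_EXEC    0x04u
#define VM_SHARED  0x08u
#define VM_LOCKED  0x10u

#define FOLL_WRITE 0x01u
#define FOLL_FORCE 0x02u
#define FOLL_MLOCK 0x04u

/* Mirrors the gup_flags computation in the hunk above. */
static unsigned gup_flags_for(unsigned vm_flags)
{
    unsigned gup_flags = 0;

    if ((vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
        gup_flags |= FOLL_WRITE;

    /* mlock should work for anything that is not PROT_NONE */
    if (vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
        gup_flags |= FOLL_FORCE;

    if (vm_flags & VM_LOCKED)
        gup_flags |= FOLL_MLOCK;

    return gup_flags;
}

int main(void)
{
    printf("%#x\n", gup_flags_for(VM_WRITE | VM_LOCKED));  /* write|force|mlock */
    printf("%#x\n", gup_flags_for(0));                     /* PROT_NONE: 0 */
    return 0;
}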
diff --git a/mm/mremap.c b/mm/mremap.c index 9925b6391b80..1de98d492ddc 100644 --- a/mm/mremap.c +++ b/mm/mremap.c | |||
@@ -94,9 +94,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd, | |||
94 | */ | 94 | */ |
95 | mapping = vma->vm_file->f_mapping; | 95 | mapping = vma->vm_file->f_mapping; |
96 | spin_lock(&mapping->i_mmap_lock); | 96 | spin_lock(&mapping->i_mmap_lock); |
97 | if (new_vma->vm_truncate_count && | 97 | new_vma->vm_truncate_count = 0; |
98 | new_vma->vm_truncate_count != vma->vm_truncate_count) | ||
99 | new_vma->vm_truncate_count = 0; | ||
100 | } | 98 | } |
101 | 99 | ||
102 | /* | 100 | /* |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 90c1439549fd..cdef1d4b4e47 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -1088,8 +1088,10 @@ static void drain_pages(unsigned int cpu) | |||
1088 | pset = per_cpu_ptr(zone->pageset, cpu); | 1088 | pset = per_cpu_ptr(zone->pageset, cpu); |
1089 | 1089 | ||
1090 | pcp = &pset->pcp; | 1090 | pcp = &pset->pcp; |
1091 | free_pcppages_bulk(zone, pcp->count, pcp); | 1091 | if (pcp->count) { |
1092 | pcp->count = 0; | 1092 | free_pcppages_bulk(zone, pcp->count, pcp); |
1093 | pcp->count = 0; | ||
1094 | } | ||
1093 | local_irq_restore(flags); | 1095 | local_irq_restore(flags); |
1094 | } | 1096 | } |
1095 | } | 1097 | } |
@@ -2034,6 +2036,14 @@ restart: | |||
2034 | */ | 2036 | */ |
2035 | alloc_flags = gfp_to_alloc_flags(gfp_mask); | 2037 | alloc_flags = gfp_to_alloc_flags(gfp_mask); |
2036 | 2038 | ||
2039 | /* | ||
2040 | * Find the true preferred zone if the allocation is unconstrained by | ||
2041 | * cpusets. | ||
2042 | */ | ||
2043 | if (!(alloc_flags & ALLOC_CPUSET) && !nodemask) | ||
2044 | first_zones_zonelist(zonelist, high_zoneidx, NULL, | ||
2045 | &preferred_zone); | ||
2046 | |||
2037 | /* This is the last chance, in general, before the goto nopage. */ | 2047 | /* This is the last chance, in general, before the goto nopage. */ |
2038 | page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, | 2048 | page = get_page_from_freelist(gfp_mask, nodemask, order, zonelist, |
2039 | high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, | 2049 | high_zoneidx, alloc_flags & ~ALLOC_NO_WATERMARKS, |
@@ -2192,7 +2202,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, | |||
2192 | 2202 | ||
2193 | get_mems_allowed(); | 2203 | get_mems_allowed(); |
2194 | /* The preferred zone is used for statistics later */ | 2204 | /* The preferred zone is used for statistics later */ |
2195 | first_zones_zonelist(zonelist, high_zoneidx, nodemask, &preferred_zone); | 2205 | first_zones_zonelist(zonelist, high_zoneidx, |
2206 | nodemask ? : &cpuset_current_mems_allowed, | ||
2207 | &preferred_zone); | ||
2196 | if (!preferred_zone) { | 2208 | if (!preferred_zone) { |
2197 | put_mems_allowed(); | 2209 | put_mems_allowed(); |
2198 | return NULL; | 2210 | return NULL; |
@@ -5364,10 +5376,9 @@ __count_immobile_pages(struct zone *zone, struct page *page, int count) | |||
5364 | for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { | 5376 | for (found = 0, iter = 0; iter < pageblock_nr_pages; iter++) { |
5365 | unsigned long check = pfn + iter; | 5377 | unsigned long check = pfn + iter; |
5366 | 5378 | ||
5367 | if (!pfn_valid_within(check)) { | 5379 | if (!pfn_valid_within(check)) |
5368 | iter++; | ||
5369 | continue; | 5380 | continue; |
5370 | } | 5381 | |
5371 | page = pfn_to_page(check); | 5382 | page = pfn_to_page(check); |
5372 | if (!page_count(page)) { | 5383 | if (!page_count(page)) { |
5373 | if (PageBuddy(page)) | 5384 | if (PageBuddy(page)) |
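The removed branch incremented iter by hand inside a for loop that already increments it, so every pfn hole made the scan skip the following pfn as well; with the plain continue each entry is examined exactly once. The toy loop below reproduces the difference, with a stand-in predicate instead of pfn_valid_within().

#include <stdio.h>
#include <stdbool.h>

#define NR 8

/* Stand-in for pfn_valid_within(): treat odd "pfns" as holes. */
static bool valid(int pfn) { return (pfn & 1) == 0; }

static int count_checked(bool buggy)
{
    int iter, checked = 0;

    for (iter = 0; iter < NR; iter++) {
        if (!valid(iter)) {
            if (buggy)
                iter++;  /* old code: skips the next pfn too */
            continue;
        }
        checked++;       /* page actually examined */
    }
    return checked;
}

int main(void)
{
    printf("buggy loop checked %d of %d valid pfns\n",
           count_checked(true), NR / 2);
    printf("fixed loop checked %d of %d valid pfns\n",
           count_checked(false), NR / 2);
    return 0;
}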
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 0369f5b3ba1b..eb663fb533e0 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * Copyright (C) 2010 Linus Torvalds | 6 | * Copyright (C) 2010 Linus Torvalds |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/pagemap.h> | ||
9 | #include <asm/tlb.h> | 10 | #include <asm/tlb.h> |
10 | #include <asm-generic/pgtable.h> | 11 | #include <asm-generic/pgtable.h> |
11 | 12 | ||
@@ -497,41 +497,51 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, | |||
497 | struct mm_struct *mm = vma->vm_mm; | 497 | struct mm_struct *mm = vma->vm_mm; |
498 | int referenced = 0; | 498 | int referenced = 0; |
499 | 499 | ||
500 | /* | ||
501 | * Don't want to elevate referenced for mlocked page that gets this far, | ||
502 | * in order that it progresses to try_to_unmap and is moved to the | ||
503 | * unevictable list. | ||
504 | */ | ||
505 | if (vma->vm_flags & VM_LOCKED) { | ||
506 | *mapcount = 0; /* break early from loop */ | ||
507 | *vm_flags |= VM_LOCKED; | ||
508 | goto out; | ||
509 | } | ||
510 | |||
511 | /* Pretend the page is referenced if the task has the | ||
512 | swap token and is in the middle of a page fault. */ | ||
513 | if (mm != current->mm && has_swap_token(mm) && | ||
514 | rwsem_is_locked(&mm->mmap_sem)) | ||
515 | referenced++; | ||
516 | |||
517 | if (unlikely(PageTransHuge(page))) { | 500 | if (unlikely(PageTransHuge(page))) { |
518 | pmd_t *pmd; | 501 | pmd_t *pmd; |
519 | 502 | ||
520 | spin_lock(&mm->page_table_lock); | 503 | spin_lock(&mm->page_table_lock); |
504 | /* | ||
505 | * rmap might return false positives; we must filter | ||
506 | * these out using page_check_address_pmd(). | ||
507 | */ | ||
521 | pmd = page_check_address_pmd(page, mm, address, | 508 | pmd = page_check_address_pmd(page, mm, address, |
522 | PAGE_CHECK_ADDRESS_PMD_FLAG); | 509 | PAGE_CHECK_ADDRESS_PMD_FLAG); |
523 | if (pmd && !pmd_trans_splitting(*pmd) && | 510 | if (!pmd) { |
524 | pmdp_clear_flush_young_notify(vma, address, pmd)) | 511 | spin_unlock(&mm->page_table_lock); |
512 | goto out; | ||
513 | } | ||
514 | |||
515 | if (vma->vm_flags & VM_LOCKED) { | ||
516 | spin_unlock(&mm->page_table_lock); | ||
517 | *mapcount = 0; /* break early from loop */ | ||
518 | *vm_flags |= VM_LOCKED; | ||
519 | goto out; | ||
520 | } | ||
521 | |||
522 | /* go ahead even if the pmd is pmd_trans_splitting() */ | ||
523 | if (pmdp_clear_flush_young_notify(vma, address, pmd)) | ||
525 | referenced++; | 524 | referenced++; |
526 | spin_unlock(&mm->page_table_lock); | 525 | spin_unlock(&mm->page_table_lock); |
527 | } else { | 526 | } else { |
528 | pte_t *pte; | 527 | pte_t *pte; |
529 | spinlock_t *ptl; | 528 | spinlock_t *ptl; |
530 | 529 | ||
530 | /* | ||
531 | * rmap might return false positives; we must filter | ||
532 | * these out using page_check_address(). | ||
533 | */ | ||
531 | pte = page_check_address(page, mm, address, &ptl, 0); | 534 | pte = page_check_address(page, mm, address, &ptl, 0); |
532 | if (!pte) | 535 | if (!pte) |
533 | goto out; | 536 | goto out; |
534 | 537 | ||
538 | if (vma->vm_flags & VM_LOCKED) { | ||
539 | pte_unmap_unlock(pte, ptl); | ||
540 | *mapcount = 0; /* break early from loop */ | ||
541 | *vm_flags |= VM_LOCKED; | ||
542 | goto out; | ||
543 | } | ||
544 | |||
535 | if (ptep_clear_flush_young_notify(vma, address, pte)) { | 545 | if (ptep_clear_flush_young_notify(vma, address, pte)) { |
536 | /* | 546 | /* |
537 | * Don't treat a reference through a sequentially read | 547 | * Don't treat a reference through a sequentially read |
@@ -546,6 +556,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma, | |||
546 | pte_unmap_unlock(pte, ptl); | 556 | pte_unmap_unlock(pte, ptl); |
547 | } | 557 | } |
548 | 558 | ||
559 | /* Pretend the page is referenced if the task has the | ||
560 | swap token and is in the middle of a page fault. */ | ||
561 | if (mm != current->mm && has_swap_token(mm) && | ||
562 | rwsem_is_locked(&mm->mmap_sem)) | ||
563 | referenced++; | ||
564 | |||
549 | (*mapcount)--; | 565 | (*mapcount)--; |
550 | 566 | ||
551 | if (referenced) | 567 | if (referenced) |
diff --git a/mm/shmem.c b/mm/shmem.c index 5ee67c990602..3437b65d6d6e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c | |||
@@ -2144,8 +2144,10 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, | |||
2144 | { | 2144 | { |
2145 | struct inode *inode = dentry->d_inode; | 2145 | struct inode *inode = dentry->d_inode; |
2146 | 2146 | ||
2147 | if (*len < 3) | 2147 | if (*len < 3) { |
2148 | *len = 3; | ||
2148 | return 255; | 2149 | return 255; |
2150 | } | ||
2149 | 2151 | ||
2150 | if (inode_unhashed(inode)) { | 2152 | if (inode_unhashed(inode)) { |
2151 | /* Unfortunately insert_inode_hash is not idempotent, | 2153 | /* Unfortunately insert_inode_hash is not idempotent, |
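encode_fh() implementations are expected to report, via *len, how many 32-bit words they need when the caller's buffer is too small; returning 255 alone left *len untouched, so shmem now writes the required size of 3 before bailing out. A minimal model of that contract, detached from the VFS types, is sketched below.

#include <stdio.h>
#include <stdint.h>

#define FH_WORDS_NEEDED 3  /* shmem packs inode number + generation into 3 words */

/*
 * Returns an "fh type" on success, 255 when the buffer is too small --
 * and in the latter case still tells the caller how much room to bring
 * next time, which is what the shmem fix above adds.
 */
static int toy_encode_fh(uint32_t *fh, int *len, uint32_t ino, uint32_t gen)
{
    if (*len < FH_WORDS_NEEDED) {
        *len = FH_WORDS_NEEDED;  /* report the required length */
        return 255;
    }
    fh[0] = ino;
    fh[1] = 0;
    fh[2] = gen;
    *len = FH_WORDS_NEEDED;
    return 1;  /* arbitrary non-error fh type for the demo */
}

int main(void)
{
    uint32_t fh[4];
    int len = 1;

    if (toy_encode_fh(fh, &len, 42, 7) == 255)
        printf("need %d words, retrying\n", len);
    if (toy_encode_fh(fh, &len, 42, 7) != 255)
        printf("encoded handle with %d words\n", len);
    return 0;
}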
diff --git a/mm/swapfile.c b/mm/swapfile.c index 07a458d72fa8..0341c5700e34 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c | |||
@@ -1940,7 +1940,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) | |||
1940 | 1940 | ||
1941 | error = -EINVAL; | 1941 | error = -EINVAL; |
1942 | if (S_ISBLK(inode->i_mode)) { | 1942 | if (S_ISBLK(inode->i_mode)) { |
1943 | bdev = I_BDEV(inode); | 1943 | bdev = bdgrab(I_BDEV(inode)); |
1944 | error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, | 1944 | error = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, |
1945 | sys_swapon); | 1945 | sys_swapon); |
1946 | if (error < 0) { | 1946 | if (error < 0) { |
diff --git a/mm/truncate.c b/mm/truncate.c index 49feb46e77b8..d64296be00d3 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -225,6 +225,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
225 | next = start; | 225 | next = start; |
226 | while (next <= end && | 226 | while (next <= end && |
227 | pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { | 227 | pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { |
228 | mem_cgroup_uncharge_start(); | ||
228 | for (i = 0; i < pagevec_count(&pvec); i++) { | 229 | for (i = 0; i < pagevec_count(&pvec); i++) { |
229 | struct page *page = pvec.pages[i]; | 230 | struct page *page = pvec.pages[i]; |
230 | pgoff_t page_index = page->index; | 231 | pgoff_t page_index = page->index; |
@@ -247,6 +248,7 @@ void truncate_inode_pages_range(struct address_space *mapping, | |||
247 | unlock_page(page); | 248 | unlock_page(page); |
248 | } | 249 | } |
249 | pagevec_release(&pvec); | 250 | pagevec_release(&pvec); |
251 | mem_cgroup_uncharge_end(); | ||
250 | cond_resched(); | 252 | cond_resched(); |
251 | } | 253 | } |
252 | 254 | ||
diff --git a/mm/vmscan.c b/mm/vmscan.c index f5d90dedebba..6771ea70bfe7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -1841,16 +1841,28 @@ static inline bool should_continue_reclaim(struct zone *zone, | |||
1841 | if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) | 1841 | if (!(sc->reclaim_mode & RECLAIM_MODE_COMPACTION)) |
1842 | return false; | 1842 | return false; |
1843 | 1843 | ||
1844 | /* | 1844 | /* Consider stopping depending on scan and reclaim activity */ |
1845 | * If we failed to reclaim and have scanned the full list, stop. | 1845 | if (sc->gfp_mask & __GFP_REPEAT) { |
1846 | * NOTE: Checking just nr_reclaimed would exit reclaim/compaction far | 1846 | /* |
1847 | * faster but obviously would be less likely to succeed | 1847 | * For __GFP_REPEAT allocations, stop reclaiming if the |
1848 | * allocation. If this is desirable, use GFP_REPEAT to decide | 1848 | * full LRU list has been scanned and we are still failing |
1849 | * if both reclaimed and scanned should be checked or just | 1849 | * to reclaim pages. This full LRU scan is potentially |
1850 | * reclaimed | 1850 | * expensive but a __GFP_REPEAT caller really wants to succeed |
1851 | */ | 1851 | */ |
1852 | if (!nr_reclaimed && !nr_scanned) | 1852 | if (!nr_reclaimed && !nr_scanned) |
1853 | return false; | 1853 | return false; |
1854 | } else { | ||
1855 | /* | ||
1856 | * For non-__GFP_REPEAT allocations which can presumably | ||
1857 | * fail without consequence, stop if we failed to reclaim | ||
1858 | * any pages from the last SWAP_CLUSTER_MAX number of | ||
1859 | * pages that were scanned. This will return to the | ||
1860 | * caller faster at the risk reclaim/compaction and | ||
1861 | * the resulting allocation attempt fails | ||
1862 | */ | ||
1863 | if (!nr_reclaimed) | ||
1864 | return false; | ||
1865 | } | ||
1854 | 1866 | ||
1855 | /* | 1867 | /* |
1856 | * If we have not reclaimed enough pages for compaction and the | 1868 | * If we have not reclaimed enough pages for compaction and the |
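should_continue_reclaim() now branches on __GFP_REPEAT: callers that insist on success keep going until a full LRU scan reclaims nothing at all, while everyone else stops as soon as a scan batch yields no reclaimed pages. The predicate is modeled below on plain booleans; the compaction-readiness checks that follow in the real function are omitted.

#include <stdbool.h>
#include <stdio.h>

/*
 * "Should reclaim/compaction keep going?" -- the part of the decision
 * rewritten in the hunk above, reduced to its inputs.
 */
static bool keep_reclaiming(bool gfp_repeat, unsigned long nr_reclaimed,
                            unsigned long nr_scanned)
{
    if (gfp_repeat) {
        /* only stop once a full scan reclaimed nothing at all */
        if (!nr_reclaimed && !nr_scanned)
            return false;
    } else {
        /* cheap callers: stop on the first fruitless batch */
        if (!nr_reclaimed)
            return false;
    }
    return true;  /* real code goes on to check compaction targets */
}

int main(void)
{
    /* scanned pages but reclaimed none */
    printf("__GFP_REPEAT: %d\n", keep_reclaiming(true, 0, 128));   /* 1 */
    printf("normal:       %d\n", keep_reclaiming(false, 0, 128));  /* 0 */
    return 0;
}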
@@ -1882,12 +1894,12 @@ static void shrink_zone(int priority, struct zone *zone, | |||
1882 | unsigned long nr[NR_LRU_LISTS]; | 1894 | unsigned long nr[NR_LRU_LISTS]; |
1883 | unsigned long nr_to_scan; | 1895 | unsigned long nr_to_scan; |
1884 | enum lru_list l; | 1896 | enum lru_list l; |
1885 | unsigned long nr_reclaimed; | 1897 | unsigned long nr_reclaimed, nr_scanned; |
1886 | unsigned long nr_to_reclaim = sc->nr_to_reclaim; | 1898 | unsigned long nr_to_reclaim = sc->nr_to_reclaim; |
1887 | unsigned long nr_scanned = sc->nr_scanned; | ||
1888 | 1899 | ||
1889 | restart: | 1900 | restart: |
1890 | nr_reclaimed = 0; | 1901 | nr_reclaimed = 0; |
1902 | nr_scanned = sc->nr_scanned; | ||
1891 | get_scan_count(zone, sc, nr, priority); | 1903 | get_scan_count(zone, sc, nr, priority); |
1892 | 1904 | ||
1893 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || | 1905 | while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] || |
@@ -2083,7 +2095,8 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
2083 | struct zone *preferred_zone; | 2095 | struct zone *preferred_zone; |
2084 | 2096 | ||
2085 | first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), | 2097 | first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask), |
2086 | NULL, &preferred_zone); | 2098 | &cpuset_current_mems_allowed, |
2099 | &preferred_zone); | ||
2087 | wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); | 2100 | wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10); |
2088 | } | 2101 | } |
2089 | } | 2102 | } |