Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	71
1 files changed, 41 insertions, 30 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b6c1ce3c53b5..113e35c47502 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
 
 static inline struct page *alloc_hugepage_vma(int defrag,
					       struct vm_area_struct *vma,
-					       unsigned long haddr)
+					       unsigned long haddr, int nd)
 {
	return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
-			       HPAGE_PMD_ORDER, vma, haddr);
+			       HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
 #ifndef CONFIG_NUMA
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
		if (unlikely(khugepaged_enter(vma)))
			return VM_FAULT_OOM;
		page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					  vma, haddr);
+					  vma, haddr, numa_node_id());
		if (unlikely(!page))
			goto out;
		if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -799,8 +799,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
	}
 
	for (i = 0; i < HPAGE_PMD_NR; i++) {
-		pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-					  vma, address);
+		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE,
+					       vma, address, page_to_nid(page));
		if (unlikely(!pages[i] ||
			     mem_cgroup_newpage_charge(pages[i], mm,
						       GFP_KERNEL))) {
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
	if (transparent_hugepage_enabled(vma) &&
	    !transparent_hugepage_debug_cow())
		new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					      vma, haddr);
+					      vma, haddr, numa_node_id());
	else
		new_page = NULL;
 
@@ -1745,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 static void collapse_huge_page(struct mm_struct *mm,
			       unsigned long address,
			       struct page **hpage,
-			       struct vm_area_struct *vma)
+			       struct vm_area_struct *vma,
+			       int node)
 {
	pgd_t *pgd;
	pud_t *pud;
@@ -1761,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm,
 #ifndef CONFIG_NUMA
	VM_BUG_ON(!*hpage);
	new_page = *hpage;
+	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+		up_read(&mm->mmap_sem);
+		return;
+	}
 #else
	VM_BUG_ON(*hpage);
	/*
@@ -1773,18 +1778,19 @@ static void collapse_huge_page(struct mm_struct *mm,
	 * mmap_sem in read mode is good idea also to allow greater
	 * scalability.
	 */
-	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
+				      node);
	if (unlikely(!new_page)) {
		up_read(&mm->mmap_sem);
		*hpage = ERR_PTR(-ENOMEM);
		return;
	}
-#endif
	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
		up_read(&mm->mmap_sem);
		put_page(new_page);
		return;
	}
+#endif
 
	/* after allocating the hugepage upgrade to mmap_sem write mode */
	up_read(&mm->mmap_sem);
@@ -1811,6 +1817,8 @@ static void collapse_huge_page(struct mm_struct *mm,
	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
		goto out;
+	if (is_vma_temporary_stack(vma))
+		goto out;
	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
	pgd = pgd_offset(mm, address);
@@ -1852,7 +1860,6 @@ static void collapse_huge_page(struct mm_struct *mm,
		set_pmd_at(mm, address, pmd, _pmd);
		spin_unlock(&mm->page_table_lock);
		anon_vma_unlock(vma->anon_vma);
-		mem_cgroup_uncharge_page(new_page);
		goto out;
	}
 
@@ -1898,6 +1905,7 @@ out_up_write:
	return;
 
 out:
+	mem_cgroup_uncharge_page(new_page);
 #ifdef CONFIG_NUMA
	put_page(new_page);
 #endif
@@ -1917,6 +1925,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
	struct page *page;
	unsigned long _address;
	spinlock_t *ptl;
+	int node = -1;
 
	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -1947,6 +1956,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
		page = vm_normal_page(vma, _address, pteval);
		if (unlikely(!page))
			goto out_unmap;
+		/*
+		 * Chose the node of the first page. This could
+		 * be more sophisticated and look at more pages,
+		 * but isn't for now.
+		 */
+		if (node == -1)
+			node = page_to_nid(page);
		VM_BUG_ON(PageCompound(page));
		if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
			goto out_unmap;
@@ -1963,7 +1979,7 @@ out_unmap:
	pte_unmap_unlock(pte, ptl);
	if (ret)
		/* collapse_huge_page will return with the mmap_sem released */
-		collapse_huge_page(mm, address, hpage, vma);
+		collapse_huge_page(mm, address, hpage, vma, node);
 out:
	return ret;
 }
@@ -2032,32 +2048,27 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
		if ((!(vma->vm_flags & VM_HUGEPAGE) &&
		     !khugepaged_always()) ||
		    (vma->vm_flags & VM_NOHUGEPAGE)) {
+		skip:
			progress++;
			continue;
		}
-
		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-		if (!vma->anon_vma || vma->vm_ops || vma->vm_file) {
-			khugepaged_scan.address = vma->vm_end;
-			progress++;
-			continue;
-		}
+		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+			goto skip;
+		if (is_vma_temporary_stack(vma))
+			goto skip;
+
		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
		hend = vma->vm_end & HPAGE_PMD_MASK;
-		if (hstart >= hend) {
-			progress++;
-			continue;
-		}
+		if (hstart >= hend)
+			goto skip;
+		if (khugepaged_scan.address > hend)
+			goto skip;
		if (khugepaged_scan.address < hstart)
			khugepaged_scan.address = hstart;
-		if (khugepaged_scan.address > hend) {
-			khugepaged_scan.address = hend + HPAGE_PMD_SIZE;
-			progress++;
-			continue;
-		}
-		BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
+		VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
 
		while (khugepaged_scan.address < hend) {
			int ret;
@@ -2086,7 +2097,7 @@ breakouterloop:
 breakouterloop_mmap_sem:
 
	spin_lock(&khugepaged_mm_lock);
-	BUG_ON(khugepaged_scan.mm_slot != mm_slot);
+	VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot);
	/*
	 * Release the current mm_slot if this mm is about to die, or
	 * if we scanned all vmas of this mm.
@@ -2241,9 +2252,9 @@ static int khugepaged(void *none)
 
	for (;;) {
		mutex_unlock(&khugepaged_mutex);
-		BUG_ON(khugepaged_thread != current);
+		VM_BUG_ON(khugepaged_thread != current);
		khugepaged_loop();
-		BUG_ON(khugepaged_thread != current);
+		VM_BUG_ON(khugepaged_thread != current);
 
		mutex_lock(&khugepaged_mutex);
		if (!khugepaged_enabled())