Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c | 71
1 file changed, 41 insertions, 30 deletions
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b6c1ce3c53b5..113e35c47502 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
 
 static inline struct page *alloc_hugepage_vma(int defrag,
 					      struct vm_area_struct *vma,
-					      unsigned long haddr)
+					      unsigned long haddr, int nd)
 {
 	return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
-			       HPAGE_PMD_ORDER, vma, haddr);
+			       HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
 #ifndef CONFIG_NUMA
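
The helper now carries an explicit preferred NUMA node (nd) down to alloc_pages_vma() instead of letting the allocator decide. For reference, HPAGE_PMD_ORDER is the allocation order of one PMD-sized huge page; on x86_64 with 4 KiB base pages and 2 MiB huge pages that is order 9, i.e. 512 contiguous base pages. A minimal userspace sketch of the arithmetic (the constant values are assumed for x86_64, not taken from this patch):

#include <stdio.h>

/* Assumed x86_64 values; the kernel derives these from HPAGE_PMD_SHIFT. */
#define PAGE_SHIFT      12                      /* 4 KiB base page */
#define HPAGE_PMD_SHIFT 21                      /* 2 MiB huge page */
#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT - PAGE_SHIFT)
#define HPAGE_PMD_NR    (1UL << HPAGE_PMD_ORDER)

int main(void)
{
	/* order 9 -> 512 base pages -> 2 MiB per huge page */
	printf("order=%d pages=%lu bytes=%lu\n",
	       HPAGE_PMD_ORDER, HPAGE_PMD_NR, HPAGE_PMD_NR << PAGE_SHIFT);
	return 0;
}
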
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(khugepaged_enter(vma)))
 			return VM_FAULT_OOM;
 		page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					  vma, haddr);
+					  vma, haddr, numa_node_id());
 		if (unlikely(!page))
 			goto out;
 		if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
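
In the fault path, numa_node_id() is the node of the CPU taking the fault, so the new huge page is allocated local to the task that first touched the range. A rough userspace analogue of the same idea using libnuma (this is only an analogy, not the kernel path; link with -lnuma):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <numa.h>

int main(void)
{
	if (numa_available() < 0)
		return 1;               /* no NUMA support on this system */

	/* Allocate on the node of the CPU we are currently running on,
	 * which is what passing numa_node_id() achieves in the kernel. */
	int cpu  = sched_getcpu();
	int node = numa_node_of_cpu(cpu);
	void *buf = numa_alloc_onnode(2UL << 20, node);   /* 2 MiB, local node */

	printf("cpu=%d node=%d buf=%p\n", cpu, node, buf);
	numa_free(buf, 2UL << 20);
	return 0;
}
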
@@ -799,8 +799,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 	}
 
 	for (i = 0; i < HPAGE_PMD_NR; i++) {
-		pages[i] = alloc_page_vma(GFP_HIGHUSER_MOVABLE,
-					  vma, address);
+		pages[i] = alloc_page_vma_node(GFP_HIGHUSER_MOVABLE,
+					       vma, address, page_to_nid(page));
 		if (unlikely(!pages[i] ||
 			     mem_cgroup_newpage_charge(pages[i], mm,
 						       GFP_KERNEL))) {
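
The write-protect fallback splits a huge page into HPAGE_PMD_NR small copies; allocating them with page_to_nid(page) keeps the copies on the node of the original huge page rather than wherever the faulting CPU happens to run. A userspace analogue of "which node is this page on" is move_pages(2) called with a NULL node array, which only queries placement (again an analogy, not the kernel interface; link with -lnuma):

#include <numaif.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	long pagesz = sysconf(_SC_PAGESIZE);
	void *page;
	int status = -1;

	if (posix_memalign(&page, pagesz, pagesz))
		return 1;
	memset(page, 0, pagesz);        /* fault it in so it has a node */

	/* nodes == NULL: do not migrate, just report the current node. */
	if (move_pages(0, 1, &page, NULL, &status, 0) == 0)
		printf("page %p is on node %d\n", page, status);

	free(page);
	return 0;
}
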
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow())
 		new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					      vma, haddr);
+					      vma, haddr, numa_node_id());
 	else
 		new_page = NULL;
 
@@ -1745,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 static void collapse_huge_page(struct mm_struct *mm,
 			       unsigned long address,
 			       struct page **hpage,
-			       struct vm_area_struct *vma)
+			       struct vm_area_struct *vma,
+			       int node)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -1761,6 +1762,10 @@ static void collapse_huge_page(struct mm_struct *mm,
 #ifndef CONFIG_NUMA
 	VM_BUG_ON(!*hpage);
 	new_page = *hpage;
+	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
+		up_read(&mm->mmap_sem);
+		return;
+	}
 #else
 	VM_BUG_ON(*hpage);
 	/*
@@ -1773,18 +1778,19 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * mmap_sem in read mode is good idea also to allow greater
 	 * scalability.
 	 */
-	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
+				      node);
 	if (unlikely(!new_page)) {
 		up_read(&mm->mmap_sem);
 		*hpage = ERR_PTR(-ENOMEM);
 		return;
 	}
-#endif
 	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
 		up_read(&mm->mmap_sem);
 		put_page(new_page);
 		return;
 	}
+#endif
 
 	/* after allocating the hugepage upgrade to mmap_sem write mode */
 	up_read(&mm->mmap_sem);
@@ -1811,6 +1817,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
 	if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
 		goto out;
+	if (is_vma_temporary_stack(vma))
+		goto out;
 	VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
 	pgd = pgd_offset(mm, address);
@@ -1852,7 +1860,6 @@ static void collapse_huge_page(struct mm_struct *mm,
 		set_pmd_at(mm, address, pmd, _pmd);
 		spin_unlock(&mm->page_table_lock);
 		anon_vma_unlock(vma->anon_vma);
-		mem_cgroup_uncharge_page(new_page);
 		goto out;
 	}
 
@@ -1898,6 +1905,7 @@ out_up_write:
 	return;
 
 out:
+	mem_cgroup_uncharge_page(new_page);
 #ifdef CONFIG_NUMA
 	put_page(new_page);
 #endif
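
Together with the hunks above, the memcg charge is now taken right after the huge page is obtained (in both the CONFIG_NUMA and !CONFIG_NUMA branches), and the matching uncharge moves to the shared out: label instead of living in one particular failure branch. That is the usual kernel goto-out cleanup idiom: every failure path after the charge funnels through one label, so no path can forget to undo it, while success returns without releasing it. A self-contained plain-C sketch of the pattern (function and stub names are hypothetical stand-ins, not kernel API):

#include <stdio.h>

/* Hypothetical stand-ins for mem_cgroup_newpage_charge()/uncharge(). */
static int  charge(void)   { puts("charge");   return 0; }
static void uncharge(void) { puts("uncharge"); }
static int  step(int fail) { return fail; }

/* Single-exit cleanup: every failure after the charge goes through
 * 'out', so the uncharge cannot be missed; success returns early and
 * keeps the charge, as collapse_huge_page() does. */
static int collapse_sketch(int fail_at)
{
	if (charge())
		return -1;
	if (step(fail_at == 1))
		goto out;
	if (step(fail_at == 2))
		goto out;
	return 0;
out:
	uncharge();
	return -1;
}

int main(void)
{
	collapse_sketch(0);	/* success: charge kept        */
	collapse_sketch(2);	/* failure: undone at 'out'    */
	return 0;
}
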
@@ -1917,6 +1925,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	struct page *page;
 	unsigned long _address;
 	spinlock_t *ptl;
+	int node = -1;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -1947,6 +1956,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		page = vm_normal_page(vma, _address, pteval);
 		if (unlikely(!page))
 			goto out_unmap;
+		/*
+		 * Chose the node of the first page. This could
+		 * be more sophisticated and look at more pages,
+		 * but isn't for now.
+		 */
+		if (node == -1)
+			node = page_to_nid(page);
 		VM_BUG_ON(PageCompound(page));
 		if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
 			goto out_unmap;
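
The scanner remembers the node of the first mapped page it finds in the PMD range (node starts at -1 and is set once) and hands it to collapse_huge_page(), so the collapsed 2 MiB page ends up where at least some of the small pages already are. The comment notes a smarter policy would be possible, for instance picking the node that backs the most pages. A small userspace model comparing the two policies on hypothetical per-page data:

#include <stdio.h>

#define MAX_NODES 8

/* First-touch policy used by the patch: node of the first scanned page. */
static int node_of_first_page(const int *node_of, int n)
{
	for (int i = 0; i < n; i++)
		if (node_of[i] >= 0)
			return node_of[i];
	return -1;
}

/* The "more sophisticated" alternative the comment alludes to:
 * the node that backs the most pages in the range. */
static int majority_node(const int *node_of, int n)
{
	int count[MAX_NODES] = { 0 }, best = -1;

	for (int i = 0; i < n; i++)
		if (node_of[i] >= 0 && node_of[i] < MAX_NODES)
			count[node_of[i]]++;
	for (int nd = 0; nd < MAX_NODES; nd++)
		if (best < 0 || count[nd] > count[best])
			best = nd;
	return best;
}

int main(void)
{
	/* Hypothetical per-page placement for one PMD range. */
	int node_of[] = { 1, 1, 0, 0, 0, 0, 1, 0 };
	int n = sizeof(node_of) / sizeof(node_of[0]);

	printf("first-page policy -> node %d\n", node_of_first_page(node_of, n));
	printf("majority policy   -> node %d\n", majority_node(node_of, n));
	return 0;
}
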
@@ -1963,7 +1979,7 @@ out_unmap:
 	pte_unmap_unlock(pte, ptl);
 	if (ret)
 		/* collapse_huge_page will return with the mmap_sem released */
-		collapse_huge_page(mm, address, hpage, vma);
+		collapse_huge_page(mm, address, hpage, vma, node);
 out:
 	return ret;
 }
@@ -2032,32 +2048,27 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 		if ((!(vma->vm_flags & VM_HUGEPAGE) &&
 		     !khugepaged_always()) ||
 		    (vma->vm_flags & VM_NOHUGEPAGE)) {
+		skip:
 			progress++;
 			continue;
 		}
-
 		/* VM_PFNMAP vmas may have vm_ops null but vm_file set */
-		if (!vma->anon_vma || vma->vm_ops || vma->vm_file) {
-			khugepaged_scan.address = vma->vm_end;
-			progress++;
-			continue;
-		}
+		if (!vma->anon_vma || vma->vm_ops || vma->vm_file)
+			goto skip;
+		if (is_vma_temporary_stack(vma))
+			goto skip;
+
 		VM_BUG_ON(is_linear_pfn_mapping(vma) || is_pfn_mapping(vma));
 
 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 		hend = vma->vm_end & HPAGE_PMD_MASK;
-		if (hstart >= hend) {
-			progress++;
-			continue;
-		}
+		if (hstart >= hend)
+			goto skip;
+		if (khugepaged_scan.address > hend)
+			goto skip;
 		if (khugepaged_scan.address < hstart)
 			khugepaged_scan.address = hstart;
-		if (khugepaged_scan.address > hend) {
-			khugepaged_scan.address = hend + HPAGE_PMD_SIZE;
-			progress++;
-			continue;
-		}
-		BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
+		VM_BUG_ON(khugepaged_scan.address & ~HPAGE_PMD_MASK);
 
 		while (khugepaged_scan.address < hend) {
 			int ret;
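
Two points in this hunk are worth spelling out. First, the new skip: label folds several separate "progress++; continue;" blocks into one, which is what lets the is_vma_temporary_stack() check be added without more duplication. Second, the huge page range of a VMA is computed by rounding vm_start up and vm_end down to a huge page boundary: (vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK is the standard round-up-to-power-of-two trick, since ~HPAGE_PMD_MASK equals HPAGE_PMD_SIZE - 1. A small worked example of the arithmetic (2 MiB huge pages and the sample addresses are assumptions for illustration):

#include <stdio.h>

#define HPAGE_PMD_SIZE	(2UL << 20)		/* assume 2 MiB huge pages */
#define HPAGE_PMD_MASK	(~(HPAGE_PMD_SIZE - 1))

int main(void)
{
	/* Hypothetical VMA that is not huge-page aligned at either end. */
	unsigned long vm_start = 0x00601000;	/* 6 MiB + 4 KiB   */
	unsigned long vm_end   = 0x00a03000;	/* 10 MiB + 12 KiB */

	/* Round the start up and the end down to 2 MiB boundaries,
	 * exactly as khugepaged_scan_mm_slot() does. */
	unsigned long hstart = (vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
	unsigned long hend   = vm_end & HPAGE_PMD_MASK;

	printf("hstart = %#lx\n", hstart);	/* 0x800000 (8 MiB)  */
	printf("hend   = %#lx\n", hend);	/* 0xa00000 (10 MiB) */
	if (hstart >= hend)
		printf("VMA too small, skip\n");
	else
		printf("%lu huge page(s) fit\n",
		       (hend - hstart) / HPAGE_PMD_SIZE);
	return 0;
}
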
@@ -2086,7 +2097,7 @@ breakouterloop:
 breakouterloop_mmap_sem:
 
 	spin_lock(&khugepaged_mm_lock);
-	BUG_ON(khugepaged_scan.mm_slot != mm_slot);
+	VM_BUG_ON(khugepaged_scan.mm_slot != mm_slot);
 	/*
 	 * Release the current mm_slot if this mm is about to die, or
 	 * if we scanned all vmas of this mm.
@@ -2241,9 +2252,9 @@ static int khugepaged(void *none)
 
 	for (;;) {
 		mutex_unlock(&khugepaged_mutex);
-		BUG_ON(khugepaged_thread != current);
+		VM_BUG_ON(khugepaged_thread != current);
 		khugepaged_loop();
-		BUG_ON(khugepaged_thread != current);
+		VM_BUG_ON(khugepaged_thread != current);
 
 		mutex_lock(&khugepaged_mutex);
 		if (!khugepaged_enabled())
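
The BUG_ON to VM_BUG_ON conversions in the last two hunks demote hard assertions to debug-only ones: VM_BUG_ON() only expands to BUG_ON() when CONFIG_DEBUG_VM is enabled, so production kernels pay nothing for these sanity checks. A minimal plain-C sketch of the same compile-out pattern (the macro names mirror the kernel's, the surrounding program is hypothetical):

#include <stdio.h>
#include <stdlib.h>

/* Hard assertion: always compiled in, kills the program when it fires. */
#define BUG_ON(cond) \
	do { if (cond) { fprintf(stderr, "BUG at %s:%d\n", __FILE__, __LINE__); abort(); } } while (0)

/* Debug-only assertion: compiled out unless the build defines DEBUG_VM,
 * mirroring how VM_BUG_ON() depends on CONFIG_DEBUG_VM. */
#ifdef DEBUG_VM
#define VM_BUG_ON(cond)	BUG_ON(cond)
#else
#define VM_BUG_ON(cond)	do { } while (0)
#endif

int main(void)
{
	int scanned_wrong_slot = 0;

	VM_BUG_ON(scanned_wrong_slot);	/* checked only in debug builds */
	printf("ok (slot check %d)\n", scanned_wrong_slot);
	return 0;
}
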