author    Andi Kleen <ak@linux.intel.com>                    2011-03-04 20:36:32 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>     2011-03-04 20:53:39 -0500
commit    5c4b4be3b6b937256103a5ae49177e0c3a17cb8f
tree      f0b7a74e61af26576e48581b70b7bad0a82d0ee7
parent    19ee151e140daa5183c4984981801e542e0544fb
mm: use correct numa policy node for transparent hugepages
Pass down the correct node for a transparent hugepage allocation.  Most
callers continue to use the current node; however, the khugepaged daemon
now uses the node of the first page to be collapsed instead.  This ensures
that khugepaged does not mess up local memory for an existing process
which uses local policy.

The choice of node is somewhat primitive currently: it just uses the node
of the first page in the pmd range.  An alternative would be to look at
multiple pages and use the most popular node.  I used the simplest variant
for now, which should work well enough for the case of all pages being on
the same node.

[akpm@linux-foundation.org: coding-style fixes]
Acked-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
 mm/huge_memory.c | 24
 mm/mempolicy.c   |  3
 2 files changed, 19 insertions(+), 8 deletions(-)
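The changelog above notes that the node choice is deliberately primitive: khugepaged simply takes the node of the first page in the pmd range. For illustration only, the "most popular node" alternative mentioned there could look roughly like the sketch below. This helper is not part of the commit; its name, signature, and the on-stack vote array are assumptions made for the example.

/*
 * Illustrative sketch only -- not part of this commit.  A hypothetical
 * helper that scans the pages of a pmd range and returns the node that
 * holds the most of them, rather than the node of the first page.  The
 * name, signature, and the (potentially large) on-stack vote array are
 * assumptions for the example; real kernel code would likely size or
 * allocate the counters differently.
 */
#include <linux/mm.h>
#include <linux/nodemask.h>

static int khugepaged_most_popular_node(struct page **pages, int nr_pages)
{
	int votes[MAX_NUMNODES] = { 0 };
	int i, nid, best = -1;

	for (i = 0; i < nr_pages; i++) {
		nid = page_to_nid(pages[i]);
		votes[nid]++;
		/* Track the node with the highest count so far. */
		if (best < 0 || votes[nid] > votes[best])
			best = nid;
	}
	return best;	/* -1 if no pages were passed in */
}

In the commit itself, khugepaged_scan_pmd() instead records page_to_nid() of the first normal page it encounters (the "if (node == -1)" hunk below) and passes that node down to collapse_huge_page().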
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1802db819e28..dbe99a5f2073 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -650,10 +650,10 @@ static inline gfp_t alloc_hugepage_gfpmask(int defrag)
 
 static inline struct page *alloc_hugepage_vma(int defrag,
 					      struct vm_area_struct *vma,
-					      unsigned long haddr)
+					      unsigned long haddr, int nd)
 {
 	return alloc_pages_vma(alloc_hugepage_gfpmask(defrag),
-			       HPAGE_PMD_ORDER, vma, haddr, numa_node_id());
+			       HPAGE_PMD_ORDER, vma, haddr, nd);
 }
 
 #ifndef CONFIG_NUMA
@@ -678,7 +678,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (unlikely(khugepaged_enter(vma)))
 			return VM_FAULT_OOM;
 		page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					  vma, haddr);
+					  vma, haddr, numa_node_id());
 		if (unlikely(!page))
 			goto out;
 		if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
@@ -902,7 +902,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow())
 		new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					      vma, haddr);
+					      vma, haddr, numa_node_id());
 	else
 		new_page = NULL;
 
@@ -1745,7 +1745,8 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 static void collapse_huge_page(struct mm_struct *mm,
 			       unsigned long address,
 			       struct page **hpage,
-			       struct vm_area_struct *vma)
+			       struct vm_area_struct *vma,
+			       int node)
 {
 	pgd_t *pgd;
 	pud_t *pud;
@@ -1773,7 +1774,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	 * mmap_sem in read mode is good idea also to allow greater
 	 * scalability.
 	 */
-	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address);
+	new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address,
+				      node);
 	if (unlikely(!new_page)) {
 		up_read(&mm->mmap_sem);
 		*hpage = ERR_PTR(-ENOMEM);
@@ -1919,6 +1921,7 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 	struct page *page;
 	unsigned long _address;
 	spinlock_t *ptl;
+	int node = -1;
 
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
@@ -1949,6 +1952,13 @@ static int khugepaged_scan_pmd(struct mm_struct *mm,
 		page = vm_normal_page(vma, _address, pteval);
 		if (unlikely(!page))
 			goto out_unmap;
+		/*
+		 * Chose the node of the first page. This could
+		 * be more sophisticated and look at more pages,
+		 * but isn't for now.
+		 */
+		if (node == -1)
+			node = page_to_nid(page);
 		VM_BUG_ON(PageCompound(page));
 		if (!PageLRU(page) || PageLocked(page) || !PageAnon(page))
 			goto out_unmap;
@@ -1965,7 +1975,7 @@ out_unmap:
 	pte_unmap_unlock(pte, ptl);
 	if (ret)
 		/* collapse_huge_page will return with the mmap_sem released */
-		collapse_huge_page(mm, address, hpage, vma);
+		collapse_huge_page(mm, address, hpage, vma, node);
 out:
 	return ret;
 }
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 25a5a9146619..b53ec99f1428 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1891,7 +1891,8 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order)
 		page = alloc_page_interleave(gfp, order, interleave_nodes(pol));
 	else
 		page = __alloc_pages_nodemask(gfp, order,
-			policy_zonelist(gfp, pol), policy_nodemask(gfp, pol));
+				policy_zonelist(gfp, pol, numa_node_id()),
+				policy_nodemask(gfp, pol));
 	put_mems_allowed();
 	return page;
 }