author	Christoph Lameter <clameter@engr.sgi.com>	2006-01-06 03:10:46 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2006-01-06 11:33:23 -0500
commit	5da7ca86078964cbfe6c83efc1205904587706fe (patch)
tree	a64a7824e90b42d6fdd71e6cb652362beb8983a1 /mm
parent	96df9333c94d7d5aeceb21f6c5e7ae8ff34753cf (diff)
[PATCH] Add NUMA policy support for huge pages.
The huge_zonelist() function in the memory policy layer provides a list of
zones ordered by NUMA distance. The hugetlb layer will walk that list looking
for a zone that has available huge pages but is also in the nodeset of the
current cpuset.
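
For illustration, the walk that this zonelist feeds can be sketched as
below. The loop body lies outside the context lines shown in the hunks,
so this is a sketch assuming the cpuset_zone_allowed() check and the
per-node hugepage_freelists[] of the hugetlb code of this era, not
patch text:

	static struct page *dequeue_huge_page(struct vm_area_struct *vma,
					unsigned long address)
	{
		int nid = numa_node_id();
		struct page *page = NULL;
		/* Zonelist ordered by NUMA distance, from the policy layer. */
		struct zonelist *zonelist = huge_zonelist(vma, address);
		struct zone **z;

		/* Take the first zone allowed by the current cpuset whose
		   node still has free huge pages queued. */
		for (z = zonelist->zones; *z; z++) {
			nid = (*z)->zone_pgdat->node_id;
			if (cpuset_zone_allowed(*z, GFP_HIGHUSER) &&
			    !list_empty(&hugepage_freelists[nid]))
				break;
		}
		if (*z) {
			page = list_entry(hugepage_freelists[nid].next,
					  struct page, lru);
			list_del(&page->lru);
			free_huge_pages--;
			free_huge_pages_node[nid]--;
		}
		return page;
	}
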
This patch does not contain the folding of find_or_alloc_huge_page() that was
controversial in the earlier discussion.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@muc.de>
Acked-by: William Lee Irwin III <wli@holomorphy.com>
Cc: Adam Litke <agl@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/hugetlb.c	24
-rw-r--r--	mm/mempolicy.c	39
2 files changed, 44 insertions(+), 19 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e93bd63462f0..eb405565949d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -11,6 +11,8 @@
 #include <linux/highmem.h>
 #include <linux/nodemask.h>
 #include <linux/pagemap.h>
+#include <linux/mempolicy.h>
+
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -36,11 +38,12 @@ static void enqueue_huge_page(struct page *page)
 	free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(void)
+static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+				unsigned long address)
 {
 	int nid = numa_node_id();
 	struct page *page = NULL;
-	struct zonelist *zonelist = NODE_DATA(nid)->node_zonelists;
+	struct zonelist *zonelist = huge_zonelist(vma, address);
 	struct zone **z;
 
 	for (z = zonelist->zones; *z; z++) {
@@ -87,13 +90,13 @@ void free_huge_page(struct page *page)
 	spin_unlock(&hugetlb_lock);
 }
 
-struct page *alloc_huge_page(void)
+struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
 	int i;
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page();
+	page = dequeue_huge_page(vma, addr);
 	if (!page) {
 		spin_unlock(&hugetlb_lock);
 		return NULL;
@@ -196,7 +199,7 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	spin_lock(&hugetlb_lock);
 	try_to_free_low(count);
 	while (count < nr_huge_pages) {
-		struct page *page = dequeue_huge_page();
+		struct page *page = dequeue_huge_page(NULL, 0);
 		if (!page)
 			break;
 		update_and_free_page(page);
@@ -365,8 +368,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	flush_tlb_range(vma, start, end);
 }
 
-static struct page *find_or_alloc_huge_page(struct address_space *mapping,
-			unsigned long idx, int shared)
+static struct page *find_or_alloc_huge_page(struct vm_area_struct *vma,
+			unsigned long addr, struct address_space *mapping,
+			unsigned long idx, int shared)
 {
 	struct page *page;
 	int err;
@@ -378,7 +382,7 @@ retry:
 
 	if (hugetlb_get_quota(mapping))
 		goto out;
-	page = alloc_huge_page();
+	page = alloc_huge_page(vma, addr);
 	if (!page) {
 		hugetlb_put_quota(mapping);
 		goto out;
@@ -418,7 +422,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	page_cache_get(old_page);
-	new_page = alloc_huge_page();
+	new_page = alloc_huge_page(vma, address);
 
 	if (!new_page) {
 		page_cache_release(old_page);
@@ -467,7 +471,7 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Use page lock to guard against racing truncation
 	 * before we get page_table_lock.
 	 */
-	page = find_or_alloc_huge_page(mapping, idx,
+	page = find_or_alloc_huge_page(vma, address, mapping, idx,
 			vma->vm_flags & VM_SHARED);
 	if (!page)
 		goto out;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 72f402cc9c9a..45c51ac63443 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -785,6 +785,34 @@ static unsigned offset_il_node(struct mempolicy *pol,
 	return nid;
 }
 
+/* Determine a node number for interleave */
+static inline unsigned interleave_nid(struct mempolicy *pol,
+		struct vm_area_struct *vma, unsigned long addr, int shift)
+{
+	if (vma) {
+		unsigned long off;
+
+		off = vma->vm_pgoff;
+		off += (addr - vma->vm_start) >> shift;
+		return offset_il_node(pol, vma, off);
+	} else
+		return interleave_nodes(pol);
+}
+
+/* Return a zonelist suitable for a huge page allocation. */
+struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+
+	if (pol->policy == MPOL_INTERLEAVE) {
+		unsigned nid;
+
+		nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+		return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+	}
+	return zonelist_policy(GFP_HIGHUSER, pol);
+}
+
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
 static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
@@ -833,15 +861,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 
 	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
 		unsigned nid;
-		if (vma) {
-			unsigned long off;
-			off = vma->vm_pgoff;
-			off += (addr - vma->vm_start) >> PAGE_SHIFT;
-			nid = offset_il_node(pol, vma, off);
-		} else {
-			/* fall back to process interleaving */
-			nid = interleave_nodes(pol);
-		}
+
+		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 		return alloc_page_interleave(gfp, 0, nid);
 	}
 	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
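
A quick worked example of the index computation that interleave_nid()
now centralizes. The names mirror the patch; the values are hypothetical,
chosen only for illustration:

	unsigned long vm_start = 0x40000000UL;	/* start of the mapping */
	unsigned long vm_pgoff = 0;		/* offset into the file */
	unsigned long addr = 0x40800000UL;	/* faulting address */
	int shift = 22;				/* HPAGE_SHIFT, e.g. 4MB huge pages */

	unsigned long off = vm_pgoff + ((addr - vm_start) >> shift);
	/* off == 2: the third huge page of the mapping. offset_il_node()
	   folds this index over the policy's node mask, so consecutive
	   huge pages land on consecutive allowed nodes. */

Since alloc_page_vma() now calls the same helper with PAGE_SHIFT, base
pages and huge pages share one interleave computation instead of two
open-coded copies.
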