diff options
author | Christoph Lameter <clameter@engr.sgi.com> | 2006-01-06 03:10:46 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-01-06 11:33:23 -0500 |
commit | 5da7ca86078964cbfe6c83efc1205904587706fe (patch) | |
tree | a64a7824e90b42d6fdd71e6cb652362beb8983a1 | |
parent | 96df9333c94d7d5aeceb21f6c5e7ae8ff34753cf (diff) |
[PATCH] Add NUMA policy support for huge pages.
The huge_zonelist() function in the memory policy layer provides a list of
zones ordered by NUMA distance. The hugetlb layer will walk that list looking
for a zone that has available huge pages but is also in the nodeset of the
current cpuset.
This patch does not contain the folding of find_or_alloc_huge_page() that was
controversial in the earlier discussion.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@muc.de>
Acked-by: William Lee Irwin III <wli@holomorphy.com>
Cc: Adam Litke <agl@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | include/linux/hugetlb.h | 4 | ||||
-rw-r--r-- | include/linux/mempolicy.h | 8 | ||||
-rw-r--r-- | mm/hugetlb.c | 24 | ||||
-rw-r--r-- | mm/mempolicy.c | 39 |
4 files changed, 54 insertions, 21 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 1056717ee501..68d82ad6b17c 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -22,7 +22,7 @@ int hugetlb_report_meminfo(char *); | |||
22 | int hugetlb_report_node_meminfo(int, char *); | 22 | int hugetlb_report_node_meminfo(int, char *); |
23 | int is_hugepage_mem_enough(size_t); | 23 | int is_hugepage_mem_enough(size_t); |
24 | unsigned long hugetlb_total_pages(void); | 24 | unsigned long hugetlb_total_pages(void); |
25 | struct page *alloc_huge_page(void); | 25 | struct page *alloc_huge_page(struct vm_area_struct *, unsigned long); |
26 | void free_huge_page(struct page *); | 26 | void free_huge_page(struct page *); |
27 | int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, | 27 | int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, |
28 | unsigned long address, int write_access); | 28 | unsigned long address, int write_access); |
@@ -97,7 +97,7 @@ static inline unsigned long hugetlb_total_pages(void) | |||
97 | #define is_hugepage_only_range(mm, addr, len) 0 | 97 | #define is_hugepage_only_range(mm, addr, len) 0 |
98 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ | 98 | #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ |
99 | do { } while (0) | 99 | do { } while (0) |
100 | #define alloc_huge_page() ({ NULL; }) | 100 | #define alloc_huge_page(vma, addr) ({ NULL; }) |
101 | #define free_huge_page(p) ({ (void)(p); BUG(); }) | 101 | #define free_huge_page(p) ({ (void)(p); BUG(); }) |
102 | #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) | 102 | #define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) |
103 | 103 | ||
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 8b67cf837ca9..817db6427113 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
@@ -156,6 +156,8 @@ extern void numa_default_policy(void); | |||
156 | extern void numa_policy_init(void); | 156 | extern void numa_policy_init(void); |
157 | extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); | 157 | extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); |
158 | extern struct mempolicy default_policy; | 158 | extern struct mempolicy default_policy; |
159 | extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, | ||
160 | unsigned long addr); | ||
159 | 161 | ||
160 | #else | 162 | #else |
161 | 163 | ||
@@ -232,6 +234,12 @@ static inline void numa_policy_rebind(const nodemask_t *old, | |||
232 | { | 234 | { |
233 | } | 235 | } |
234 | 236 | ||
237 | static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, | ||
238 | unsigned long addr) | ||
239 | { | ||
240 | return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER); | ||
241 | } | ||
242 | |||
235 | #endif /* CONFIG_NUMA */ | 243 | #endif /* CONFIG_NUMA */ |
236 | #endif /* __KERNEL__ */ | 244 | #endif /* __KERNEL__ */ |
237 | 245 | ||
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e93bd63462f0..eb405565949d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -11,6 +11,8 @@ | |||
11 | #include <linux/highmem.h> | 11 | #include <linux/highmem.h> |
12 | #include <linux/nodemask.h> | 12 | #include <linux/nodemask.h> |
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/mempolicy.h> | ||
15 | |||
14 | #include <asm/page.h> | 16 | #include <asm/page.h> |
15 | #include <asm/pgtable.h> | 17 | #include <asm/pgtable.h> |
16 | 18 | ||
@@ -36,11 +38,12 @@ static void enqueue_huge_page(struct page *page) | |||
36 | free_huge_pages_node[nid]++; | 38 | free_huge_pages_node[nid]++; |
37 | } | 39 | } |
38 | 40 | ||
39 | static struct page *dequeue_huge_page(void) | 41 | static struct page *dequeue_huge_page(struct vm_area_struct *vma, |
42 | unsigned long address) | ||
40 | { | 43 | { |
41 | int nid = numa_node_id(); | 44 | int nid = numa_node_id(); |
42 | struct page *page = NULL; | 45 | struct page *page = NULL; |
43 | struct zonelist *zonelist = NODE_DATA(nid)->node_zonelists; | 46 | struct zonelist *zonelist = huge_zonelist(vma, address); |
44 | struct zone **z; | 47 | struct zone **z; |
45 | 48 | ||
46 | for (z = zonelist->zones; *z; z++) { | 49 | for (z = zonelist->zones; *z; z++) { |
@@ -87,13 +90,13 @@ void free_huge_page(struct page *page) | |||
87 | spin_unlock(&hugetlb_lock); | 90 | spin_unlock(&hugetlb_lock); |
88 | } | 91 | } |
89 | 92 | ||
90 | struct page *alloc_huge_page(void) | 93 | struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr) |
91 | { | 94 | { |
92 | struct page *page; | 95 | struct page *page; |
93 | int i; | 96 | int i; |
94 | 97 | ||
95 | spin_lock(&hugetlb_lock); | 98 | spin_lock(&hugetlb_lock); |
96 | page = dequeue_huge_page(); | 99 | page = dequeue_huge_page(vma, addr); |
97 | if (!page) { | 100 | if (!page) { |
98 | spin_unlock(&hugetlb_lock); | 101 | spin_unlock(&hugetlb_lock); |
99 | return NULL; | 102 | return NULL; |
@@ -196,7 +199,7 @@ static unsigned long set_max_huge_pages(unsigned long count) | |||
196 | spin_lock(&hugetlb_lock); | 199 | spin_lock(&hugetlb_lock); |
197 | try_to_free_low(count); | 200 | try_to_free_low(count); |
198 | while (count < nr_huge_pages) { | 201 | while (count < nr_huge_pages) { |
199 | struct page *page = dequeue_huge_page(); | 202 | struct page *page = dequeue_huge_page(NULL, 0); |
200 | if (!page) | 203 | if (!page) |
201 | break; | 204 | break; |
202 | update_and_free_page(page); | 205 | update_and_free_page(page); |
@@ -365,8 +368,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, | |||
365 | flush_tlb_range(vma, start, end); | 368 | flush_tlb_range(vma, start, end); |
366 | } | 369 | } |
367 | 370 | ||
368 | static struct page *find_or_alloc_huge_page(struct address_space *mapping, | 371 | static struct page *find_or_alloc_huge_page(struct vm_area_struct *vma, |
369 | unsigned long idx, int shared) | 372 | unsigned long addr, struct address_space *mapping, |
373 | unsigned long idx, int shared) | ||
370 | { | 374 | { |
371 | struct page *page; | 375 | struct page *page; |
372 | int err; | 376 | int err; |
@@ -378,7 +382,7 @@ retry: | |||
378 | 382 | ||
379 | if (hugetlb_get_quota(mapping)) | 383 | if (hugetlb_get_quota(mapping)) |
380 | goto out; | 384 | goto out; |
381 | page = alloc_huge_page(); | 385 | page = alloc_huge_page(vma, addr); |
382 | if (!page) { | 386 | if (!page) { |
383 | hugetlb_put_quota(mapping); | 387 | hugetlb_put_quota(mapping); |
384 | goto out; | 388 | goto out; |
@@ -418,7 +422,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, | |||
418 | } | 422 | } |
419 | 423 | ||
420 | page_cache_get(old_page); | 424 | page_cache_get(old_page); |
421 | new_page = alloc_huge_page(); | 425 | new_page = alloc_huge_page(vma, address); |
422 | 426 | ||
423 | if (!new_page) { | 427 | if (!new_page) { |
424 | page_cache_release(old_page); | 428 | page_cache_release(old_page); |
@@ -467,7 +471,7 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
467 | * Use page lock to guard against racing truncation | 471 | * Use page lock to guard against racing truncation |
468 | * before we get page_table_lock. | 472 | * before we get page_table_lock. |
469 | */ | 473 | */ |
470 | page = find_or_alloc_huge_page(mapping, idx, | 474 | page = find_or_alloc_huge_page(vma, address, mapping, idx, |
471 | vma->vm_flags & VM_SHARED); | 475 | vma->vm_flags & VM_SHARED); |
472 | if (!page) | 476 | if (!page) |
473 | goto out; | 477 | goto out; |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 72f402cc9c9a..45c51ac63443 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -785,6 +785,34 @@ static unsigned offset_il_node(struct mempolicy *pol, | |||
785 | return nid; | 785 | return nid; |
786 | } | 786 | } |
787 | 787 | ||
788 | /* Determine a node number for interleave */ | ||
789 | static inline unsigned interleave_nid(struct mempolicy *pol, | ||
790 | struct vm_area_struct *vma, unsigned long addr, int shift) | ||
791 | { | ||
792 | if (vma) { | ||
793 | unsigned long off; | ||
794 | |||
795 | off = vma->vm_pgoff; | ||
796 | off += (addr - vma->vm_start) >> shift; | ||
797 | return offset_il_node(pol, vma, off); | ||
798 | } else | ||
799 | return interleave_nodes(pol); | ||
800 | } | ||
801 | |||
802 | /* Return a zonelist suitable for a huge page allocation. */ | ||
803 | struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr) | ||
804 | { | ||
805 | struct mempolicy *pol = get_vma_policy(current, vma, addr); | ||
806 | |||
807 | if (pol->policy == MPOL_INTERLEAVE) { | ||
808 | unsigned nid; | ||
809 | |||
810 | nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT); | ||
811 | return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER); | ||
812 | } | ||
813 | return zonelist_policy(GFP_HIGHUSER, pol); | ||
814 | } | ||
815 | |||
788 | /* Allocate a page in interleaved policy. | 816 | /* Allocate a page in interleaved policy. |
789 | Own path because it needs to do special accounting. */ | 817 | Own path because it needs to do special accounting. */ |
790 | static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, | 818 | static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, |
@@ -833,15 +861,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) | |||
833 | 861 | ||
834 | if (unlikely(pol->policy == MPOL_INTERLEAVE)) { | 862 | if (unlikely(pol->policy == MPOL_INTERLEAVE)) { |
835 | unsigned nid; | 863 | unsigned nid; |
836 | if (vma) { | 864 | |
837 | unsigned long off; | 865 | nid = interleave_nid(pol, vma, addr, PAGE_SHIFT); |
838 | off = vma->vm_pgoff; | ||
839 | off += (addr - vma->vm_start) >> PAGE_SHIFT; | ||
840 | nid = offset_il_node(pol, vma, off); | ||
841 | } else { | ||
842 | /* fall back to process interleaving */ | ||
843 | nid = interleave_nodes(pol); | ||
844 | } | ||
845 | return alloc_page_interleave(gfp, 0, nid); | 866 | return alloc_page_interleave(gfp, 0, nid); |
846 | } | 867 | } |
847 | return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); | 868 | return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); |