aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Christoph Lameter <clameter@engr.sgi.com> 2006-01-06 03:10:46 -0500
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-01-06 11:33:23 -0500
commit 5da7ca86078964cbfe6c83efc1205904587706fe (patch)
tree a64a7824e90b42d6fdd71e6cb652362beb8983a1
parent 96df9333c94d7d5aeceb21f6c5e7ae8ff34753cf (diff)
[PATCH] Add NUMA policy support for huge pages.
The huge_zonelist() function in the memory policy layer provides a list of zones ordered by NUMA distance. The hugetlb layer will walk that list looking for a zone that has available huge pages but is also in the nodeset of the current cpuset. This patch does not contain the folding of find_or_alloc_huge_page(), which was controversial in the earlier discussion. Signed-off-by: Christoph Lameter <clameter@sgi.com> Cc: Andi Kleen <ak@muc.de> Acked-by: William Lee Irwin III <wli@holomorphy.com> Cc: Adam Litke <agl@us.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- include/linux/hugetlb.h 4
-rw-r--r-- include/linux/mempolicy.h 8
-rw-r--r-- mm/hugetlb.c 24
-rw-r--r-- mm/mempolicy.c 39
4 files changed, 54 insertions, 21 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 1056717ee501..68d82ad6b17c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -22,7 +22,7 @@ int hugetlb_report_meminfo(char *);
22int hugetlb_report_node_meminfo(int, char *); 22int hugetlb_report_node_meminfo(int, char *);
23int is_hugepage_mem_enough(size_t); 23int is_hugepage_mem_enough(size_t);
24unsigned long hugetlb_total_pages(void); 24unsigned long hugetlb_total_pages(void);
25struct page *alloc_huge_page(void); 25struct page *alloc_huge_page(struct vm_area_struct *, unsigned long);
26void free_huge_page(struct page *); 26void free_huge_page(struct page *);
27int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, 27int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
28 unsigned long address, int write_access); 28 unsigned long address, int write_access);
@@ -97,7 +97,7 @@ static inline unsigned long hugetlb_total_pages(void)
97#define is_hugepage_only_range(mm, addr, len) 0 97#define is_hugepage_only_range(mm, addr, len) 0
98#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ 98#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
99 do { } while (0) 99 do { } while (0)
100#define alloc_huge_page() ({ NULL; }) 100#define alloc_huge_page(vma, addr) ({ NULL; })
101#define free_huge_page(p) ({ (void)(p); BUG(); }) 101#define free_huge_page(p) ({ (void)(p); BUG(); })
102#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; }) 102#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; })
103 103
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 8b67cf837ca9..817db6427113 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -156,6 +156,8 @@ extern void numa_default_policy(void);
156extern void numa_policy_init(void); 156extern void numa_policy_init(void);
157extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new); 157extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new);
158extern struct mempolicy default_policy; 158extern struct mempolicy default_policy;
159extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
160 unsigned long addr);
159 161
160#else 162#else
161 163
@@ -232,6 +234,12 @@ static inline void numa_policy_rebind(const nodemask_t *old,
232{ 234{
233} 235}
234 236
237static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
238 unsigned long addr)
239{
240 return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
241}
242
235#endif /* CONFIG_NUMA */ 243#endif /* CONFIG_NUMA */
236#endif /* __KERNEL__ */ 244#endif /* __KERNEL__ */
237 245
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e93bd63462f0..eb405565949d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -11,6 +11,8 @@
11#include <linux/highmem.h> 11#include <linux/highmem.h>
12#include <linux/nodemask.h> 12#include <linux/nodemask.h>
13#include <linux/pagemap.h> 13#include <linux/pagemap.h>
14#include <linux/mempolicy.h>
15
14#include <asm/page.h> 16#include <asm/page.h>
15#include <asm/pgtable.h> 17#include <asm/pgtable.h>
16 18
@@ -36,11 +38,12 @@ static void enqueue_huge_page(struct page *page)
36 free_huge_pages_node[nid]++; 38 free_huge_pages_node[nid]++;
37} 39}
38 40
39static struct page *dequeue_huge_page(void) 41static struct page *dequeue_huge_page(struct vm_area_struct *vma,
42 unsigned long address)
40{ 43{
41 int nid = numa_node_id(); 44 int nid = numa_node_id();
42 struct page *page = NULL; 45 struct page *page = NULL;
43 struct zonelist *zonelist = NODE_DATA(nid)->node_zonelists; 46 struct zonelist *zonelist = huge_zonelist(vma, address);
44 struct zone **z; 47 struct zone **z;
45 48
46 for (z = zonelist->zones; *z; z++) { 49 for (z = zonelist->zones; *z; z++) {
@@ -87,13 +90,13 @@ void free_huge_page(struct page *page)
87 spin_unlock(&hugetlb_lock); 90 spin_unlock(&hugetlb_lock);
88} 91}
89 92
90struct page *alloc_huge_page(void) 93struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
91{ 94{
92 struct page *page; 95 struct page *page;
93 int i; 96 int i;
94 97
95 spin_lock(&hugetlb_lock); 98 spin_lock(&hugetlb_lock);
96 page = dequeue_huge_page(); 99 page = dequeue_huge_page(vma, addr);
97 if (!page) { 100 if (!page) {
98 spin_unlock(&hugetlb_lock); 101 spin_unlock(&hugetlb_lock);
99 return NULL; 102 return NULL;
@@ -196,7 +199,7 @@ static unsigned long set_max_huge_pages(unsigned long count)
196 spin_lock(&hugetlb_lock); 199 spin_lock(&hugetlb_lock);
197 try_to_free_low(count); 200 try_to_free_low(count);
198 while (count < nr_huge_pages) { 201 while (count < nr_huge_pages) {
199 struct page *page = dequeue_huge_page(); 202 struct page *page = dequeue_huge_page(NULL, 0);
200 if (!page) 203 if (!page)
201 break; 204 break;
202 update_and_free_page(page); 205 update_and_free_page(page);
@@ -365,8 +368,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
365 flush_tlb_range(vma, start, end); 368 flush_tlb_range(vma, start, end);
366} 369}
367 370
368static struct page *find_or_alloc_huge_page(struct address_space *mapping, 371static struct page *find_or_alloc_huge_page(struct vm_area_struct *vma,
369 unsigned long idx, int shared) 372 unsigned long addr, struct address_space *mapping,
373 unsigned long idx, int shared)
370{ 374{
371 struct page *page; 375 struct page *page;
372 int err; 376 int err;
@@ -378,7 +382,7 @@ retry:
378 382
379 if (hugetlb_get_quota(mapping)) 383 if (hugetlb_get_quota(mapping))
380 goto out; 384 goto out;
381 page = alloc_huge_page(); 385 page = alloc_huge_page(vma, addr);
382 if (!page) { 386 if (!page) {
383 hugetlb_put_quota(mapping); 387 hugetlb_put_quota(mapping);
384 goto out; 388 goto out;
@@ -418,7 +422,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
418 } 422 }
419 423
420 page_cache_get(old_page); 424 page_cache_get(old_page);
421 new_page = alloc_huge_page(); 425 new_page = alloc_huge_page(vma, address);
422 426
423 if (!new_page) { 427 if (!new_page) {
424 page_cache_release(old_page); 428 page_cache_release(old_page);
@@ -467,7 +471,7 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
467 * Use page lock to guard against racing truncation 471 * Use page lock to guard against racing truncation
468 * before we get page_table_lock. 472 * before we get page_table_lock.
469 */ 473 */
470 page = find_or_alloc_huge_page(mapping, idx, 474 page = find_or_alloc_huge_page(vma, address, mapping, idx,
471 vma->vm_flags & VM_SHARED); 475 vma->vm_flags & VM_SHARED);
472 if (!page) 476 if (!page)
473 goto out; 477 goto out;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 72f402cc9c9a..45c51ac63443 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -785,6 +785,34 @@ static unsigned offset_il_node(struct mempolicy *pol,
785 return nid; 785 return nid;
786} 786}
787 787
788/* Determine a node number for interleave */
789static inline unsigned interleave_nid(struct mempolicy *pol,
790 struct vm_area_struct *vma, unsigned long addr, int shift)
791{
792 if (vma) {
793 unsigned long off;
794
795 off = vma->vm_pgoff;
796 off += (addr - vma->vm_start) >> shift;
797 return offset_il_node(pol, vma, off);
798 } else
799 return interleave_nodes(pol);
800}
801
802/* Return a zonelist suitable for a huge page allocation. */
803struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
804{
805 struct mempolicy *pol = get_vma_policy(current, vma, addr);
806
807 if (pol->policy == MPOL_INTERLEAVE) {
808 unsigned nid;
809
810 nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
811 return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
812 }
813 return zonelist_policy(GFP_HIGHUSER, pol);
814}
815
788/* Allocate a page in interleaved policy. 816/* Allocate a page in interleaved policy.
789 Own path because it needs to do special accounting. */ 817 Own path because it needs to do special accounting. */
790static struct page *alloc_page_interleave(gfp_t gfp, unsigned order, 818static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
@@ -833,15 +861,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
833 861
834 if (unlikely(pol->policy == MPOL_INTERLEAVE)) { 862 if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
835 unsigned nid; 863 unsigned nid;
836 if (vma) { 864
837 unsigned long off; 865 nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
838 off = vma->vm_pgoff;
839 off += (addr - vma->vm_start) >> PAGE_SHIFT;
840 nid = offset_il_node(pol, vma, off);
841 } else {
842 /* fall back to process interleaving */
843 nid = interleave_nodes(pol);
844 }
845 return alloc_page_interleave(gfp, 0, nid); 866 return alloc_page_interleave(gfp, 0, nid);
846 } 867 }
847 return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol)); 868 return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));