 include/linux/hugetlb.h   |  4
 include/linux/mempolicy.h |  8
 mm/hugetlb.c              | 24
 mm/mempolicy.c            | 39
 4 files changed, 54 insertions(+), 21 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 1056717ee501..68d82ad6b17c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -22,7 +22,7 @@ int hugetlb_report_meminfo(char *);
 int hugetlb_report_node_meminfo(int, char *);
 int is_hugepage_mem_enough(size_t);
 unsigned long hugetlb_total_pages(void);
-struct page *alloc_huge_page(void);
+struct page *alloc_huge_page(struct vm_area_struct *, unsigned long);
 void free_huge_page(struct page *);
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, int write_access);
@@ -97,7 +97,7 @@ static inline unsigned long hugetlb_total_pages(void)
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
 						do { } while (0)
-#define alloc_huge_page()			({ NULL; })
+#define alloc_huge_page(vma, addr)		({ NULL; })
 #define free_huge_page(p)			({ (void)(p); BUG(); })
 #define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })
 
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 8b67cf837ca9..817db6427113 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -156,6 +156,8 @@ extern void numa_default_policy(void);
 extern void numa_policy_init(void);
 extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new);
 extern struct mempolicy default_policy;
+extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+		unsigned long addr);
 
 #else
 
@@ -232,6 +234,12 @@ static inline void numa_policy_rebind(const nodemask_t *old,
 {
 }
 
+static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+		unsigned long addr)
+{
+	return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+}
+
 #endif /* CONFIG_NUMA */
 #endif /* __KERNEL__ */
 
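With CONFIG_NUMA disabled there is only node 0, so the inline stub above simply hands back node 0's GFP_HIGHUSER zonelist; putting it in the header lets mm/hugetlb.c, which is built either way, call huge_zonelist() without any #ifdef. A minimal caller sketch, illustrative only (demo_pick_zonelist() is a made-up name, not part of the patch):

#include <linux/mm.h>
#include <linux/mempolicy.h>	/* huge_zonelist() or its !NUMA stub */

/* Hypothetical helper: the same source works with and without CONFIG_NUMA.
 * On NUMA kernels the VMA's mempolicy picks the zonelist; otherwise the
 * stub above always yields node 0's GFP_HIGHUSER list. */
static struct zonelist *demo_pick_zonelist(struct vm_area_struct *vma,
					    unsigned long addr)
{
	return huge_zonelist(vma, addr);
}
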
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e93bd63462f0..eb405565949d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -11,6 +11,8 @@
 #include <linux/highmem.h>
 #include <linux/nodemask.h>
 #include <linux/pagemap.h>
+#include <linux/mempolicy.h>
+
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -36,11 +38,12 @@ static void enqueue_huge_page(struct page *page)
 	free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(void)
+static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+				unsigned long address)
 {
 	int nid = numa_node_id();
 	struct page *page = NULL;
-	struct zonelist *zonelist = NODE_DATA(nid)->node_zonelists;
+	struct zonelist *zonelist = huge_zonelist(vma, address);
 	struct zone **z;
 
 	for (z = zonelist->zones; *z; z++) {
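
For orientation, here is roughly how dequeue_huge_page() reads once this hunk is applied. Only the signature and the zonelist line come from the patch; the loop body is untouched by it and is paraphrased here from the kernel of this era, so treat the whole thing as a sketch rather than a verbatim quote:

static struct page *dequeue_huge_page(struct vm_area_struct *vma,
				unsigned long address)
{
	int nid = numa_node_id();
	struct page *page = NULL;
	/* was NODE_DATA(nid)->node_zonelists -- now policy-aware */
	struct zonelist *zonelist = huge_zonelist(vma, address);
	struct zone **z;

	/* Walk the policy's zonelist and take the first node that still
	 * has a reserved huge page queued (sketch of the unchanged body). */
	for (z = zonelist->zones; *z; z++) {
		nid = (*z)->zone_pgdat->node_id;
		if (!list_empty(&hugepage_freelists[nid]))
			break;
	}
	if (*z) {
		page = list_entry(hugepage_freelists[nid].next,
				  struct page, lru);
		list_del(&page->lru);
		free_huge_pages--;
		free_huge_pages_node[nid]--;
	}
	return page;
}
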
@@ -87,13 +90,13 @@ void free_huge_page(struct page *page)
 	spin_unlock(&hugetlb_lock);
 }
 
-struct page *alloc_huge_page(void)
+struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
 	int i;
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page();
+	page = dequeue_huge_page(vma, addr);
 	if (!page) {
 		spin_unlock(&hugetlb_lock);
 		return NULL;
@@ -196,7 +199,7 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	spin_lock(&hugetlb_lock);
 	try_to_free_low(count);
 	while (count < nr_huge_pages) {
-		struct page *page = dequeue_huge_page();
+		struct page *page = dequeue_huge_page(NULL, 0);
 		if (!page)
 			break;
 		update_and_free_page(page);
@@ -365,8 +368,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	flush_tlb_range(vma, start, end);
 }
 
-static struct page *find_or_alloc_huge_page(struct address_space *mapping,
-			unsigned long idx, int shared)
+static struct page *find_or_alloc_huge_page(struct vm_area_struct *vma,
+			unsigned long addr, struct address_space *mapping,
+			unsigned long idx, int shared)
 {
 	struct page *page;
 	int err;
@@ -378,7 +382,7 @@ retry:
 
 	if (hugetlb_get_quota(mapping))
 		goto out;
-	page = alloc_huge_page();
+	page = alloc_huge_page(vma, addr);
 	if (!page) {
 		hugetlb_put_quota(mapping);
 		goto out;
@@ -418,7 +422,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	page_cache_get(old_page);
-	new_page = alloc_huge_page();
+	new_page = alloc_huge_page(vma, address);
 
 	if (!new_page) {
 		page_cache_release(old_page);
@@ -467,7 +471,7 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Use page lock to guard against racing truncation
 	 * before we get page_table_lock.
 	 */
-	page = find_or_alloc_huge_page(mapping, idx,
+	page = find_or_alloc_huge_page(vma, address, mapping, idx,
 			vma->vm_flags & VM_SHARED);
 	if (!page)
 		goto out;
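
Taken together, the mm/hugetlb.c hunks thread the faulting VMA and address down the whole path: hugetlb_no_page() -> find_or_alloc_huge_page(vma, address, ...) -> alloc_huge_page(vma, addr) -> dequeue_huge_page(vma, addr) -> huge_zonelist(vma, addr). Only set_max_huge_pages() passes (NULL, 0), so shrinking the pool is not tied to any particular VMA and falls back to process-level interleaving. A hypothetical in-kernel caller now looks like this (demo_grab_huge_page() is invented for illustration):

/* Hypothetical snippet, not in the patch: pulling a huge page for a
 * given mapping now needs the VMA and faulting address so the pool
 * page can come from a node the VMA's mempolicy allows. */
static struct page *demo_grab_huge_page(struct vm_area_struct *vma,
					unsigned long addr)
{
	struct page *page = alloc_huge_page(vma, addr);

	if (!page)
		return NULL;	/* no free huge page on any allowed node */
	return page;
}
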
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 72f402cc9c9a..45c51ac63443 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -785,6 +785,34 @@ static unsigned offset_il_node(struct mempolicy *pol,
 	return nid;
 }
 
+/* Determine a node number for interleave */
+static inline unsigned interleave_nid(struct mempolicy *pol,
+		struct vm_area_struct *vma, unsigned long addr, int shift)
+{
+	if (vma) {
+		unsigned long off;
+
+		off = vma->vm_pgoff;
+		off += (addr - vma->vm_start) >> shift;
+		return offset_il_node(pol, vma, off);
+	} else
+		return interleave_nodes(pol);
+}
+
+/* Return a zonelist suitable for a huge page allocation. */
+struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+
+	if (pol->policy == MPOL_INTERLEAVE) {
+		unsigned nid;
+
+		nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+		return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+	}
+	return zonelist_policy(GFP_HIGHUSER, pol);
+}
+
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
 static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
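
A worked example of the node selection above, with made-up numbers (assuming 2 MB huge pages, i.e. HPAGE_SHIFT == 21, and reading offset_il_node() as an offset-modulo-weight lookup over the policy's node mask):

/*
 * Illustrative only: an MPOL_INTERLEAVE mapping over nodes {0,1,2,3},
 * vm_start = 0x40000000, vm_pgoff = 0.
 *
 * Huge-page fault at addr = vm_start + 0x600000 (6 MB into the VMA):
 *	off = vm_pgoff + ((addr - vm_start) >> HPAGE_SHIFT)
 *	    = 0 + (0x600000 >> 21) = 3
 * offset_il_node(pol, vma, 3) then selects the fourth node of the mask
 * (node 3); the next huge page in the mapping gets off = 4 and wraps
 * back to node 0, so successive huge pages round-robin over the mask.
 */
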
@@ -833,15 +861,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 
 	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
 		unsigned nid;
-		if (vma) {
-			unsigned long off;
-			off = vma->vm_pgoff;
-			off += (addr - vma->vm_start) >> PAGE_SHIFT;
-			nid = offset_il_node(pol, vma, off);
-		} else {
-			/* fall back to process interleaving */
-			nid = interleave_nodes(pol);
-		}
+
+		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 		return alloc_page_interleave(gfp, 0, nid);
 	}
 	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
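
The alloc_page_vma() hunk performs the same computation the old open-coded block did; factoring it into interleave_nid() simply lets base pages and huge pages share one helper, differing only in the shift:

/* Sketch of the shared helper at its two call sites:
 *	base page fault:  nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 *	huge page fault:  nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
 * so within one interleaved VMA, neighbouring base pages rotate across
 * the policy's nodes one page at a time, while huge pages rotate at
 * huge-page granularity.
 */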
