author     Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>   2010-09-07 21:19:33 -0400
committer  Andi Kleen <ak@linux.intel.com>               2010-10-08 03:32:44 -0400
commit     bf50bab2b34483316162443587b8467952e07730 (patch)
tree       d142f7427c6b82e11ff9333bf507aa24b9525e3e
parent     998b4382c1d75a6fd3b0e334dae3ab33bd074d99 (diff)
hugetlb: add allocate function for hugepage migration
We can't use existing hugepage allocation functions to allocate hugepage
for page migration, because page migration can happen asynchronously with
the running processes and page migration users should call the allocation
function with physical addresses (not virtual addresses) as arguments.

ChangeLog since v3:
- unify alloc_buddy_huge_page() and alloc_buddy_huge_page_node()

ChangeLog since v2:
- remove unnecessary get/put_mems_allowed() (thanks to David Rientjes)

ChangeLog since v1:
- add comment on top of alloc_huge_page_no_vma()

Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Reviewed-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
-rw-r--r--  include/linux/hugetlb.h |  3
-rw-r--r--  mm/hugetlb.c            | 79
2 files changed, 57 insertions(+), 25 deletions(-)
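For context, a minimal sketch of the kind of caller the commit message has in mind, assuming a soft-offline style migration user that starts from the struct page of an error hugepage rather than from a vma and virtual address. The helper name new_hugepage_for_soft_offline() is a hypothetical illustration and not part of this patch; only alloc_huge_page_node(), page_hstate() and page_to_nid() come from the kernel tree.

/*
 * Hypothetical caller, for illustration only (not part of this patch):
 * a migration user holds only the struct page of the error hugepage,
 * so the hstate and the target node are derived from the page itself.
 */
static struct page *new_hugepage_for_soft_offline(struct page *hpage)
{
	struct hstate *h = page_hstate(hpage);	/* size class of the hugepage */
	int nid = page_to_nid(hpage);		/* node of the physical page   */

	/* Try the per-node free list first, then fall back to the buddy. */
	return alloc_huge_page_node(h, nid);
}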
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f479700df61b..0b73c536afd2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -228,6 +228,8 @@ struct huge_bootmem_page {
 	struct hstate *hstate;
 };
 
+struct page *alloc_huge_page_node(struct hstate *h, int nid);
+
 /* arch callback */
 int __init alloc_bootmem_huge_page(struct hstate *h);
 
@@ -303,6 +305,7 @@ static inline struct hstate *page_hstate(struct page *page)
 
 #else
 struct hstate {};
+#define alloc_huge_page_node(h, nid) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
 #define hstate_vma(v) NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bd031a4c738e..83fa0c3b6e2b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -466,11 +466,23 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	h->free_huge_pages_node[nid]++;
 }
 
+static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	if (list_empty(&h->hugepage_freelists[nid]))
+		return NULL;
+	page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
+	list_del(&page->lru);
+	h->free_huge_pages--;
+	h->free_huge_pages_node[nid]--;
+	return page;
+}
+
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
 {
-	int nid;
 	struct page *page = NULL;
 	struct mempolicy *mpol;
 	nodemask_t *nodemask;
@@ -496,19 +508,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
-		nid = zone_to_nid(zone);
-		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
-		    !list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
-
-			if (!avoid_reserve)
-				decrement_hugepage_resv_vma(h, vma);
-
-			break;
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
+			page = dequeue_huge_page_node(h, zone_to_nid(zone));
+			if (page) {
+				if (!avoid_reserve)
+					decrement_hugepage_resv_vma(h, vma);
+				break;
+			}
 		}
 	}
 err:
@@ -770,11 +776,10 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 	return ret;
 }
 
-static struct page *alloc_buddy_huge_page(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long address)
+static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 {
 	struct page *page;
-	unsigned int nid;
+	unsigned int r_nid;
 
 	if (h->order >= MAX_ORDER)
 		return NULL;
@@ -812,9 +817,14 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 	}
 	spin_unlock(&hugetlb_lock);
 
-	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
-					__GFP_REPEAT|__GFP_NOWARN,
-					huge_page_order(h));
+	if (nid == NUMA_NO_NODE)
+		page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+				   __GFP_REPEAT|__GFP_NOWARN,
+				   huge_page_order(h));
+	else
+		page = alloc_pages_exact_node(nid,
+			htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+			__GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
 
 	if (page && arch_prepare_hugepage(page)) {
 		__free_pages(page, huge_page_order(h));
@@ -829,13 +839,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 		 */
 		put_page_testzero(page);
 		VM_BUG_ON(page_count(page));
-		nid = page_to_nid(page);
+		r_nid = page_to_nid(page);
 		set_compound_page_dtor(page, free_huge_page);
 		/*
 		 * We incremented the global counters already
 		 */
-		h->nr_huge_pages_node[nid]++;
-		h->surplus_huge_pages_node[nid]++;
+		h->nr_huge_pages_node[r_nid]++;
+		h->surplus_huge_pages_node[r_nid]++;
 		__count_vm_event(HTLB_BUDDY_PGALLOC);
 	} else {
 		h->nr_huge_pages--;
@@ -848,6 +858,25 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 }
 
 /*
+ * This allocation function is useful in the context where vma is irrelevant.
+ * E.g. soft-offlining uses this function because it only cares physical
+ * address of error page.
+ */
+struct page *alloc_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	spin_lock(&hugetlb_lock);
+	page = dequeue_huge_page_node(h, nid);
+	spin_unlock(&hugetlb_lock);
+
+	if (!page)
+		page = alloc_buddy_huge_page(h, nid);
+
+	return page;
+}
+
+/*
  * Increase the hugetlb pool such that it can accomodate a reservation
  * of size 'delta'.
  */
@@ -871,7 +900,7 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = alloc_buddy_huge_page(h, NULL, 0);
+		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
 			/*
 			 * We were not able to allocate enough pages to
@@ -1052,7 +1081,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	spin_unlock(&hugetlb_lock);
 
 	if (!page) {
-		page = alloc_buddy_huge_page(h, vma, addr);
+		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
 			hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_SIGBUS);