-rw-r--r--   include/linux/hugetlb.h |  3
-rw-r--r--   mm/hugetlb.c            | 79
2 files changed, 57 insertions(+), 25 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index f479700df61b..0b73c536afd2 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -228,6 +228,8 @@ struct huge_bootmem_page {
 	struct hstate *hstate;
 };
 
+struct page *alloc_huge_page_node(struct hstate *h, int nid);
+
 /* arch callback */
 int __init alloc_bootmem_huge_page(struct hstate *h);
 
@@ -303,6 +305,7 @@ static inline struct hstate *page_hstate(struct page *page)
 
 #else
 struct hstate {};
+#define alloc_huge_page_node(h, nid) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
 #define hstate_vma(v) NULL
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bd031a4c738e..83fa0c3b6e2b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -466,11 +466,23 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	h->free_huge_pages_node[nid]++;
 }
 
+static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	if (list_empty(&h->hugepage_freelists[nid]))
+		return NULL;
+	page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
+	list_del(&page->lru);
+	h->free_huge_pages--;
+	h->free_huge_pages_node[nid]--;
+	return page;
+}
+
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
 {
-	int nid;
 	struct page *page = NULL;
 	struct mempolicy *mpol;
 	nodemask_t *nodemask;
@@ -496,19 +508,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
-		nid = zone_to_nid(zone);
-		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
-		    !list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
-
-			if (!avoid_reserve)
-				decrement_hugepage_resv_vma(h, vma);
-
-			break;
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
+			page = dequeue_huge_page_node(h, zone_to_nid(zone));
+			if (page) {
+				if (!avoid_reserve)
+					decrement_hugepage_resv_vma(h, vma);
+				break;
+			}
 		}
 	}
 err:
@@ -770,11 +776,10 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 	return ret;
 }
 
-static struct page *alloc_buddy_huge_page(struct hstate *h,
-			struct vm_area_struct *vma, unsigned long address)
+static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 {
 	struct page *page;
-	unsigned int nid;
+	unsigned int r_nid;
 
 	if (h->order >= MAX_ORDER)
 		return NULL;
@@ -812,9 +817,14 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 	}
 	spin_unlock(&hugetlb_lock);
 
-	page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
-					__GFP_REPEAT|__GFP_NOWARN,
-					huge_page_order(h));
+	if (nid == NUMA_NO_NODE)
+		page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+				   __GFP_REPEAT|__GFP_NOWARN,
+				   huge_page_order(h));
+	else
+		page = alloc_pages_exact_node(nid,
+			htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+			__GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
 
 	if (page && arch_prepare_hugepage(page)) {
 		__free_pages(page, huge_page_order(h));
@@ -829,13 +839,13 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 		 */
 		put_page_testzero(page);
 		VM_BUG_ON(page_count(page));
-		nid = page_to_nid(page);
+		r_nid = page_to_nid(page);
 		set_compound_page_dtor(page, free_huge_page);
 		/*
 		 * We incremented the global counters already
 		 */
-		h->nr_huge_pages_node[nid]++;
-		h->surplus_huge_pages_node[nid]++;
+		h->nr_huge_pages_node[r_nid]++;
+		h->surplus_huge_pages_node[r_nid]++;
 		__count_vm_event(HTLB_BUDDY_PGALLOC);
 	} else {
 		h->nr_huge_pages--;
@@ -848,6 +858,25 @@ static struct page *alloc_buddy_huge_page(struct hstate *h,
 }
 
 /*
+ * This allocation function is useful in the context where vma is irrelevant.
+ * E.g. soft-offlining uses this function because it only cares physical
+ * address of error page.
+ */
+struct page *alloc_huge_page_node(struct hstate *h, int nid)
+{
+	struct page *page;
+
+	spin_lock(&hugetlb_lock);
+	page = dequeue_huge_page_node(h, nid);
+	spin_unlock(&hugetlb_lock);
+
+	if (!page)
+		page = alloc_buddy_huge_page(h, nid);
+
+	return page;
+}
+
+/*
  * Increase the hugetlb pool such that it can accomodate a reservation
  * of size 'delta'.
  */
@@ -871,7 +900,7 @@ static int gather_surplus_pages(struct hstate *h, int delta)
 retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
-		page = alloc_buddy_huge_page(h, NULL, 0);
+		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
 			/*
 			 * We were not able to allocate enough pages to
@@ -1052,7 +1081,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	spin_unlock(&hugetlb_lock);
 
 	if (!page) {
-		page = alloc_buddy_huge_page(h, vma, addr);
+		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
 		if (!page) {
 			hugetlb_put_quota(inode->i_mapping, chg);
 			return ERR_PTR(-VM_FAULT_SIGBUS);
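
For context, here is a minimal sketch of how a caller such as a soft-offline path might use the new alloc_huge_page_node() helper as the "allocate a new page" callback handed to a hugepage migration. The callback name and its wiring are illustrative assumptions and are not part of this patch; only alloc_huge_page_node(), page_hstate(), compound_head() and page_to_nid() come from the kernel interfaces shown above.

/* Illustrative sketch only -- not part of this patch. */
#include <linux/mm.h>
#include <linux/hugetlb.h>

static struct page *new_hugepage_for_soft_offline(struct page *p,
						  unsigned long private,
						  int **result)
{
	/*
	 * Prefer a hugepage on the same node as the error page: the helper
	 * first tries that node's free pool (dequeue_huge_page_node) and,
	 * failing that, falls back to a fresh buddy allocation on the node
	 * via alloc_buddy_huge_page(h, nid).
	 */
	return alloc_huge_page_node(page_hstate(compound_head(p)),
				    page_to_nid(p));
}

Because the callback never looks at a VMA, it works for pages that are only identified by their physical address, which is exactly the case the new helper's comment describes.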