author		Lee Schermerhorn <lee.schermerhorn@hp.com>	2009-09-21 20:01:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-22 10:17:26 -0400
commit		e8c5c8249878fb6564125680a1d15e06adbd5639
tree		66a69c11ea0f27e45c747e6f7236da80d9788cc1 /mm/hugetlb.c
parent		55a4462af5722d2814858bc51ee8d58ca29544ab
hugetlb: balance freeing of huge pages across nodes
Free huge pages from nodes in round-robin fashion in an attempt to keep persistent (a.k.a. static) huge pages balanced across nodes.

New function free_pool_huge_page() is modeled on, and performs roughly the inverse of, alloc_fresh_huge_page(). It replaces dequeue_huge_page(), which now has no callers, so this patch removes it.

Helper function hstate_next_node_to_free() uses the new hstate member next_nid_to_free to distribute "frees" across all nodes with huge pages.

Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
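The round-robin walk at the core of the patch is simple enough to show in isolation. The following is a minimal userspace sketch of the next-node traversal that hstate_next_node_to_free() performs; the fixed online[] mask and the names next_node_to_free() and main() are illustrative stand-ins for this note, not kernel interfaces (the kernel iterates node_online_map with next_node()/first_node()).

/*
 * Userspace sketch of the round-robin node walk (illustrative only).
 */
#include <stdio.h>

#define MAX_NUMNODES	4

/* Pretend nodes 0, 2 and 3 are online; node 1 is not. */
static const int online[MAX_NUMNODES] = { 1, 0, 1, 1 };
static int next_nid_to_free;		/* analog of h->next_nid_to_free */

/* Advance the cursor to the next online node, wrapping at MAX_NUMNODES. */
static int next_node_to_free(void)
{
	int nid = next_nid_to_free;

	do {
		nid = (nid + 1) % MAX_NUMNODES;
	} while (!online[nid]);
	next_nid_to_free = nid;
	return nid;
}

int main(void)
{
	int i;

	/* Successive frees rotate over the online nodes: 2 3 0 2 3 0 */
	for (i = 0; i < 6; i++)
		printf("free from node %d\n", next_node_to_free());
	return 0;
}

A stale cursor costs nothing but balance: a racer that reads an old next_nid_to_free merely starts its scan on a different node, the same argument the existing comment in mm/hugetlb.c makes for the allocation side.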
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	132
1 file changed, 86 insertions(+), 46 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b16d63634777..38dab5586827 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -456,24 +456,6 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	h->free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(struct hstate *h)
-{
-	int nid;
-	struct page *page = NULL;
-
-	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
-		if (!list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
-			break;
-		}
-	}
-	return page;
-}
-
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
@@ -641,7 +623,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 
 /*
  * Use a helper variable to find the next node and then
- * copy it back to hugetlb_next_nid afterwards:
+ * copy it back to next_nid_to_alloc afterwards:
  * otherwise there's a window in which a racer might
  * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
  * But we don't need to use a spin_lock here: it really
@@ -650,13 +632,13 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
  * if we just successfully allocated a hugepage so that
  * the next caller gets hugepages on the next node.
  */
-static int hstate_next_node(struct hstate *h)
+static int hstate_next_node_to_alloc(struct hstate *h)
 {
 	int next_nid;
-	next_nid = next_node(h->hugetlb_next_nid, node_online_map);
+	next_nid = next_node(h->next_nid_to_alloc, node_online_map);
 	if (next_nid == MAX_NUMNODES)
 		next_nid = first_node(node_online_map);
-	h->hugetlb_next_nid = next_nid;
+	h->next_nid_to_alloc = next_nid;
 	return next_nid;
 }
 
@@ -667,14 +649,15 @@ static int alloc_fresh_huge_page(struct hstate *h)
 	int next_nid;
 	int ret = 0;
 
-	start_nid = h->hugetlb_next_nid;
+	start_nid = h->next_nid_to_alloc;
+	next_nid = start_nid;
 
 	do {
-		page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid);
+		page = alloc_fresh_huge_page_node(h, next_nid);
 		if (page)
 			ret = 1;
-		next_nid = hstate_next_node(h);
-	} while (!page && h->hugetlb_next_nid != start_nid);
+		next_nid = hstate_next_node_to_alloc(h);
+	} while (!page && next_nid != start_nid);
 
 	if (ret)
 		count_vm_event(HTLB_BUDDY_PGALLOC);
@@ -684,6 +667,52 @@ static int alloc_fresh_huge_page(struct hstate *h)
 	return ret;
 }
 
+/*
+ * helper for free_pool_huge_page() - find next node
+ * from which to free a huge page
+ */
+static int hstate_next_node_to_free(struct hstate *h)
+{
+	int next_nid;
+	next_nid = next_node(h->next_nid_to_free, node_online_map);
+	if (next_nid == MAX_NUMNODES)
+		next_nid = first_node(node_online_map);
+	h->next_nid_to_free = next_nid;
+	return next_nid;
+}
+
+/*
+ * Free huge page from pool from next node to free.
+ * Attempt to keep persistent huge pages more or less
+ * balanced over allowed nodes.
+ * Called with hugetlb_lock locked.
+ */
+static int free_pool_huge_page(struct hstate *h)
+{
+	int start_nid;
+	int next_nid;
+	int ret = 0;
+
+	start_nid = h->next_nid_to_free;
+	next_nid = start_nid;
+
+	do {
+		if (!list_empty(&h->hugepage_freelists[next_nid])) {
+			struct page *page =
+				list_entry(h->hugepage_freelists[next_nid].next,
+					  struct page, lru);
+			list_del(&page->lru);
+			h->free_huge_pages--;
+			h->free_huge_pages_node[next_nid]--;
+			update_and_free_page(h, page);
+			ret = 1;
+		}
+		next_nid = hstate_next_node_to_free(h);
+	} while (!ret && next_nid != start_nid);
+
+	return ret;
+}
+
 static struct page *alloc_buddy_huge_page(struct hstate *h,
 			struct vm_area_struct *vma, unsigned long address)
 {
@@ -1008,7 +1037,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 		void *addr;
 
 		addr = __alloc_bootmem_node_nopanic(
-				NODE_DATA(h->hugetlb_next_nid),
+				NODE_DATA(h->next_nid_to_alloc),
 				huge_page_size(h), huge_page_size(h), 0);
 
 		if (addr) {
@@ -1020,7 +1049,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 			m = addr;
 			goto found;
 		}
-		hstate_next_node(h);
+		hstate_next_node_to_alloc(h);
 		nr_nodes--;
 	}
 	return 0;
@@ -1141,31 +1170,43 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
  */
 static int adjust_pool_surplus(struct hstate *h, int delta)
 {
-	static int prev_nid;
-	int nid = prev_nid;
+	int start_nid, next_nid;
 	int ret = 0;
 
 	VM_BUG_ON(delta != -1 && delta != 1);
-	do {
-		nid = next_node(nid, node_online_map);
-		if (nid == MAX_NUMNODES)
-			nid = first_node(node_online_map);
 
-		/* To shrink on this node, there must be a surplus page */
-		if (delta < 0 && !h->surplus_huge_pages_node[nid])
-			continue;
-		/* Surplus cannot exceed the total number of pages */
-		if (delta > 0 && h->surplus_huge_pages_node[nid] >=
+	if (delta < 0)
+		start_nid = h->next_nid_to_alloc;
+	else
+		start_nid = h->next_nid_to_free;
+	next_nid = start_nid;
+
+	do {
+		int nid = next_nid;
+		if (delta < 0) {
+			next_nid = hstate_next_node_to_alloc(h);
+			/*
+			 * To shrink on this node, there must be a surplus page
+			 */
+			if (!h->surplus_huge_pages_node[nid])
+				continue;
+		}
+		if (delta > 0) {
+			next_nid = hstate_next_node_to_free(h);
+			/*
+			 * Surplus cannot exceed the total number of pages
+			 */
+			if (h->surplus_huge_pages_node[nid] >=
 						h->nr_huge_pages_node[nid])
-			continue;
+				continue;
+		}
 
 		h->surplus_huge_pages += delta;
 		h->surplus_huge_pages_node[nid] += delta;
 		ret = 1;
 		break;
-	} while (nid != prev_nid);
+	} while (next_nid != start_nid);
 
-	prev_nid = nid;
 	return ret;
 }
 
@@ -1227,10 +1268,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 	min_count = max(count, min_count);
 	try_to_free_low(h, min_count);
 	while (min_count < persistent_huge_pages(h)) {
-		struct page *page = dequeue_huge_page(h);
-		if (!page)
+		if (!free_pool_huge_page(h))
 			break;
-		update_and_free_page(h, page);
 	}
 	while (count < persistent_huge_pages(h)) {
 		if (!adjust_pool_surplus(h, 1))
@@ -1442,7 +1481,8 @@ void __init hugetlb_add_hstate(unsigned order)
 	h->free_huge_pages = 0;
 	for (i = 0; i < MAX_NUMNODES; ++i)
 		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
-	h->hugetlb_next_nid = first_node(node_online_map);
+	h->next_nid_to_alloc = first_node(node_online_map);
+	h->next_nid_to_free = first_node(node_online_map);
 	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
 			huge_page_size(h)/1024);
 