author		Lee Schermerhorn <lee.schermerhorn@hp.com>	2009-09-21 20:01:22 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-22 10:17:26 -0400
commit		e8c5c8249878fb6564125680a1d15e06adbd5639 (patch)
tree		66a69c11ea0f27e45c747e6f7236da80d9788cc1 /mm/hugetlb.c
parent		55a4462af5722d2814858bc51ee8d58ca29544ab (diff)
hugetlb: balance freeing of huge pages across nodes
Free huge pages from nodes in round-robin fashion in an attempt to keep
persistent (a.k.a. static) huge pages balanced across nodes.
The new function free_pool_huge_page() is modeled on, and performs roughly the
inverse of, alloc_fresh_huge_page().  It replaces dequeue_huge_page(), which is
left with no callers, so this patch removes it.
The helper function hstate_next_node_to_free() uses the new hstate member
next_nid_to_free to distribute "frees" across all nodes with huge pages.
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
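
As a rough illustration only (not part of the patch), the advance-and-wrap step
that hstate_next_node_to_free() performs with next_node()/first_node() over
node_online_map can be modeled in user-space C.  The node_online[] array, the
node count, and the printed sequence below are made-up stand-ins for the
kernel's online-node mask:

/* Stand-alone sketch; node_online[] is an invented stand-in for node_online_map. */
#include <stdio.h>

#define MAX_NUMNODES 8

static const int node_online[MAX_NUMNODES] = { 1, 1, 0, 1, 0, 0, 1, 0 };
static int next_nid_to_free;			/* models h->next_nid_to_free */

/* Advance to the next online node, wrapping past MAX_NUMNODES like first_node(). */
static int hstate_next_node_to_free(void)
{
	int next_nid = next_nid_to_free;

	do {
		next_nid = (next_nid + 1) % MAX_NUMNODES;
	} while (!node_online[next_nid]);	/* at least one node is online here */

	next_nid_to_free = next_nid;
	return next_nid;
}

int main(void)
{
	int i;

	/* Successive calls rotate over the online nodes: 1, 3, 6, 0, 1, 3, ... */
	for (i = 0; i < 8; i++)
		printf("next node to free: %d\n", hstate_next_node_to_free());
	return 0;
}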
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	132
1 file changed, 86 insertions, 46 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index b16d6363477..38dab558682 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -456,24 +456,6 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
 	h->free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(struct hstate *h)
-{
-	int nid;
-	struct page *page = NULL;
-
-	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
-		if (!list_empty(&h->hugepage_freelists[nid])) {
-			page = list_entry(h->hugepage_freelists[nid].next,
-					  struct page, lru);
-			list_del(&page->lru);
-			h->free_huge_pages--;
-			h->free_huge_pages_node[nid]--;
-			break;
-		}
-	}
-	return page;
-}
-
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve)
@@ -641,7 +623,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 
 /*
  * Use a helper variable to find the next node and then
- * copy it back to hugetlb_next_nid afterwards:
+ * copy it back to next_nid_to_alloc afterwards:
  * otherwise there's a window in which a racer might
  * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
  * But we don't need to use a spin_lock here: it really
@@ -650,13 +632,13 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
  * if we just successfully allocated a hugepage so that
  * the next caller gets hugepages on the next node.
  */
-static int hstate_next_node(struct hstate *h)
+static int hstate_next_node_to_alloc(struct hstate *h)
 {
 	int next_nid;
-	next_nid = next_node(h->hugetlb_next_nid, node_online_map);
+	next_nid = next_node(h->next_nid_to_alloc, node_online_map);
 	if (next_nid == MAX_NUMNODES)
 		next_nid = first_node(node_online_map);
-	h->hugetlb_next_nid = next_nid;
+	h->next_nid_to_alloc = next_nid;
 	return next_nid;
 }
 
@@ -667,14 +649,15 @@ static int alloc_fresh_huge_page(struct hstate *h)
 	int next_nid;
 	int ret = 0;
 
-	start_nid = h->hugetlb_next_nid;
+	start_nid = h->next_nid_to_alloc;
+	next_nid = start_nid;
 
 	do {
-		page = alloc_fresh_huge_page_node(h, h->hugetlb_next_nid);
+		page = alloc_fresh_huge_page_node(h, next_nid);
 		if (page)
 			ret = 1;
-		next_nid = hstate_next_node(h);
-	} while (!page && h->hugetlb_next_nid != start_nid);
+		next_nid = hstate_next_node_to_alloc(h);
+	} while (!page && next_nid != start_nid);
 
 	if (ret)
 		count_vm_event(HTLB_BUDDY_PGALLOC);
@@ -684,6 +667,52 @@ static int alloc_fresh_huge_page(struct hstate *h)
 	return ret;
 }
 
+/*
+ * helper for free_pool_huge_page() - find next node
+ * from which to free a huge page
+ */
+static int hstate_next_node_to_free(struct hstate *h)
+{
+	int next_nid;
+	next_nid = next_node(h->next_nid_to_free, node_online_map);
+	if (next_nid == MAX_NUMNODES)
+		next_nid = first_node(node_online_map);
+	h->next_nid_to_free = next_nid;
+	return next_nid;
+}
+
+/*
+ * Free huge page from pool from next node to free.
+ * Attempt to keep persistent huge pages more or less
+ * balanced over allowed nodes.
+ * Called with hugetlb_lock locked.
+ */
+static int free_pool_huge_page(struct hstate *h)
+{
+	int start_nid;
+	int next_nid;
+	int ret = 0;
+
+	start_nid = h->next_nid_to_free;
+	next_nid = start_nid;
+
+	do {
+		if (!list_empty(&h->hugepage_freelists[next_nid])) {
+			struct page *page =
+				list_entry(h->hugepage_freelists[next_nid].next,
+					  struct page, lru);
+			list_del(&page->lru);
+			h->free_huge_pages--;
+			h->free_huge_pages_node[next_nid]--;
+			update_and_free_page(h, page);
+			ret = 1;
+		}
+		next_nid = hstate_next_node_to_free(h);
+	} while (!ret && next_nid != start_nid);
+
+	return ret;
+}
+
 static struct page *alloc_buddy_huge_page(struct hstate *h,
 		struct vm_area_struct *vma, unsigned long address)
 {
@@ -1008,7 +1037,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 		void *addr;
 
 		addr = __alloc_bootmem_node_nopanic(
-				NODE_DATA(h->hugetlb_next_nid),
+				NODE_DATA(h->next_nid_to_alloc),
 				huge_page_size(h), huge_page_size(h), 0);
 
 		if (addr) {
@@ -1020,7 +1049,7 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 			m = addr;
 			goto found;
 		}
-		hstate_next_node(h);
+		hstate_next_node_to_alloc(h);
 		nr_nodes--;
 	}
 	return 0;
@@ -1141,31 +1170,43 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
  */
 static int adjust_pool_surplus(struct hstate *h, int delta)
 {
-	static int prev_nid;
-	int nid = prev_nid;
+	int start_nid, next_nid;
 	int ret = 0;
 
 	VM_BUG_ON(delta != -1 && delta != 1);
-	do {
-		nid = next_node(nid, node_online_map);
-		if (nid == MAX_NUMNODES)
-			nid = first_node(node_online_map);
 
-		/* To shrink on this node, there must be a surplus page */
-		if (delta < 0 && !h->surplus_huge_pages_node[nid])
-			continue;
-		/* Surplus cannot exceed the total number of pages */
-		if (delta > 0 && h->surplus_huge_pages_node[nid] >=
+	if (delta < 0)
+		start_nid = h->next_nid_to_alloc;
+	else
+		start_nid = h->next_nid_to_free;
+	next_nid = start_nid;
+
+	do {
+		int nid = next_nid;
+		if (delta < 0) {
+			next_nid = hstate_next_node_to_alloc(h);
+			/*
+			 * To shrink on this node, there must be a surplus page
+			 */
+			if (!h->surplus_huge_pages_node[nid])
+				continue;
+		}
+		if (delta > 0) {
+			next_nid = hstate_next_node_to_free(h);
+			/*
+			 * Surplus cannot exceed the total number of pages
+			 */
+			if (h->surplus_huge_pages_node[nid] >=
 						h->nr_huge_pages_node[nid])
 			continue;
+		}
 
 		h->surplus_huge_pages += delta;
 		h->surplus_huge_pages_node[nid] += delta;
 		ret = 1;
 		break;
-	} while (nid != prev_nid);
+	} while (next_nid != start_nid);
 
-	prev_nid = nid;
 	return ret;
 }
 
@@ -1227,10 +1268,8 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 	min_count = max(count, min_count);
 	try_to_free_low(h, min_count);
 	while (min_count < persistent_huge_pages(h)) {
-		struct page *page = dequeue_huge_page(h);
-		if (!page)
+		if (!free_pool_huge_page(h))
 			break;
-		update_and_free_page(h, page);
 	}
 	while (count < persistent_huge_pages(h)) {
 		if (!adjust_pool_surplus(h, 1))
@@ -1442,7 +1481,8 @@ void __init hugetlb_add_hstate(unsigned order)
 	h->free_huge_pages = 0;
 	for (i = 0; i < MAX_NUMNODES; ++i)
 		INIT_LIST_HEAD(&h->hugepage_freelists[i]);
-	h->hugetlb_next_nid = first_node(node_online_map);
+	h->next_nid_to_alloc = first_node(node_online_map);
+	h->next_nid_to_free = first_node(node_online_map);
 	snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
 					huge_page_size(h)/1024);
 
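
For context, a minimal user-space model of the new shrink path may help.  The
per-node free counts and the target below are invented, and the loop only
mirrors the shape of free_pool_huge_page() and the set_max_huge_pages() hunk
above; it does not model hugetlb_lock or the online-node mask:

/* User-space model only: counts are invented; no hugetlb_lock, no node masks. */
#include <stdio.h>

#define NODES 4

static int free_pages[NODES] = { 8, 3, 5, 2 };	/* free huge pages per node */
static int next_nid_to_free;

static int hstate_next_node_to_free(void)
{
	next_nid_to_free = (next_nid_to_free + 1) % NODES;
	return next_nid_to_free;
}

/* Mirror of free_pool_huge_page(): free one page from the next node, round robin. */
static int free_pool_huge_page(void)
{
	int start_nid = next_nid_to_free;
	int next_nid = start_nid;
	int ret = 0;

	do {
		if (free_pages[next_nid] > 0) {
			free_pages[next_nid]--;
			ret = 1;
		}
		next_nid = hstate_next_node_to_free();
	} while (!ret && next_nid != start_nid);

	return ret;
}

int main(void)
{
	int total = 0, target = 6, nid;

	for (nid = 0; nid < NODES; nid++)
		total += free_pages[nid];

	/* Shrink loop shaped like the set_max_huge_pages() hunk above. */
	while (total > target && free_pool_huge_page())
		total--;

	for (nid = 0; nid < NODES; nid++)
		printf("node %d: %d free\n", nid, free_pages[nid]);
	return 0;
}

Because each successful free advances next_nid_to_free, the remaining pages
drain evenly across nodes rather than emptying node 0 first, which is the
balancing behavior the commit message describes.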