author	Lee Schermerhorn <lee.schermerhorn@hp.com>	2009-12-14 20:58:16 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-15 11:53:12 -0500
commit	6ae11b278bca1cd41651bae49a8c69de2f6a6262 (patch)
tree	8bf4203ce676cac4f5ce1d03b35adbca1571d295 /mm/hugetlb.c
parent	9a76db099709388ae4126c4f441358b97c6ba20c (diff)
hugetlb: add nodemask arg to huge page alloc, free and surplus adjust functions
In preparation for constraining huge page allocation and freeing by the
controlling task's numa mempolicy, add a "nodes_allowed" nodemask pointer
to the allocate, free and surplus adjustment functions.  For now, pass
NULL to indicate default behavior--i.e., use node_online_map.  A
subsequent patch will derive a non-default mask from the controlling
task's numa mempolicy.

Note that this method of updating the global hstate nr_hugepages under
the constraint of a nodemask simplifies keeping the global state
consistent--especially the number of persistent and surplus pages
relative to reservations and overcommit limits.  There are undoubtedly
other ways to do this, but this works for both interfaces: mempolicy
and per node attributes.

[rientjes@google.com: fix HIGHMEM compile error]
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Reviewed-by: Mel Gorman <mel@csn.ul.ie>
Acked-by: David Rientjes <rientjes@google.com>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	125
1 file changed, 72 insertions, 53 deletions
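Before the diff itself, here is a minimal userspace sketch of the round-robin pattern this patch parameterizes: a saved cursor is pulled back into the allowed set if it has drifted outside it, returned as "this node", and then advanced with wrap-around so successive calls spread work over the allowed nodes. This is only an illustration of the idea under simplifying assumptions, not kernel code: a plain unsigned bitmask stands in for nodemask_t, and MAX_NUMNODES (as a small demo constant), next_allowed() and next_node_to_alloc() are names invented for the example.

/*
 * Toy model of the hstate_next_node_to_{alloc|free}() pattern.
 * A bitmask of "allowed nodes" replaces the kernel's nodemask_t.
 */
#include <stdio.h>

#define MAX_NUMNODES 8	/* demo value only */

/* Return the next set bit after nid, wrapping to the lowest set bit. */
static int next_allowed(int nid, unsigned int nodes_allowed)
{
	for (int i = 1; i <= MAX_NUMNODES; i++) {
		int candidate = (nid + i) % MAX_NUMNODES;

		if (nodes_allowed & (1u << candidate))
			return candidate;
	}
	return -1;	/* empty mask: no allowed node */
}

/*
 * Return the saved "this node", first pulling it back into the allowed
 * mask if needed (like get_valid_node_allowed()), then advance the
 * cursor for the next caller.
 */
static int next_node_to_alloc(int *cursor, unsigned int nodes_allowed)
{
	int nid = *cursor;

	if (!(nodes_allowed & (1u << nid)))
		nid = next_allowed(nid, nodes_allowed);
	*cursor = next_allowed(nid, nodes_allowed);
	return nid;
}

int main(void)
{
	unsigned int allowed = 0x0a;	/* only nodes 1 and 3 are allowed */
	int cursor = 0;			/* may start outside the mask */

	for (int i = 0; i < 5; i++)
		printf("allocate on node %d\n",
		       next_node_to_alloc(&cursor, allowed));
	return 0;
}

Calling the helper with a full mask reproduces the pre-patch behavior, which is why the call sites changed in this diff simply pass &node_online_map for now.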
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bffcf774f60..324d1abae87 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -622,48 +622,56 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 }
 
 /*
- * common helper function for hstate_next_node_to_{alloc|free}.
- * return next node in node_online_map, wrapping at end.
+ * common helper functions for hstate_next_node_to_{alloc|free}.
+ * We may have allocated or freed a huge page based on a different
+ * nodes_allowed previously, so h->next_node_to_{alloc|free} might
+ * be outside of *nodes_allowed.  Ensure that we use an allowed
+ * node for alloc or free.
  */
-static int next_node_allowed(int nid)
+static int next_node_allowed(int nid, nodemask_t *nodes_allowed)
 {
-	nid = next_node(nid, node_online_map);
+	nid = next_node(nid, *nodes_allowed);
 	if (nid == MAX_NUMNODES)
-		nid = first_node(node_online_map);
+		nid = first_node(*nodes_allowed);
 	VM_BUG_ON(nid >= MAX_NUMNODES);
 
 	return nid;
 }
 
+static int get_valid_node_allowed(int nid, nodemask_t *nodes_allowed)
+{
+	if (!node_isset(nid, *nodes_allowed))
+		nid = next_node_allowed(nid, nodes_allowed);
+	return nid;
+}
+
 /*
- * Use a helper variable to find the next node and then
- * copy it back to next_nid_to_alloc afterwards:
- * otherwise there's a window in which a racer might
- * pass invalid nid MAX_NUMNODES to alloc_pages_exact_node.
- * But we don't need to use a spin_lock here: it really
- * doesn't matter if occasionally a racer chooses the
- * same nid as we do.  Move nid forward in the mask even
- * if we just successfully allocated a hugepage so that
- * the next caller gets hugepages on the next node.
+ * returns the previously saved node ["this node"] from which to
+ * allocate a persistent huge page for the pool and advance the
+ * next node from which to allocate, handling wrap at end of node
+ * mask.
  */
-static int hstate_next_node_to_alloc(struct hstate *h)
+static int hstate_next_node_to_alloc(struct hstate *h,
+					nodemask_t *nodes_allowed)
 {
-	int nid, next_nid;
+	int nid;
+
+	VM_BUG_ON(!nodes_allowed);
+
+	nid = get_valid_node_allowed(h->next_nid_to_alloc, nodes_allowed);
+	h->next_nid_to_alloc = next_node_allowed(nid, nodes_allowed);
 
-	nid = h->next_nid_to_alloc;
-	next_nid = next_node_allowed(nid);
-	h->next_nid_to_alloc = next_nid;
 	return nid;
 }
 
-static int alloc_fresh_huge_page(struct hstate *h)
+static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
 {
 	struct page *page;
 	int start_nid;
 	int next_nid;
 	int ret = 0;
 
-	start_nid = hstate_next_node_to_alloc(h);
+	start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
 	next_nid = start_nid;
 
 	do {
@@ -672,7 +680,7 @@ static int alloc_fresh_huge_page(struct hstate *h)
 			ret = 1;
 			break;
 		}
-		next_nid = hstate_next_node_to_alloc(h);
+		next_nid = hstate_next_node_to_alloc(h, nodes_allowed);
 	} while (next_nid != start_nid);
 
 	if (ret)
@@ -684,18 +692,20 @@ static int alloc_fresh_huge_page(struct hstate *h)
 }
 
 /*
- * helper for free_pool_huge_page() - return the next node
- * from which to free a huge page.  Advance the next node id
- * whether or not we find a free huge page to free so that the
- * next attempt to free addresses the next node.
+ * helper for free_pool_huge_page() - return the previously saved
+ * node ["this node"] from which to free a huge page.  Advance the
+ * next node id whether or not we find a free huge page to free so
+ * that the next attempt to free addresses the next node.
  */
-static int hstate_next_node_to_free(struct hstate *h)
+static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 {
-	int nid, next_nid;
+	int nid;
+
+	VM_BUG_ON(!nodes_allowed);
+
+	nid = get_valid_node_allowed(h->next_nid_to_free, nodes_allowed);
+	h->next_nid_to_free = next_node_allowed(nid, nodes_allowed);
 
-	nid = h->next_nid_to_free;
-	next_nid = next_node_allowed(nid);
-	h->next_nid_to_free = next_nid;
 	return nid;
 }
 
@@ -705,13 +715,14 @@ static int hstate_next_node_to_free(struct hstate *h)
  * balanced over allowed nodes.
  * Called with hugetlb_lock locked.
  */
-static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
+static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
+						 bool acct_surplus)
 {
 	int start_nid;
 	int next_nid;
 	int ret = 0;
 
-	start_nid = hstate_next_node_to_free(h);
+	start_nid = hstate_next_node_to_free(h, nodes_allowed);
 	next_nid = start_nid;
 
 	do {
@@ -735,7 +746,7 @@ static int free_pool_huge_page(struct hstate *h, bool acct_surplus)
 			ret = 1;
 			break;
 		}
-		next_nid = hstate_next_node_to_free(h);
+		next_nid = hstate_next_node_to_free(h, nodes_allowed);
 	} while (next_nid != start_nid);
 
 	return ret;
@@ -937,7 +948,7 @@ static void return_unused_surplus_pages(struct hstate *h,
 	 * on-line nodes for us and will handle the hstate accounting.
 	 */
 	while (nr_pages--) {
-		if (!free_pool_huge_page(h, 1))
+		if (!free_pool_huge_page(h, &node_online_map, 1))
 			break;
 	}
 }
@@ -1047,7 +1058,8 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
 		void *addr;
 
 		addr = __alloc_bootmem_node_nopanic(
-				NODE_DATA(hstate_next_node_to_alloc(h)),
+				NODE_DATA(hstate_next_node_to_alloc(h,
+								&node_online_map)),
 				huge_page_size(h), huge_page_size(h), 0);
 
 		if (addr) {
@@ -1102,7 +1114,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
 		if (h->order >= MAX_ORDER) {
 			if (!alloc_bootmem_huge_page(h))
 				break;
-		} else if (!alloc_fresh_huge_page(h))
+		} else if (!alloc_fresh_huge_page(h, &node_online_map))
 			break;
 	}
 	h->max_huge_pages = i;
@@ -1144,14 +1156,15 @@ static void __init report_hugepages(void)
 }
 
 #ifdef CONFIG_HIGHMEM
-static void try_to_free_low(struct hstate *h, unsigned long count)
+static void try_to_free_low(struct hstate *h, unsigned long count,
+						nodemask_t *nodes_allowed)
 {
 	int i;
 
 	if (h->order >= MAX_ORDER)
 		return;
 
-	for (i = 0; i < MAX_NUMNODES; ++i) {
+	for_each_node_mask(i, *nodes_allowed) {
 		struct page *page, *next;
 		struct list_head *freel = &h->hugepage_freelists[i];
 		list_for_each_entry_safe(page, next, freel, lru) {
@@ -1167,7 +1180,8 @@ static void try_to_free_low(struct hstate *h, unsigned long count)
 	}
 }
 #else
-static inline void try_to_free_low(struct hstate *h, unsigned long count)
+static inline void try_to_free_low(struct hstate *h, unsigned long count,
+						nodemask_t *nodes_allowed)
 {
 }
 #endif
@@ -1177,7 +1191,8 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count)
  * balanced by operating on them in a round-robin fashion.
  * Returns 1 if an adjustment was made.
  */
-static int adjust_pool_surplus(struct hstate *h, int delta)
+static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
+				int delta)
 {
 	int start_nid, next_nid;
 	int ret = 0;
@@ -1185,9 +1200,9 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
 	VM_BUG_ON(delta != -1 && delta != 1);
 
 	if (delta < 0)
-		start_nid = hstate_next_node_to_alloc(h);
+		start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
 	else
-		start_nid = hstate_next_node_to_free(h);
+		start_nid = hstate_next_node_to_free(h, nodes_allowed);
 	next_nid = start_nid;
 
 	do {
@@ -1197,7 +1212,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
 			 * To shrink on this node, there must be a surplus page
 			 */
 			if (!h->surplus_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_alloc(h);
+				next_nid = hstate_next_node_to_alloc(h,
+								nodes_allowed);
 				continue;
 			}
 		}
@@ -1207,7 +1223,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
 			 */
 			if (h->surplus_huge_pages_node[nid] >=
 					h->nr_huge_pages_node[nid]) {
-				next_nid = hstate_next_node_to_free(h);
+				next_nid = hstate_next_node_to_free(h,
+								nodes_allowed);
 				continue;
 			}
 		}
@@ -1222,7 +1239,8 @@ static int adjust_pool_surplus(struct hstate *h, int delta)
 }
 
 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
-static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
+static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count,
+						nodemask_t *nodes_allowed)
 {
 	unsigned long min_count, ret;
 
@@ -1242,7 +1260,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 	 */
 	spin_lock(&hugetlb_lock);
 	while (h->surplus_huge_pages && count > persistent_huge_pages(h)) {
-		if (!adjust_pool_surplus(h, -1))
+		if (!adjust_pool_surplus(h, nodes_allowed, -1))
 			break;
 	}
 
@@ -1253,7 +1271,7 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 		 * and reducing the surplus.
 		 */
 		spin_unlock(&hugetlb_lock);
-		ret = alloc_fresh_huge_page(h);
+		ret = alloc_fresh_huge_page(h, nodes_allowed);
 		spin_lock(&hugetlb_lock);
 		if (!ret)
 			goto out;
@@ -1277,13 +1295,13 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count)
 	 */
 	min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
 	min_count = max(count, min_count);
-	try_to_free_low(h, min_count);
+	try_to_free_low(h, min_count, nodes_allowed);
 	while (min_count < persistent_huge_pages(h)) {
-		if (!free_pool_huge_page(h, 0))
+		if (!free_pool_huge_page(h, nodes_allowed, 0))
 			break;
 	}
 	while (count < persistent_huge_pages(h)) {
-		if (!adjust_pool_surplus(h, 1))
+		if (!adjust_pool_surplus(h, nodes_allowed, 1))
 			break;
 	}
 out:
@@ -1329,7 +1347,7 @@ static ssize_t nr_hugepages_store(struct kobject *kobj,
 	if (err)
 		return 0;
 
-	h->max_huge_pages = set_max_huge_pages(h, input);
+	h->max_huge_pages = set_max_huge_pages(h, input, &node_online_map);
 
 	return count;
 }
@@ -1571,7 +1589,8 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 	proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
 	if (write)
-		h->max_huge_pages = set_max_huge_pages(h, tmp,
+							&node_online_map);
 
 	return 0;
 }