aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLee Schermerhorn <lee.schermerhorn@hp.com>2009-12-14 20:58:32 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-15 11:53:13 -0500
commit9b5e5d0fdc91b73bba8cf5e0fbe3521a953e4e4d (patch)
tree58b7ca4a13b71de2132ae669fc138eb85c5ab3c8
parent267b4c281b4a43c8f3d965c791d3a7fd62448733 (diff)
hugetlb: use only nodes with memory for huge pages
Register per node hstate sysfs attributes only for nodes with memory. Global replacement of "all online nodes" with "all nodes with memory" in mm/hugetlb.c. Suggested by David Rientjes. A subsequent patch will handle adding/removing of per node hstate sysfs attributes when nodes transition to/from memoryless state via memory hotplug. NOTE: this patch has not been tested with memoryless nodes. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Reviewed-by: Andi Kleen <andi@firstfloor.org> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Randy Dunlap <randy.dunlap@oracle.com> Cc: Nishanth Aravamudan <nacc@us.ibm.com> Acked-by: David Rientjes <rientjes@google.com> Cc: Adam Litke <agl@us.ibm.com> Cc: Andy Whitcroft <apw@canonical.com> Cc: Eric Whitney <eric.whitney@hp.com> Cc: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/vm/hugetlbpage.txt12
-rw-r--r--mm/hugetlb.c35
2 files changed, 24 insertions, 23 deletions
diff --git a/Documentation/vm/hugetlbpage.txt b/Documentation/vm/hugetlbpage.txt
index 01c3108d2e31..6a8e4667ab38 100644
--- a/Documentation/vm/hugetlbpage.txt
+++ b/Documentation/vm/hugetlbpage.txt
@@ -90,11 +90,11 @@ huge page pool to 20, allocating or freeing huge pages, as required.
90On a NUMA platform, the kernel will attempt to distribute the huge page pool 90On a NUMA platform, the kernel will attempt to distribute the huge page pool
91over all the set of allowed nodes specified by the NUMA memory policy of the 91over all the set of allowed nodes specified by the NUMA memory policy of the
92task that modifies nr_hugepages. The default for the allowed nodes--when the 92task that modifies nr_hugepages. The default for the allowed nodes--when the
93task has default memory policy--is all on-line nodes. Allowed nodes with 93task has default memory policy--is all on-line nodes with memory. Allowed
94insufficient available, contiguous memory for a huge page will be silently 94nodes with insufficient available, contiguous memory for a huge page will be
95skipped when allocating persistent huge pages. See the discussion below of 95silently skipped when allocating persistent huge pages. See the discussion
96the interaction of task memory policy, cpusets and per node attributes with 96below of the interaction of task memory policy, cpusets and per node attributes
97the allocation and freeing of persistent huge pages. 97with the allocation and freeing of persistent huge pages.
98 98
99The success or failure of huge page allocation depends on the amount of 99The success or failure of huge page allocation depends on the amount of
100physically contiguous memory that is present in system at the time of the 100physically contiguous memory that is present in system at the time of the
@@ -226,7 +226,7 @@ resulting effect on persistent huge page allocation is as follows:
226 without first moving to a cpuset that contains all of the desired nodes. 226 without first moving to a cpuset that contains all of the desired nodes.
227 227
2285) Boot-time huge page allocation attempts to distribute the requested number 2285) Boot-time huge page allocation attempts to distribute the requested number
229 of huge pages over all on-lines nodes. 229 of huge pages over all on-line nodes with memory.
230 230
231Per Node Hugepages Attributes 231Per Node Hugepages Attributes
232 232
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 544f7bcb615e..b4a263512cb7 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -942,14 +942,14 @@ static void return_unused_surplus_pages(struct hstate *h,
942 942
943 /* 943 /*
944 * We want to release as many surplus pages as possible, spread 944 * We want to release as many surplus pages as possible, spread
945 * evenly across all nodes. Iterate across all nodes until we 945 * evenly across all nodes with memory. Iterate across these nodes
946 * can no longer free unreserved surplus pages. This occurs when 946 * until we can no longer free unreserved surplus pages. This occurs
947 * the nodes with surplus pages have no free pages. 947 * when the nodes with surplus pages have no free pages.
948 * free_pool_huge_page() will balance the the frees across the 948 * free_pool_huge_page() will balance the freed pages across the
949 * on-line nodes for us and will handle the hstate accounting. 949 * on-line nodes with memory and will handle the hstate accounting.
950 */ 950 */
951 while (nr_pages--) { 951 while (nr_pages--) {
952 if (!free_pool_huge_page(h, &node_online_map, 1)) 952 if (!free_pool_huge_page(h, &node_states[N_HIGH_MEMORY], 1))
953 break; 953 break;
954 } 954 }
955} 955}
@@ -1053,14 +1053,14 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
1053int __weak alloc_bootmem_huge_page(struct hstate *h) 1053int __weak alloc_bootmem_huge_page(struct hstate *h)
1054{ 1054{
1055 struct huge_bootmem_page *m; 1055 struct huge_bootmem_page *m;
1056 int nr_nodes = nodes_weight(node_online_map); 1056 int nr_nodes = nodes_weight(node_states[N_HIGH_MEMORY]);
1057 1057
1058 while (nr_nodes) { 1058 while (nr_nodes) {
1059 void *addr; 1059 void *addr;
1060 1060
1061 addr = __alloc_bootmem_node_nopanic( 1061 addr = __alloc_bootmem_node_nopanic(
1062 NODE_DATA(hstate_next_node_to_alloc(h, 1062 NODE_DATA(hstate_next_node_to_alloc(h,
1063 &node_online_map)), 1063 &node_states[N_HIGH_MEMORY])),
1064 huge_page_size(h), huge_page_size(h), 0); 1064 huge_page_size(h), huge_page_size(h), 0);
1065 1065
1066 if (addr) { 1066 if (addr) {
@@ -1115,7 +1115,8 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
1115 if (h->order >= MAX_ORDER) { 1115 if (h->order >= MAX_ORDER) {
1116 if (!alloc_bootmem_huge_page(h)) 1116 if (!alloc_bootmem_huge_page(h))
1117 break; 1117 break;
1118 } else if (!alloc_fresh_huge_page(h, &node_online_map)) 1118 } else if (!alloc_fresh_huge_page(h,
1119 &node_states[N_HIGH_MEMORY]))
1119 break; 1120 break;
1120 } 1121 }
1121 h->max_huge_pages = i; 1122 h->max_huge_pages = i;
@@ -1388,7 +1389,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
1388 1389
1389 h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed); 1390 h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
1390 1391
1391 if (nodes_allowed != &node_online_map) 1392 if (nodes_allowed != &node_states[N_HIGH_MEMORY])
1392 NODEMASK_FREE(nodes_allowed); 1393 NODEMASK_FREE(nodes_allowed);
1393 1394
1394 return len; 1395 return len;
@@ -1610,7 +1611,7 @@ void hugetlb_unregister_node(struct node *node)
1610 struct node_hstate *nhs = &node_hstates[node->sysdev.id]; 1611 struct node_hstate *nhs = &node_hstates[node->sysdev.id];
1611 1612
1612 if (!nhs->hugepages_kobj) 1613 if (!nhs->hugepages_kobj)
1613 return; 1614 return; /* no hstate attributes */
1614 1615
1615 for_each_hstate(h) 1616 for_each_hstate(h)
1616 if (nhs->hstate_kobjs[h - hstates]) { 1617 if (nhs->hstate_kobjs[h - hstates]) {
@@ -1675,15 +1676,15 @@ void hugetlb_register_node(struct node *node)
1675} 1676}
1676 1677
1677/* 1678/*
1678 * hugetlb init time: register hstate attributes for all registered 1679 * hugetlb init time: register hstate attributes for all registered node
1679 * node sysdevs. All on-line nodes should have registered their 1680 * sysdevs of nodes that have memory. All on-line nodes should have
1680 * associated sysdev by the time the hugetlb module initializes. 1681 * registered their associated sysdev by this time.
1681 */ 1682 */
1682static void hugetlb_register_all_nodes(void) 1683static void hugetlb_register_all_nodes(void)
1683{ 1684{
1684 int nid; 1685 int nid;
1685 1686
1686 for (nid = 0; nid < nr_node_ids; nid++) { 1687 for_each_node_state(nid, N_HIGH_MEMORY) {
1687 struct node *node = &node_devices[nid]; 1688 struct node *node = &node_devices[nid];
1688 if (node->sysdev.id == nid) 1689 if (node->sysdev.id == nid)
1689 hugetlb_register_node(node); 1690 hugetlb_register_node(node);
@@ -1777,8 +1778,8 @@ void __init hugetlb_add_hstate(unsigned order)
1777 h->free_huge_pages = 0; 1778 h->free_huge_pages = 0;
1778 for (i = 0; i < MAX_NUMNODES; ++i) 1779 for (i = 0; i < MAX_NUMNODES; ++i)
1779 INIT_LIST_HEAD(&h->hugepage_freelists[i]); 1780 INIT_LIST_HEAD(&h->hugepage_freelists[i]);
1780 h->next_nid_to_alloc = first_node(node_online_map); 1781 h->next_nid_to_alloc = first_node(node_states[N_HIGH_MEMORY]);
1781 h->next_nid_to_free = first_node(node_online_map); 1782 h->next_nid_to_free = first_node(node_states[N_HIGH_MEMORY]);
1782 snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", 1783 snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB",
1783 huge_page_size(h)/1024); 1784 huge_page_size(h)/1024);
1784 1785