author	Lee Schermerhorn <lee.schermerhorn@hp.com>	2009-12-14 20:58:21 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-15 11:53:12 -0500
commit	06808b0827e1cd14eedc96bac2655d5b37ac246c (patch)
tree	8f7b52a4af1532ed414631f68b99a059e299d83f /mm/hugetlb.c
parent	c1e6c8d074ea3621106548654cc244d2edc12ead (diff)
hugetlb: derive huge pages nodes allowed from task mempolicy
This patch derives a "nodes_allowed" node mask from the numa mempolicy of
the task modifying the number of persistent huge pages to control the
allocation, freeing and adjusting of surplus huge pages when the pool page
count is modified via the new sysctl or sysfs attribute
"nr_hugepages_mempolicy".  The nodes_allowed mask is derived as follows:

* For "default" [NULL] task mempolicy, a NULL nodemask_t pointer is
  produced.  This will cause the hugetlb subsystem to use node_online_map
  as the "nodes_allowed".  This preserves the behavior before this patch.
* For "preferred" mempolicy, including explicit local allocation, a
  nodemask with the single preferred node will be produced.  "local"
  policy will NOT track any internode migrations of the task adjusting
  nr_hugepages.
* For "bind" and "interleave" policy, the mempolicy's nodemask will be
  used.
* Other than to inform the construction of the nodes_allowed node mask,
  the actual mempolicy mode is ignored.  That is, all modes behave like
  interleave over the resulting nodes_allowed mask with no "fallback".

See the updated documentation [next patch] for more information about the
implications of this patch.

Examples:

Starting with:

	Node 0 HugePages_Total:     0
	Node 1 HugePages_Total:     0
	Node 2 HugePages_Total:     0
	Node 3 HugePages_Total:     0

Default behavior [with or without this patch] balances persistent hugepage
allocation across nodes [with sufficient contiguous memory]:

	sysctl vm.nr_hugepages[_mempolicy]=32

yields:

	Node 0 HugePages_Total:     8
	Node 1 HugePages_Total:     8
	Node 2 HugePages_Total:     8
	Node 3 HugePages_Total:     8

Of course, we only have nr_hugepages_mempolicy with the patch, but with
default mempolicy, nr_hugepages_mempolicy behaves the same as nr_hugepages.

Applying mempolicy--e.g., with numactl [using '-m' a.k.a. '--membind'
because it allows multiple nodes to be specified and it's easy to type]--
we can allocate huge pages on individual nodes or sets of nodes.  So,
starting from the condition above, with 8 huge pages per node, add 8 more
to node 2 using:

	numactl -m 2 sysctl vm.nr_hugepages_mempolicy=40

This yields:

	Node 0 HugePages_Total:     8
	Node 1 HugePages_Total:     8
	Node 2 HugePages_Total:    16
	Node 3 HugePages_Total:     8

The incremental 8 huge pages were restricted to node 2 by the specified
mempolicy.

Similarly, we can use mempolicy to free persistent huge pages from
specified nodes:

	numactl -m 0,1 sysctl vm.nr_hugepages_mempolicy=32

yields:

	Node 0 HugePages_Total:     4
	Node 1 HugePages_Total:     4
	Node 2 HugePages_Total:    16
	Node 3 HugePages_Total:     8

The 8 huge pages freed were balanced over nodes 0 and 1.

[rientjes@google.com: accommodate reworked NODEMASK_ALLOC]
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
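For readers who want to drive the new interface programmatically rather
than via numactl, here is a minimal userspace sketch (not part of this
patch) that binds the task's mempolicy to node 2 and then grows the pool,
equivalent to the "numactl -m 2 sysctl vm.nr_hugepages_mempolicy=40"
example above.  It assumes libnuma and root privileges; the file name is
illustrative.

	/* hugepages_on_node2.c -- illustrative only.
	 * Build with: gcc -o hugepages_on_node2 hugepages_on_node2.c -lnuma
	 */
	#include <numa.h>
	#include <stdio.h>

	int main(void)
	{
		struct bitmask *nodes;
		FILE *f;

		if (numa_available() < 0) {
			fprintf(stderr, "no NUMA support\n");
			return 1;
		}

		/* Bind this task's mempolicy to node 2, like "numactl -m 2" */
		nodes = numa_parse_nodestring("2");
		if (!nodes) {
			fprintf(stderr, "bad node string\n");
			return 1;
		}
		numa_set_membind(nodes);

		/* The kernel derives nodes_allowed from our mempolicy, so the
		 * 8 additional persistent pages land on node 2 only. */
		f = fopen("/proc/sys/vm/nr_hugepages_mempolicy", "w");
		if (!f) {
			perror("nr_hugepages_mempolicy");
			return 1;
		}
		fprintf(f, "40\n");
		fclose(f);

		numa_free_nodemask(nodes);
		return 0;
	}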
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	97
1 file changed, 84 insertions(+), 13 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 324d1abae876..1125d818ea06 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1330,29 +1330,71 @@ static struct hstate *kobj_to_hstate(struct kobject *kobj)
 	return NULL;
 }
 
-static ssize_t nr_hugepages_show(struct kobject *kobj,
+static ssize_t nr_hugepages_show_common(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
 	struct hstate *h = kobj_to_hstate(kobj);
 	return sprintf(buf, "%lu\n", h->nr_huge_pages);
 }
-static ssize_t nr_hugepages_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+			struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t len)
 {
 	int err;
-	unsigned long input;
+	unsigned long count;
 	struct hstate *h = kobj_to_hstate(kobj);
+	NODEMASK_ALLOC(nodemask_t, nodes_allowed);
 
-	err = strict_strtoul(buf, 10, &input);
+	err = strict_strtoul(buf, 10, &count);
 	if (err)
 		return 0;
 
-	h->max_huge_pages = set_max_huge_pages(h, input, &node_online_map);
+	if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) {
+		NODEMASK_FREE(nodes_allowed);
+		nodes_allowed = &node_online_map;
+	}
+	h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
 
-	return count;
+	if (nodes_allowed != &node_online_map)
+		NODEMASK_FREE(nodes_allowed);
+
+	return len;
+}
+
+static ssize_t nr_hugepages_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	return nr_hugepages_show_common(kobj, attr, buf);
+}
+
+static ssize_t nr_hugepages_store(struct kobject *kobj,
+	       struct kobj_attribute *attr, const char *buf, size_t len)
+{
+	return nr_hugepages_store_common(false, kobj, attr, buf, len);
 }
 HSTATE_ATTR(nr_hugepages);
 
+#ifdef CONFIG_NUMA
+
+/*
+ * hstate attribute for optionally mempolicy-based constraint on persistent
+ * huge page alloc/free.
+ */
+static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
+				       struct kobj_attribute *attr, char *buf)
+{
+	return nr_hugepages_show_common(kobj, attr, buf);
+}
+
+static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
+	       struct kobj_attribute *attr, const char *buf, size_t len)
+{
+	return nr_hugepages_store_common(true, kobj, attr, buf, len);
+}
+HSTATE_ATTR(nr_hugepages_mempolicy);
+#endif
+
+
 static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
 					struct kobj_attribute *attr, char *buf)
 {
@@ -1408,6 +1450,9 @@ static struct attribute *hstate_attrs[] = {
 	&free_hugepages_attr.attr,
 	&resv_hugepages_attr.attr,
 	&surplus_hugepages_attr.attr,
+#ifdef CONFIG_NUMA
+	&nr_hugepages_mempolicy_attr.attr,
+#endif
 	NULL,
 };
 
@@ -1574,9 +1619,9 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 }
 
 #ifdef CONFIG_SYSCTL
-int hugetlb_sysctl_handler(struct ctl_table *table, int write,
-			   void __user *buffer,
-			   size_t *length, loff_t *ppos)
+static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
+			 struct ctl_table *table, int write,
+			 void __user *buffer, size_t *length, loff_t *ppos)
 {
 	struct hstate *h = &default_hstate;
 	unsigned long tmp;
@@ -1588,13 +1633,39 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 	table->maxlen = sizeof(unsigned long);
 	proc_doulongvec_minmax(table, write, buffer, length, ppos);
 
-	if (write)
-		h->max_huge_pages = set_max_huge_pages(h, tmp,
-						       &node_online_map);
+	if (write) {
+		NODEMASK_ALLOC(nodemask_t, nodes_allowed);
+		if (!(obey_mempolicy &&
+			       init_nodemask_of_mempolicy(nodes_allowed))) {
+			NODEMASK_FREE(nodes_allowed);
+			nodes_allowed = &node_states[N_HIGH_MEMORY];
+		}
+		h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
+
+		if (nodes_allowed != &node_states[N_HIGH_MEMORY])
+			NODEMASK_FREE(nodes_allowed);
+	}
 
 	return 0;
 }
 
+int hugetlb_sysctl_handler(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *length, loff_t *ppos)
+{
+
+	return hugetlb_sysctl_handler_common(false, table, write,
+							buffer, length, ppos);
+}
+
+#ifdef CONFIG_NUMA
+int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *length, loff_t *ppos)
+{
+	return hugetlb_sysctl_handler_common(true, table, write,
+							buffer, length, ppos);
+}
+#endif /* CONFIG_NUMA */
+
 int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
 			void __user *buffer,
 			size_t *length, loff_t *ppos)
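
Both nr_hugepages_store_common() and hugetlb_sysctl_handler_common() above
share the same shape: heap-allocate a nodemask, try to fill it from the
task mempolicy, fall back to the map of all eligible nodes on default
policy (or allocation failure), and free the mask afterwards only if it is
the private copy.  The standalone sketch below restates that shape with
NODEMASK_ALLOC/NODEMASK_FREE replaced by malloc/free and the kernel
helpers replaced by stubs; every name here is an illustrative stand-in,
not kernel API.

	/* nodes_allowed_pattern.c -- illustrative stand-ins only */
	#include <stdbool.h>
	#include <stdio.h>
	#include <stdlib.h>

	typedef struct { unsigned long bits[16]; } nodemask_t;

	static nodemask_t node_online_map;	/* stand-in for the kernel global */

	/* Stand-in for init_nodemask_of_mempolicy(): returns false to model
	 * a task running under default [NULL] mempolicy. */
	static bool init_nodemask_of_mempolicy(nodemask_t *mask)
	{
		(void)mask;
		return false;
	}

	static void set_max_huge_pages(unsigned long count, nodemask_t *allowed)
	{
		printf("resize pool to %lu over %s\n", count,
		       allowed == &node_online_map ? "all online nodes"
						   : "mempolicy nodes");
	}

	static void adjust_pool(bool obey_mempolicy, unsigned long count)
	{
		/* NODEMASK_ALLOC analogue: nodemask_t can be large on big
		 * machines, so it is heap-allocated, not stack-allocated. */
		nodemask_t *nodes_allowed = malloc(sizeof(*nodes_allowed));

		if (!(obey_mempolicy && nodes_allowed &&
		      init_nodemask_of_mempolicy(nodes_allowed))) {
			/* Default policy (or allocation failure): fall back
			 * to all online nodes, dropping the private mask. */
			free(nodes_allowed);
			nodes_allowed = &node_online_map;
		}

		set_max_huge_pages(count, nodes_allowed);

		/* Free the mask only if it is the private heap copy. */
		if (nodes_allowed != &node_online_map)
			free(nodes_allowed);
	}

	int main(void)
	{
		adjust_pool(false, 32);	/* nr_hugepages path */
		adjust_pool(true, 40);	/* nr_hugepages_mempolicy path */
		return 0;
	}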