aboutsummaryrefslogtreecommitdiffstats
path: root/mm/hugetlb.c
diff options
context:
space:
mode:
authorLee Schermerhorn <lee.schermerhorn@hp.com>2009-12-14 20:58:25 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-12-15 11:53:12 -0500
commit9a30523066cde73c1442b76224bb540de9f9b0b0 (patch)
tree57165f74e16def0b74bb6e6e05ec9a6b7bd42403 /mm/hugetlb.c
parent4e25b2576efda24c02e2d6b9bcb5965a3f865f33 (diff)
hugetlb: add per node hstate attributes
Add the per huge page size control/query attributes to the per node sysdevs: /sys/devices/system/node/node<ID>/hugepages/hugepages-<size>/ nr_hugepages - r/w free_hugepages - r/o surplus_hugepages - r/o The patch attempts to re-use/share as much of the existing global hstate attribute initialization and handling, and the "nodes_allowed" constraint processing as possible. Calling set_max_huge_pages() with no node indicates a change to global hstate parameters. In this case, any non-default task mempolicy will be used to generate the nodes_allowed mask. A valid node id indicates an update to that node's hstate parameters, and the count argument specifies the target count for the specified node. From this info, we compute the target global count for the hstate and construct a nodes_allowed node mask containing only the specified node. Setting the node specific nr_hugepages via the per node attribute effectively ignores any task mempolicy or cpuset constraints. With this patch: (me):ls /sys/devices/system/node/node0/hugepages/hugepages-2048kB ./ ../ free_hugepages nr_hugepages surplus_hugepages Starting from: Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 0 Node 2 HugePages_Free: 0 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 vm.nr_hugepages = 0 Allocate 16 persistent huge pages on node 2: (me):echo 16 >/sys/devices/system/node/node2/hugepages/hugepages-2048kB/nr_hugepages [Note that this is equivalent to: numactl -m 2 hugeadm --pool-pages-min 2M:+16 ] Yields: Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 16 Node 2 HugePages_Free: 16 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 vm.nr_hugepages = 16 
Global controls work as expected--reduce pool to 8 persistent huge pages: (me):echo 8 >/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages Node 0 HugePages_Total: 0 Node 0 HugePages_Free: 0 Node 0 HugePages_Surp: 0 Node 1 HugePages_Total: 0 Node 1 HugePages_Free: 0 Node 1 HugePages_Surp: 0 Node 2 HugePages_Total: 8 Node 2 HugePages_Free: 8 Node 2 HugePages_Surp: 0 Node 3 HugePages_Total: 0 Node 3 HugePages_Free: 0 Node 3 HugePages_Surp: 0 Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com> Acked-by: Mel Gorman <mel@csn.ul.ie> Reviewed-by: Andi Kleen <andi@firstfloor.org> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Randy Dunlap <randy.dunlap@oracle.com> Cc: Nishanth Aravamudan <nacc@us.ibm.com> Cc: David Rientjes <rientjes@google.com> Cc: Adam Litke <agl@us.ibm.com> Cc: Andy Whitcroft <apw@canonical.com> Cc: Eric Whitney <eric.whitney@hp.com> Cc: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--mm/hugetlb.c274
1 files changed, 248 insertions, 26 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1125d818ea0..544f7bcb615 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -24,6 +24,7 @@
24#include <asm/io.h> 24#include <asm/io.h>
25 25
26#include <linux/hugetlb.h> 26#include <linux/hugetlb.h>
27#include <linux/node.h>
27#include "internal.h" 28#include "internal.h"
28 29
29const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; 30const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
@@ -1320,39 +1321,71 @@ out:
1320static struct kobject *hugepages_kobj; 1321static struct kobject *hugepages_kobj;
1321static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE]; 1322static struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
1322 1323
1323static struct hstate *kobj_to_hstate(struct kobject *kobj) 1324static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp);
1325
1326static struct hstate *kobj_to_hstate(struct kobject *kobj, int *nidp)
1324{ 1327{
1325 int i; 1328 int i;
1329
1326 for (i = 0; i < HUGE_MAX_HSTATE; i++) 1330 for (i = 0; i < HUGE_MAX_HSTATE; i++)
1327 if (hstate_kobjs[i] == kobj) 1331 if (hstate_kobjs[i] == kobj) {
1332 if (nidp)
1333 *nidp = NUMA_NO_NODE;
1328 return &hstates[i]; 1334 return &hstates[i];
1329 BUG(); 1335 }
1330 return NULL; 1336
1337 return kobj_to_node_hstate(kobj, nidp);
1331} 1338}
1332 1339
1333static ssize_t nr_hugepages_show_common(struct kobject *kobj, 1340static ssize_t nr_hugepages_show_common(struct kobject *kobj,
1334 struct kobj_attribute *attr, char *buf) 1341 struct kobj_attribute *attr, char *buf)
1335{ 1342{
1336 struct hstate *h = kobj_to_hstate(kobj); 1343 struct hstate *h;
1337 return sprintf(buf, "%lu\n", h->nr_huge_pages); 1344 unsigned long nr_huge_pages;
1345 int nid;
1346
1347 h = kobj_to_hstate(kobj, &nid);
1348 if (nid == NUMA_NO_NODE)
1349 nr_huge_pages = h->nr_huge_pages;
1350 else
1351 nr_huge_pages = h->nr_huge_pages_node[nid];
1352
1353 return sprintf(buf, "%lu\n", nr_huge_pages);
1338} 1354}
1339static ssize_t nr_hugepages_store_common(bool obey_mempolicy, 1355static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
1340 struct kobject *kobj, struct kobj_attribute *attr, 1356 struct kobject *kobj, struct kobj_attribute *attr,
1341 const char *buf, size_t len) 1357 const char *buf, size_t len)
1342{ 1358{
1343 int err; 1359 int err;
1360 int nid;
1344 unsigned long count; 1361 unsigned long count;
1345 struct hstate *h = kobj_to_hstate(kobj); 1362 struct hstate *h;
1346 NODEMASK_ALLOC(nodemask_t, nodes_allowed); 1363 NODEMASK_ALLOC(nodemask_t, nodes_allowed);
1347 1364
1348 err = strict_strtoul(buf, 10, &count); 1365 err = strict_strtoul(buf, 10, &count);
1349 if (err) 1366 if (err)
1350 return 0; 1367 return 0;
1351 1368
1352 if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) { 1369 h = kobj_to_hstate(kobj, &nid);
1353 NODEMASK_FREE(nodes_allowed); 1370 if (nid == NUMA_NO_NODE) {
1354 nodes_allowed = &node_online_map; 1371 /*
1355 } 1372 * global hstate attribute
1373 */
1374 if (!(obey_mempolicy &&
1375 init_nodemask_of_mempolicy(nodes_allowed))) {
1376 NODEMASK_FREE(nodes_allowed);
1377 nodes_allowed = &node_states[N_HIGH_MEMORY];
1378 }
1379 } else if (nodes_allowed) {
1380 /*
1381 * per node hstate attribute: adjust count to global,
1382 * but restrict alloc/free to the specified node.
1383 */
1384 count += h->nr_huge_pages - h->nr_huge_pages_node[nid];
1385 init_nodemask_of_node(nodes_allowed, nid);
1386 } else
1387 nodes_allowed = &node_states[N_HIGH_MEMORY];
1388
1356 h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed); 1389 h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
1357 1390
1358 if (nodes_allowed != &node_online_map) 1391 if (nodes_allowed != &node_online_map)
@@ -1398,7 +1431,7 @@ HSTATE_ATTR(nr_hugepages_mempolicy);
1398static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, 1431static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
1399 struct kobj_attribute *attr, char *buf) 1432 struct kobj_attribute *attr, char *buf)
1400{ 1433{
1401 struct hstate *h = kobj_to_hstate(kobj); 1434 struct hstate *h = kobj_to_hstate(kobj, NULL);
1402 return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages); 1435 return sprintf(buf, "%lu\n", h->nr_overcommit_huge_pages);
1403} 1436}
1404static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj, 1437static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
@@ -1406,7 +1439,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
1406{ 1439{
1407 int err; 1440 int err;
1408 unsigned long input; 1441 unsigned long input;
1409 struct hstate *h = kobj_to_hstate(kobj); 1442 struct hstate *h = kobj_to_hstate(kobj, NULL);
1410 1443
1411 err = strict_strtoul(buf, 10, &input); 1444 err = strict_strtoul(buf, 10, &input);
1412 if (err) 1445 if (err)
@@ -1423,15 +1456,24 @@ HSTATE_ATTR(nr_overcommit_hugepages);
1423static ssize_t free_hugepages_show(struct kobject *kobj, 1456static ssize_t free_hugepages_show(struct kobject *kobj,
1424 struct kobj_attribute *attr, char *buf) 1457 struct kobj_attribute *attr, char *buf)
1425{ 1458{
1426 struct hstate *h = kobj_to_hstate(kobj); 1459 struct hstate *h;
1427 return sprintf(buf, "%lu\n", h->free_huge_pages); 1460 unsigned long free_huge_pages;
1461 int nid;
1462
1463 h = kobj_to_hstate(kobj, &nid);
1464 if (nid == NUMA_NO_NODE)
1465 free_huge_pages = h->free_huge_pages;
1466 else
1467 free_huge_pages = h->free_huge_pages_node[nid];
1468
1469 return sprintf(buf, "%lu\n", free_huge_pages);
1428} 1470}
1429HSTATE_ATTR_RO(free_hugepages); 1471HSTATE_ATTR_RO(free_hugepages);
1430 1472
1431static ssize_t resv_hugepages_show(struct kobject *kobj, 1473static ssize_t resv_hugepages_show(struct kobject *kobj,
1432 struct kobj_attribute *attr, char *buf) 1474 struct kobj_attribute *attr, char *buf)
1433{ 1475{
1434 struct hstate *h = kobj_to_hstate(kobj); 1476 struct hstate *h = kobj_to_hstate(kobj, NULL);
1435 return sprintf(buf, "%lu\n", h->resv_huge_pages); 1477 return sprintf(buf, "%lu\n", h->resv_huge_pages);
1436} 1478}
1437HSTATE_ATTR_RO(resv_hugepages); 1479HSTATE_ATTR_RO(resv_hugepages);
@@ -1439,8 +1481,17 @@ HSTATE_ATTR_RO(resv_hugepages);
1439static ssize_t surplus_hugepages_show(struct kobject *kobj, 1481static ssize_t surplus_hugepages_show(struct kobject *kobj,
1440 struct kobj_attribute *attr, char *buf) 1482 struct kobj_attribute *attr, char *buf)
1441{ 1483{
1442 struct hstate *h = kobj_to_hstate(kobj); 1484 struct hstate *h;
1443 return sprintf(buf, "%lu\n", h->surplus_huge_pages); 1485 unsigned long surplus_huge_pages;
1486 int nid;
1487
1488 h = kobj_to_hstate(kobj, &nid);
1489 if (nid == NUMA_NO_NODE)
1490 surplus_huge_pages = h->surplus_huge_pages;
1491 else
1492 surplus_huge_pages = h->surplus_huge_pages_node[nid];
1493
1494 return sprintf(buf, "%lu\n", surplus_huge_pages);
1444} 1495}
1445HSTATE_ATTR_RO(surplus_hugepages); 1496HSTATE_ATTR_RO(surplus_hugepages);
1446 1497
@@ -1460,19 +1511,21 @@ static struct attribute_group hstate_attr_group = {
1460 .attrs = hstate_attrs, 1511 .attrs = hstate_attrs,
1461}; 1512};
1462 1513
1463static int __init hugetlb_sysfs_add_hstate(struct hstate *h) 1514static int __init hugetlb_sysfs_add_hstate(struct hstate *h,
1515 struct kobject *parent,
1516 struct kobject **hstate_kobjs,
1517 struct attribute_group *hstate_attr_group)
1464{ 1518{
1465 int retval; 1519 int retval;
1520 int hi = h - hstates;
1466 1521
1467 hstate_kobjs[h - hstates] = kobject_create_and_add(h->name, 1522 hstate_kobjs[hi] = kobject_create_and_add(h->name, parent);
1468 hugepages_kobj); 1523 if (!hstate_kobjs[hi])
1469 if (!hstate_kobjs[h - hstates])
1470 return -ENOMEM; 1524 return -ENOMEM;
1471 1525
1472 retval = sysfs_create_group(hstate_kobjs[h - hstates], 1526 retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group);
1473 &hstate_attr_group);
1474 if (retval) 1527 if (retval)
1475 kobject_put(hstate_kobjs[h - hstates]); 1528 kobject_put(hstate_kobjs[hi]);
1476 1529
1477 return retval; 1530 return retval;
1478} 1531}
@@ -1487,17 +1540,184 @@ static void __init hugetlb_sysfs_init(void)
1487 return; 1540 return;
1488 1541
1489 for_each_hstate(h) { 1542 for_each_hstate(h) {
1490 err = hugetlb_sysfs_add_hstate(h); 1543 err = hugetlb_sysfs_add_hstate(h, hugepages_kobj,
1544 hstate_kobjs, &hstate_attr_group);
1491 if (err) 1545 if (err)
1492 printk(KERN_ERR "Hugetlb: Unable to add hstate %s", 1546 printk(KERN_ERR "Hugetlb: Unable to add hstate %s",
1493 h->name); 1547 h->name);
1494 } 1548 }
1495} 1549}
1496 1550
1551#ifdef CONFIG_NUMA
1552
1553/*
1554 * node_hstate/s - associate per node hstate attributes, via their kobjects,
1555 * with node sysdevs in node_devices[] using a parallel array. The array
1556 * index of a node sysdev or _hstate == node id.
1557 * This is here to avoid any static dependency of the node sysdev driver, in
1558 * the base kernel, on the hugetlb module.
1559 */
1560struct node_hstate {
1561 struct kobject *hugepages_kobj;
1562 struct kobject *hstate_kobjs[HUGE_MAX_HSTATE];
1563};
1564struct node_hstate node_hstates[MAX_NUMNODES];
1565
1566/*
1567 * A subset of global hstate attributes for node sysdevs
1568 */
1569static struct attribute *per_node_hstate_attrs[] = {
1570 &nr_hugepages_attr.attr,
1571 &free_hugepages_attr.attr,
1572 &surplus_hugepages_attr.attr,
1573 NULL,
1574};
1575
1576static struct attribute_group per_node_hstate_attr_group = {
1577 .attrs = per_node_hstate_attrs,
1578};
1579
1580/*
1581 * kobj_to_node_hstate - lookup global hstate for node sysdev hstate attr kobj.
1582 * Returns node id via non-NULL nidp.
1583 */
1584static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
1585{
1586 int nid;
1587
1588 for (nid = 0; nid < nr_node_ids; nid++) {
1589 struct node_hstate *nhs = &node_hstates[nid];
1590 int i;
1591 for (i = 0; i < HUGE_MAX_HSTATE; i++)
1592 if (nhs->hstate_kobjs[i] == kobj) {
1593 if (nidp)
1594 *nidp = nid;
1595 return &hstates[i];
1596 }
1597 }
1598
1599 BUG();
1600 return NULL;
1601}
1602
1603/*
1604 * Unregister hstate attributes from a single node sysdev.
1605 * No-op if no hstate attributes attached.
1606 */
1607void hugetlb_unregister_node(struct node *node)
1608{
1609 struct hstate *h;
1610 struct node_hstate *nhs = &node_hstates[node->sysdev.id];
1611
1612 if (!nhs->hugepages_kobj)
1613 return;
1614
1615 for_each_hstate(h)
1616 if (nhs->hstate_kobjs[h - hstates]) {
1617 kobject_put(nhs->hstate_kobjs[h - hstates]);
1618 nhs->hstate_kobjs[h - hstates] = NULL;
1619 }
1620
1621 kobject_put(nhs->hugepages_kobj);
1622 nhs->hugepages_kobj = NULL;
1623}
1624
1625/*
1626 * hugetlb module exit: unregister hstate attributes from node sysdevs
1627 * that have them.
1628 */
1629static void hugetlb_unregister_all_nodes(void)
1630{
1631 int nid;
1632
1633 /*
1634 * disable node sysdev registrations.
1635 */
1636 register_hugetlbfs_with_node(NULL, NULL);
1637
1638 /*
1639 * remove hstate attributes from any nodes that have them.
1640 */
1641 for (nid = 0; nid < nr_node_ids; nid++)
1642 hugetlb_unregister_node(&node_devices[nid]);
1643}
1644
1645/*
1646 * Register hstate attributes for a single node sysdev.
1647 * No-op if attributes already registered.
1648 */
1649void hugetlb_register_node(struct node *node)
1650{
1651 struct hstate *h;
1652 struct node_hstate *nhs = &node_hstates[node->sysdev.id];
1653 int err;
1654
1655 if (nhs->hugepages_kobj)
1656 return; /* already allocated */
1657
1658 nhs->hugepages_kobj = kobject_create_and_add("hugepages",
1659 &node->sysdev.kobj);
1660 if (!nhs->hugepages_kobj)
1661 return;
1662
1663 for_each_hstate(h) {
1664 err = hugetlb_sysfs_add_hstate(h, nhs->hugepages_kobj,
1665 nhs->hstate_kobjs,
1666 &per_node_hstate_attr_group);
1667 if (err) {
1668 printk(KERN_ERR "Hugetlb: Unable to add hstate %s"
1669 " for node %d\n",
1670 h->name, node->sysdev.id);
1671 hugetlb_unregister_node(node);
1672 break;
1673 }
1674 }
1675}
1676
1677/*
1678 * hugetlb init time: register hstate attributes for all registered
1679 * node sysdevs. All on-line nodes should have registered their
1680 * associated sysdev by the time the hugetlb module initializes.
1681 */
1682static void hugetlb_register_all_nodes(void)
1683{
1684 int nid;
1685
1686 for (nid = 0; nid < nr_node_ids; nid++) {
1687 struct node *node = &node_devices[nid];
1688 if (node->sysdev.id == nid)
1689 hugetlb_register_node(node);
1690 }
1691
1692 /*
1693 * Let the node sysdev driver know we're here so it can
1694 * [un]register hstate attributes on node hotplug.
1695 */
1696 register_hugetlbfs_with_node(hugetlb_register_node,
1697 hugetlb_unregister_node);
1698}
1699#else /* !CONFIG_NUMA */
1700
1701static struct hstate *kobj_to_node_hstate(struct kobject *kobj, int *nidp)
1702{
1703 BUG();
1704 if (nidp)
1705 *nidp = -1;
1706 return NULL;
1707}
1708
1709static void hugetlb_unregister_all_nodes(void) { }
1710
1711static void hugetlb_register_all_nodes(void) { }
1712
1713#endif
1714
1497static void __exit hugetlb_exit(void) 1715static void __exit hugetlb_exit(void)
1498{ 1716{
1499 struct hstate *h; 1717 struct hstate *h;
1500 1718
1719 hugetlb_unregister_all_nodes();
1720
1501 for_each_hstate(h) { 1721 for_each_hstate(h) {
1502 kobject_put(hstate_kobjs[h - hstates]); 1722 kobject_put(hstate_kobjs[h - hstates]);
1503 } 1723 }
@@ -1532,6 +1752,8 @@ static int __init hugetlb_init(void)
1532 1752
1533 hugetlb_sysfs_init(); 1753 hugetlb_sysfs_init();
1534 1754
1755 hugetlb_register_all_nodes();
1756
1535 return 0; 1757 return 0;
1536} 1758}
1537module_init(hugetlb_init); 1759module_init(hugetlb_init);