aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/hugetlb.h6
-rw-r--r--include/linux/mempolicy.h3
-rw-r--r--kernel/sysctl.c15
-rw-r--r--mm/hugetlb.c97
-rw-r--r--mm/mempolicy.c47
5 files changed, 153 insertions, 15 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 41a59afc70fa..78b4bc64c006 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -23,6 +23,12 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
23int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); 23int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
24int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); 24int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
25int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); 25int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
26
27#ifdef CONFIG_NUMA
28int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
29 void __user *, size_t *, loff_t *);
30#endif
31
26int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); 32int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
27int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, 33int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
28 struct page **, struct vm_area_struct **, 34 struct page **, struct vm_area_struct **,
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 085c903fe0f1..1cc966cd3e5f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -201,6 +201,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
201extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, 201extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
202 unsigned long addr, gfp_t gfp_flags, 202 unsigned long addr, gfp_t gfp_flags,
203 struct mempolicy **mpol, nodemask_t **nodemask); 203 struct mempolicy **mpol, nodemask_t **nodemask);
204extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
204extern unsigned slab_node(struct mempolicy *policy); 205extern unsigned slab_node(struct mempolicy *policy);
205 206
206extern enum zone_type policy_zone; 207extern enum zone_type policy_zone;
@@ -328,6 +329,8 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
328 return node_zonelist(0, gfp_flags); 329 return node_zonelist(0, gfp_flags);
329} 330}
330 331
332static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; }
333
331static inline int do_migrate_pages(struct mm_struct *mm, 334static inline int do_migrate_pages(struct mm_struct *mm,
332 const nodemask_t *from_nodes, 335 const nodemask_t *from_nodes,
333 const nodemask_t *to_nodes, int flags) 336 const nodemask_t *to_nodes, int flags)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 554ac4894f0f..60fc93131095 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1051,7 +1051,7 @@ static struct ctl_table vm_table[] = {
1051 .extra2 = &one_hundred, 1051 .extra2 = &one_hundred,
1052 }, 1052 },
1053#ifdef CONFIG_HUGETLB_PAGE 1053#ifdef CONFIG_HUGETLB_PAGE
1054 { 1054 {
1055 .procname = "nr_hugepages", 1055 .procname = "nr_hugepages",
1056 .data = NULL, 1056 .data = NULL,
1057 .maxlen = sizeof(unsigned long), 1057 .maxlen = sizeof(unsigned long),
@@ -1059,7 +1059,18 @@ static struct ctl_table vm_table[] = {
1059 .proc_handler = hugetlb_sysctl_handler, 1059 .proc_handler = hugetlb_sysctl_handler,
1060 .extra1 = (void *)&hugetlb_zero, 1060 .extra1 = (void *)&hugetlb_zero,
1061 .extra2 = (void *)&hugetlb_infinity, 1061 .extra2 = (void *)&hugetlb_infinity,
1062 }, 1062 },
1063#ifdef CONFIG_NUMA
1064 {
1065 .procname = "nr_hugepages_mempolicy",
1066 .data = NULL,
1067 .maxlen = sizeof(unsigned long),
1068 .mode = 0644,
1069 .proc_handler = &hugetlb_mempolicy_sysctl_handler,
1070 .extra1 = (void *)&hugetlb_zero,
1071 .extra2 = (void *)&hugetlb_infinity,
1072 },
1073#endif
1063 { 1074 {
1064 .procname = "hugetlb_shm_group", 1075 .procname = "hugetlb_shm_group",
1065 .data = &sysctl_hugetlb_shm_group, 1076 .data = &sysctl_hugetlb_shm_group,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 324d1abae876..1125d818ea06 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1330,29 +1330,71 @@ static struct hstate *kobj_to_hstate(struct kobject *kobj)
1330 return NULL; 1330 return NULL;
1331} 1331}
1332 1332
1333static ssize_t nr_hugepages_show(struct kobject *kobj, 1333static ssize_t nr_hugepages_show_common(struct kobject *kobj,
1334 struct kobj_attribute *attr, char *buf) 1334 struct kobj_attribute *attr, char *buf)
1335{ 1335{
1336 struct hstate *h = kobj_to_hstate(kobj); 1336 struct hstate *h = kobj_to_hstate(kobj);
1337 return sprintf(buf, "%lu\n", h->nr_huge_pages); 1337 return sprintf(buf, "%lu\n", h->nr_huge_pages);
1338} 1338}
1339static ssize_t nr_hugepages_store(struct kobject *kobj, 1339static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
1340 struct kobj_attribute *attr, const char *buf, size_t count) 1340 struct kobject *kobj, struct kobj_attribute *attr,
1341 const char *buf, size_t len)
1341{ 1342{
1342 int err; 1343 int err;
1343 unsigned long input; 1344 unsigned long count;
1344 struct hstate *h = kobj_to_hstate(kobj); 1345 struct hstate *h = kobj_to_hstate(kobj);
1346 NODEMASK_ALLOC(nodemask_t, nodes_allowed);
1345 1347
1346 err = strict_strtoul(buf, 10, &input); 1348 err = strict_strtoul(buf, 10, &count);
1347 if (err) 1349 if (err)
1348 return 0; 1350 return 0;
1349 1351
1350 h->max_huge_pages = set_max_huge_pages(h, input, &node_online_map); 1352 if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) {
1353 NODEMASK_FREE(nodes_allowed);
1354 nodes_allowed = &node_online_map;
1355 }
1356 h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
1351 1357
1352 return count; 1358 if (nodes_allowed != &node_online_map)
1359 NODEMASK_FREE(nodes_allowed);
1360
1361 return len;
1362}
1363
1364static ssize_t nr_hugepages_show(struct kobject *kobj,
1365 struct kobj_attribute *attr, char *buf)
1366{
1367 return nr_hugepages_show_common(kobj, attr, buf);
1368}
1369
1370static ssize_t nr_hugepages_store(struct kobject *kobj,
1371 struct kobj_attribute *attr, const char *buf, size_t len)
1372{
1373 return nr_hugepages_store_common(false, kobj, attr, buf, len);
1353} 1374}
1354HSTATE_ATTR(nr_hugepages); 1375HSTATE_ATTR(nr_hugepages);
1355 1376
1377#ifdef CONFIG_NUMA
1378
1379/*
1380 * hstate attribute for optionally mempolicy-based constraint on persistent
1381 * huge page alloc/free.
1382 */
1383static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
1384 struct kobj_attribute *attr, char *buf)
1385{
1386 return nr_hugepages_show_common(kobj, attr, buf);
1387}
1388
1389static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
1390 struct kobj_attribute *attr, const char *buf, size_t len)
1391{
1392 return nr_hugepages_store_common(true, kobj, attr, buf, len);
1393}
1394HSTATE_ATTR(nr_hugepages_mempolicy);
1395#endif
1396
1397
1356static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, 1398static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
1357 struct kobj_attribute *attr, char *buf) 1399 struct kobj_attribute *attr, char *buf)
1358{ 1400{
@@ -1408,6 +1450,9 @@ static struct attribute *hstate_attrs[] = {
1408 &free_hugepages_attr.attr, 1450 &free_hugepages_attr.attr,
1409 &resv_hugepages_attr.attr, 1451 &resv_hugepages_attr.attr,
1410 &surplus_hugepages_attr.attr, 1452 &surplus_hugepages_attr.attr,
1453#ifdef CONFIG_NUMA
1454 &nr_hugepages_mempolicy_attr.attr,
1455#endif
1411 NULL, 1456 NULL,
1412}; 1457};
1413 1458
@@ -1574,9 +1619,9 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
1574} 1619}
1575 1620
1576#ifdef CONFIG_SYSCTL 1621#ifdef CONFIG_SYSCTL
1577int hugetlb_sysctl_handler(struct ctl_table *table, int write, 1622static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
1578 void __user *buffer, 1623 struct ctl_table *table, int write,
1579 size_t *length, loff_t *ppos) 1624 void __user *buffer, size_t *length, loff_t *ppos)
1580{ 1625{
1581 struct hstate *h = &default_hstate; 1626 struct hstate *h = &default_hstate;
1582 unsigned long tmp; 1627 unsigned long tmp;
@@ -1588,13 +1633,39 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
1588 table->maxlen = sizeof(unsigned long); 1633 table->maxlen = sizeof(unsigned long);
1589 proc_doulongvec_minmax(table, write, buffer, length, ppos); 1634 proc_doulongvec_minmax(table, write, buffer, length, ppos);
1590 1635
1591 if (write) 1636 if (write) {
1592 h->max_huge_pages = set_max_huge_pages(h, tmp, 1637 NODEMASK_ALLOC(nodemask_t, nodes_allowed);
1593 &node_online_map); 1638 if (!(obey_mempolicy &&
1639 init_nodemask_of_mempolicy(nodes_allowed))) {
1640 NODEMASK_FREE(nodes_allowed);
1641 nodes_allowed = &node_states[N_HIGH_MEMORY];
1642 }
1643 h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
1644
1645 if (nodes_allowed != &node_states[N_HIGH_MEMORY])
1646 NODEMASK_FREE(nodes_allowed);
1647 }
1594 1648
1595 return 0; 1649 return 0;
1596} 1650}
1597 1651
1652int hugetlb_sysctl_handler(struct ctl_table *table, int write,
1653 void __user *buffer, size_t *length, loff_t *ppos)
1654{
1655
1656 return hugetlb_sysctl_handler_common(false, table, write,
1657 buffer, length, ppos);
1658}
1659
1660#ifdef CONFIG_NUMA
1661int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
1662 void __user *buffer, size_t *length, loff_t *ppos)
1663{
1664 return hugetlb_sysctl_handler_common(true, table, write,
1665 buffer, length, ppos);
1666}
1667#endif /* CONFIG_NUMA */
1668
1598int hugetlb_treat_movable_handler(struct ctl_table *table, int write, 1669int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
1599 void __user *buffer, 1670 void __user *buffer,
1600 size_t *length, loff_t *ppos) 1671 size_t *length, loff_t *ppos)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0f89eabbaf3e..f11fdad06204 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1568,6 +1568,53 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
1568 } 1568 }
1569 return zl; 1569 return zl;
1570} 1570}
1571
1572/*
1573 * init_nodemask_of_mempolicy
1574 *
1575 * If the current task's mempolicy is "default" [NULL], return 'false'
1576 * to indicate default policy. Otherwise, extract the policy nodemask
1577 * for 'bind' or 'interleave' policy into the argument nodemask, or
1578 * initialize the argument nodemask to contain the single node for
1579 * 'preferred' or 'local' policy and return 'true' to indicate presence
1580 * of non-default mempolicy.
1581 *
1582 * We don't bother with reference counting the mempolicy [mpol_get/put]
1583 * because the current task is examining it's own mempolicy and a task's
1584 * mempolicy is only ever changed by the task itself.
1585 *
1586 * N.B., the caller supplies the nodemask argument and is responsible for freeing it.
1587 */
1588bool init_nodemask_of_mempolicy(nodemask_t *mask)
1589{
1590 struct mempolicy *mempolicy;
1591 int nid;
1592
1593 if (!(mask && current->mempolicy))
1594 return false;
1595
1596 mempolicy = current->mempolicy;
1597 switch (mempolicy->mode) {
1598 case MPOL_PREFERRED:
1599 if (mempolicy->flags & MPOL_F_LOCAL)
1600 nid = numa_node_id();
1601 else
1602 nid = mempolicy->v.preferred_node;
1603 init_nodemask_of_node(mask, nid);
1604 break;
1605
1606 case MPOL_BIND:
1607 /* Fall through */
1608 case MPOL_INTERLEAVE:
1609 *mask = mempolicy->v.nodes;
1610 break;
1611
1612 default:
1613 BUG();
1614 }
1615
1616 return true;
1617}
1571#endif 1618#endif
1572 1619
1573/* Allocate a page in interleaved policy. 1620/* Allocate a page in interleaved policy.