diff options
-rw-r--r-- | include/linux/hugetlb.h | 6 | ||||
-rw-r--r-- | include/linux/mempolicy.h | 3 | ||||
-rw-r--r-- | kernel/sysctl.c | 15 | ||||
-rw-r--r-- | mm/hugetlb.c | 97 | ||||
-rw-r--r-- | mm/mempolicy.c | 47 |
5 files changed, 153 insertions, 15 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 41a59afc70fa..78b4bc64c006 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -23,6 +23,12 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma); | |||
23 | int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); | 23 | int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); |
24 | int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); | 24 | int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); |
25 | int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); | 25 | int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); |
26 | |||
27 | #ifdef CONFIG_NUMA | ||
28 | int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, | ||
29 | void __user *, size_t *, loff_t *); | ||
30 | #endif | ||
31 | |||
26 | int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); | 32 | int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); |
27 | int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, | 33 | int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, |
28 | struct page **, struct vm_area_struct **, | 34 | struct page **, struct vm_area_struct **, |
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 085c903fe0f1..1cc966cd3e5f 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h | |||
@@ -201,6 +201,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p); | |||
201 | extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, | 201 | extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, |
202 | unsigned long addr, gfp_t gfp_flags, | 202 | unsigned long addr, gfp_t gfp_flags, |
203 | struct mempolicy **mpol, nodemask_t **nodemask); | 203 | struct mempolicy **mpol, nodemask_t **nodemask); |
204 | extern bool init_nodemask_of_mempolicy(nodemask_t *mask); | ||
204 | extern unsigned slab_node(struct mempolicy *policy); | 205 | extern unsigned slab_node(struct mempolicy *policy); |
205 | 206 | ||
206 | extern enum zone_type policy_zone; | 207 | extern enum zone_type policy_zone; |
@@ -328,6 +329,8 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, | |||
328 | return node_zonelist(0, gfp_flags); | 329 | return node_zonelist(0, gfp_flags); |
329 | } | 330 | } |
330 | 331 | ||
332 | static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; } | ||
333 | |||
331 | static inline int do_migrate_pages(struct mm_struct *mm, | 334 | static inline int do_migrate_pages(struct mm_struct *mm, |
332 | const nodemask_t *from_nodes, | 335 | const nodemask_t *from_nodes, |
333 | const nodemask_t *to_nodes, int flags) | 336 | const nodemask_t *to_nodes, int flags) |
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 554ac4894f0f..60fc93131095 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1051,7 +1051,7 @@ static struct ctl_table vm_table[] = { | |||
1051 | .extra2 = &one_hundred, | 1051 | .extra2 = &one_hundred, |
1052 | }, | 1052 | }, |
1053 | #ifdef CONFIG_HUGETLB_PAGE | 1053 | #ifdef CONFIG_HUGETLB_PAGE |
1054 | { | 1054 | { |
1055 | .procname = "nr_hugepages", | 1055 | .procname = "nr_hugepages", |
1056 | .data = NULL, | 1056 | .data = NULL, |
1057 | .maxlen = sizeof(unsigned long), | 1057 | .maxlen = sizeof(unsigned long), |
@@ -1059,7 +1059,18 @@ static struct ctl_table vm_table[] = { | |||
1059 | .proc_handler = hugetlb_sysctl_handler, | 1059 | .proc_handler = hugetlb_sysctl_handler, |
1060 | .extra1 = (void *)&hugetlb_zero, | 1060 | .extra1 = (void *)&hugetlb_zero, |
1061 | .extra2 = (void *)&hugetlb_infinity, | 1061 | .extra2 = (void *)&hugetlb_infinity, |
1062 | }, | 1062 | }, |
1063 | #ifdef CONFIG_NUMA | ||
1064 | { | ||
1065 | .procname = "nr_hugepages_mempolicy", | ||
1066 | .data = NULL, | ||
1067 | .maxlen = sizeof(unsigned long), | ||
1068 | .mode = 0644, | ||
1069 | .proc_handler = &hugetlb_mempolicy_sysctl_handler, | ||
1070 | .extra1 = (void *)&hugetlb_zero, | ||
1071 | .extra2 = (void *)&hugetlb_infinity, | ||
1072 | }, | ||
1073 | #endif | ||
1063 | { | 1074 | { |
1064 | .procname = "hugetlb_shm_group", | 1075 | .procname = "hugetlb_shm_group", |
1065 | .data = &sysctl_hugetlb_shm_group, | 1076 | .data = &sysctl_hugetlb_shm_group, |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 324d1abae876..1125d818ea06 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -1330,29 +1330,71 @@ static struct hstate *kobj_to_hstate(struct kobject *kobj) | |||
1330 | return NULL; | 1330 | return NULL; |
1331 | } | 1331 | } |
1332 | 1332 | ||
1333 | static ssize_t nr_hugepages_show(struct kobject *kobj, | 1333 | static ssize_t nr_hugepages_show_common(struct kobject *kobj, |
1334 | struct kobj_attribute *attr, char *buf) | 1334 | struct kobj_attribute *attr, char *buf) |
1335 | { | 1335 | { |
1336 | struct hstate *h = kobj_to_hstate(kobj); | 1336 | struct hstate *h = kobj_to_hstate(kobj); |
1337 | return sprintf(buf, "%lu\n", h->nr_huge_pages); | 1337 | return sprintf(buf, "%lu\n", h->nr_huge_pages); |
1338 | } | 1338 | } |
1339 | static ssize_t nr_hugepages_store(struct kobject *kobj, | 1339 | static ssize_t nr_hugepages_store_common(bool obey_mempolicy, |
1340 | struct kobj_attribute *attr, const char *buf, size_t count) | 1340 | struct kobject *kobj, struct kobj_attribute *attr, |
1341 | const char *buf, size_t len) | ||
1341 | { | 1342 | { |
1342 | int err; | 1343 | int err; |
1343 | unsigned long input; | 1344 | unsigned long count; |
1344 | struct hstate *h = kobj_to_hstate(kobj); | 1345 | struct hstate *h = kobj_to_hstate(kobj); |
1346 | NODEMASK_ALLOC(nodemask_t, nodes_allowed); | ||
1345 | 1347 | ||
1346 | err = strict_strtoul(buf, 10, &input); | 1348 | err = strict_strtoul(buf, 10, &count); |
1347 | if (err) | 1349 | if (err) |
1348 | return 0; | 1350 | return 0; |
1349 | 1351 | ||
1350 | h->max_huge_pages = set_max_huge_pages(h, input, &node_online_map); | 1352 | if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) { |
1353 | NODEMASK_FREE(nodes_allowed); | ||
1354 | nodes_allowed = &node_online_map; | ||
1355 | } | ||
1356 | h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed); | ||
1351 | 1357 | ||
1352 | return count; | 1358 | if (nodes_allowed != &node_online_map) |
1359 | NODEMASK_FREE(nodes_allowed); | ||
1360 | |||
1361 | return len; | ||
1362 | } | ||
1363 | |||
1364 | static ssize_t nr_hugepages_show(struct kobject *kobj, | ||
1365 | struct kobj_attribute *attr, char *buf) | ||
1366 | { | ||
1367 | return nr_hugepages_show_common(kobj, attr, buf); | ||
1368 | } | ||
1369 | |||
1370 | static ssize_t nr_hugepages_store(struct kobject *kobj, | ||
1371 | struct kobj_attribute *attr, const char *buf, size_t len) | ||
1372 | { | ||
1373 | return nr_hugepages_store_common(false, kobj, attr, buf, len); | ||
1353 | } | 1374 | } |
1354 | HSTATE_ATTR(nr_hugepages); | 1375 | HSTATE_ATTR(nr_hugepages); |
1355 | 1376 | ||
1377 | #ifdef CONFIG_NUMA | ||
1378 | |||
1379 | /* | ||
1380 | * hstate attribute for optionally mempolicy-based constraint on persistent | ||
1381 | * huge page alloc/free. | ||
1382 | */ | ||
1383 | static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj, | ||
1384 | struct kobj_attribute *attr, char *buf) | ||
1385 | { | ||
1386 | return nr_hugepages_show_common(kobj, attr, buf); | ||
1387 | } | ||
1388 | |||
1389 | static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj, | ||
1390 | struct kobj_attribute *attr, const char *buf, size_t len) | ||
1391 | { | ||
1392 | return nr_hugepages_store_common(true, kobj, attr, buf, len); | ||
1393 | } | ||
1394 | HSTATE_ATTR(nr_hugepages_mempolicy); | ||
1395 | #endif | ||
1396 | |||
1397 | |||
1356 | static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, | 1398 | static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj, |
1357 | struct kobj_attribute *attr, char *buf) | 1399 | struct kobj_attribute *attr, char *buf) |
1358 | { | 1400 | { |
@@ -1408,6 +1450,9 @@ static struct attribute *hstate_attrs[] = { | |||
1408 | &free_hugepages_attr.attr, | 1450 | &free_hugepages_attr.attr, |
1409 | &resv_hugepages_attr.attr, | 1451 | &resv_hugepages_attr.attr, |
1410 | &surplus_hugepages_attr.attr, | 1452 | &surplus_hugepages_attr.attr, |
1453 | #ifdef CONFIG_NUMA | ||
1454 | &nr_hugepages_mempolicy_attr.attr, | ||
1455 | #endif | ||
1411 | NULL, | 1456 | NULL, |
1412 | }; | 1457 | }; |
1413 | 1458 | ||
@@ -1574,9 +1619,9 @@ static unsigned int cpuset_mems_nr(unsigned int *array) | |||
1574 | } | 1619 | } |
1575 | 1620 | ||
1576 | #ifdef CONFIG_SYSCTL | 1621 | #ifdef CONFIG_SYSCTL |
1577 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, | 1622 | static int hugetlb_sysctl_handler_common(bool obey_mempolicy, |
1578 | void __user *buffer, | 1623 | struct ctl_table *table, int write, |
1579 | size_t *length, loff_t *ppos) | 1624 | void __user *buffer, size_t *length, loff_t *ppos) |
1580 | { | 1625 | { |
1581 | struct hstate *h = &default_hstate; | 1626 | struct hstate *h = &default_hstate; |
1582 | unsigned long tmp; | 1627 | unsigned long tmp; |
@@ -1588,13 +1633,39 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, | |||
1588 | table->maxlen = sizeof(unsigned long); | 1633 | table->maxlen = sizeof(unsigned long); |
1589 | proc_doulongvec_minmax(table, write, buffer, length, ppos); | 1634 | proc_doulongvec_minmax(table, write, buffer, length, ppos); |
1590 | 1635 | ||
1591 | if (write) | 1636 | if (write) { |
1592 | h->max_huge_pages = set_max_huge_pages(h, tmp, | 1637 | NODEMASK_ALLOC(nodemask_t, nodes_allowed); |
1593 | &node_online_map); | 1638 | if (!(obey_mempolicy && |
1639 | init_nodemask_of_mempolicy(nodes_allowed))) { | ||
1640 | NODEMASK_FREE(nodes_allowed); | ||
1641 | nodes_allowed = &node_states[N_HIGH_MEMORY]; | ||
1642 | } | ||
1643 | h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed); | ||
1644 | |||
1645 | if (nodes_allowed != &node_states[N_HIGH_MEMORY]) | ||
1646 | NODEMASK_FREE(nodes_allowed); | ||
1647 | } | ||
1594 | 1648 | ||
1595 | return 0; | 1649 | return 0; |
1596 | } | 1650 | } |
1597 | 1651 | ||
1652 | int hugetlb_sysctl_handler(struct ctl_table *table, int write, | ||
1653 | void __user *buffer, size_t *length, loff_t *ppos) | ||
1654 | { | ||
1655 | |||
1656 | return hugetlb_sysctl_handler_common(false, table, write, | ||
1657 | buffer, length, ppos); | ||
1658 | } | ||
1659 | |||
1660 | #ifdef CONFIG_NUMA | ||
1661 | int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write, | ||
1662 | void __user *buffer, size_t *length, loff_t *ppos) | ||
1663 | { | ||
1664 | return hugetlb_sysctl_handler_common(true, table, write, | ||
1665 | buffer, length, ppos); | ||
1666 | } | ||
1667 | #endif /* CONFIG_NUMA */ | ||
1668 | |||
1598 | int hugetlb_treat_movable_handler(struct ctl_table *table, int write, | 1669 | int hugetlb_treat_movable_handler(struct ctl_table *table, int write, |
1599 | void __user *buffer, | 1670 | void __user *buffer, |
1600 | size_t *length, loff_t *ppos) | 1671 | size_t *length, loff_t *ppos) |
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 0f89eabbaf3e..f11fdad06204 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1568,6 +1568,53 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, | |||
1568 | } | 1568 | } |
1569 | return zl; | 1569 | return zl; |
1570 | } | 1570 | } |
1571 | |||
1572 | /* | ||
1573 | * init_nodemask_of_mempolicy | ||
1574 | * | ||
1575 | * If the current task's mempolicy is "default" [NULL], return 'false' | ||
1576 | * to indicate default policy. Otherwise, extract the policy nodemask | ||
1577 | * for 'bind' or 'interleave' policy into the argument nodemask, or | ||
1578 | * initialize the argument nodemask to contain the single node for | ||
1579 | * 'preferred' or 'local' policy and return 'true' to indicate presence | ||
1580 | * of non-default mempolicy. | ||
1581 | * | ||
1582 | * We don't bother with reference counting the mempolicy [mpol_get/put] | ||
1583 | * because the current task is examining its own mempolicy and a task's | ||
1584 | * mempolicy is only ever changed by the task itself. | ||
1585 | * | ||
1586 | * N.B., it is the caller's responsibility to free a returned nodemask. | ||
1587 | */ | ||
1588 | bool init_nodemask_of_mempolicy(nodemask_t *mask) | ||
1589 | { | ||
1590 | struct mempolicy *mempolicy; | ||
1591 | int nid; | ||
1592 | |||
1593 | if (!(mask && current->mempolicy)) | ||
1594 | return false; | ||
1595 | |||
1596 | mempolicy = current->mempolicy; | ||
1597 | switch (mempolicy->mode) { | ||
1598 | case MPOL_PREFERRED: | ||
1599 | if (mempolicy->flags & MPOL_F_LOCAL) | ||
1600 | nid = numa_node_id(); | ||
1601 | else | ||
1602 | nid = mempolicy->v.preferred_node; | ||
1603 | init_nodemask_of_node(mask, nid); | ||
1604 | break; | ||
1605 | |||
1606 | case MPOL_BIND: | ||
1607 | /* Fall through */ | ||
1608 | case MPOL_INTERLEAVE: | ||
1609 | *mask = mempolicy->v.nodes; | ||
1610 | break; | ||
1611 | |||
1612 | default: | ||
1613 | BUG(); | ||
1614 | } | ||
1615 | |||
1616 | return true; | ||
1617 | } | ||
1571 | #endif | 1618 | #endif |
1572 | 1619 | ||
1573 | /* Allocate a page in interleaved policy. | 1620 | /* Allocate a page in interleaved policy. |