-rw-r--r--  Documentation/sysctl/vm.txt | 30
-rw-r--r--  kernel/sysctl.c             |  2
-rw-r--r--  mm/hugetlb.c                | 32
3 files changed, 34 insertions(+), 30 deletions(-)
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 36ecc26c7433..79a797eb3e87 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -200,17 +200,25 @@ fragmentation index is <= extfrag_threshold. The default value is 500.
 
 hugepages_treat_as_movable
 
-This parameter is only useful when kernelcore= is specified at boot time to
-create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages
-are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero
-value written to hugepages_treat_as_movable allows huge pages to be allocated
-from ZONE_MOVABLE.
-
-Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge
-pages pool can easily grow or shrink within. Assuming that applications are
-not running that mlock() a lot of memory, it is likely the huge pages pool
-can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value
-into nr_hugepages and triggering page reclaim.
+This parameter controls whether we can allocate hugepages from ZONE_MOVABLE
+or not. If set to non-zero, hugepages can be allocated from ZONE_MOVABLE.
+ZONE_MOVABLE is created when kernel boot parameter kernelcore= is specified,
+so this parameter has no effect if used without kernelcore=.
+
+Hugepage migration is now available in some situations which depend on the
+architecture and/or the hugepage size. If a hugepage supports migration,
+allocation from ZONE_MOVABLE is always enabled for the hugepage regardless
+of the value of this parameter.
+IOW, this parameter affects only non-migratable hugepages.
+
+Assuming that hugepages are not migratable in your system, one usecase of
+this parameter is that users can make hugepage pool more extensible by
+enabling the allocation from ZONE_MOVABLE. This is because on ZONE_MOVABLE
+page reclaim/migration/compaction work more and you can get contiguous
+memory more likely. Note that using ZONE_MOVABLE for non-migratable
+hugepages can do harm to other features like memory hotremove (because
+memory hotremove expects that memory blocks on ZONE_MOVABLE are always
+removable,) so it's a trade-off responsible for the users.
 
 ==============================================================
 
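As a usage sketch (not part of this patch): the parameter documented above is
exposed as /proc/sys/vm/hugepages_treat_as_movable, so enabling it from user
space is an ordinary write to that file. The minimal C program below assumes
root privileges and a kernel booted with kernelcore=; without kernelcore= the
write still succeeds but, as described above, has no effect.

/*
 * Hypothetical illustration only: enable allocation of non-migratable
 * hugepages from ZONE_MOVABLE by writing a non-zero value to the sysctl.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/proc/sys/vm/hugepages_treat_as_movable", O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "1", 1) != 1) {
		perror("write");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
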
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 07f6fc468e17..dc69093a8ec4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1225,7 +1225,7 @@ static struct ctl_table vm_table[] = {
 		.data		= &hugepages_treat_as_movable,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= hugetlb_treat_movable_handler,
+		.proc_handler	= proc_dointvec,
 	},
 	{
 		.procname	= "nr_overcommit_hugepages",
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fb4293b93fd0..b49579c7f2a5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -34,7 +34,6 @@
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
 
 int hugetlb_max_hstate __read_mostly;
@@ -539,6 +538,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 	return page;
 }
 
+/* Movability of hugepages depends on migration support. */
+static inline gfp_t htlb_alloc_mask(struct hstate *h)
+{
+	if (hugepages_treat_as_movable || hugepage_migration_support(h))
+		return GFP_HIGHUSER_MOVABLE;
+	else
+		return GFP_HIGHUSER;
+}
+
 static struct page *dequeue_huge_page_vma(struct hstate *h,
 				struct vm_area_struct *vma,
 				unsigned long address, int avoid_reserve,
@@ -568,11 +576,11 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 retry_cpuset:
 	cpuset_mems_cookie = get_mems_allowed();
 	zonelist = huge_zonelist(vma, address,
-					htlb_alloc_mask, &mpol, &nodemask);
+					htlb_alloc_mask(h), &mpol, &nodemask);
 
 	for_each_zone_zonelist_nodemask(zone, z, zonelist,
 						MAX_NR_ZONES - 1, nodemask) {
-		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) {
 			page = dequeue_huge_page_node(h, zone_to_nid(zone));
 			if (page) {
 				if (avoid_reserve)
@@ -738,7 +746,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
 		return NULL;
 
 	page = alloc_pages_exact_node(nid,
-		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+		htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
 						__GFP_REPEAT|__GFP_NOWARN,
 		huge_page_order(h));
 	if (page) {
@@ -965,12 +973,12 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 	spin_unlock(&hugetlb_lock);
 
 	if (nid == NUMA_NO_NODE)
-		page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+		page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP|
 				__GFP_REPEAT|__GFP_NOWARN,
 				huge_page_order(h));
 	else
 		page = alloc_pages_exact_node(nid,
-			htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+			htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
 			__GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
 
 	if (page && arch_prepare_hugepage(page)) {
@@ -2117,18 +2125,6 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
 }
 #endif /* CONFIG_NUMA */
 
-int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
-			void __user *buffer,
-			size_t *length, loff_t *ppos)
-{
-	proc_dointvec(table, write, buffer, length, ppos);
-	if (hugepages_treat_as_movable)
-		htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
-	else
-		htlb_alloc_mask = GFP_HIGHUSER;
-	return 0;
-}
-
 int hugetlb_overcommit_handler(struct ctl_table *table, int write,
 			void __user *buffer,
 			size_t *length, loff_t *ppos)
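To make the new per-hstate behavior concrete, the following stand-alone sketch
models the decision implemented by htlb_alloc_mask() in the hunk above:
migratable hugepages are always allocated with the MOVABLE mask, and the
sysctl only matters for non-migratable ones. The gfp constants, the toy
hstate type and the stubbed migration predicate are simplified stand-ins for
illustration, not the kernel definitions.

#include <stdbool.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel types and constants. */
typedef unsigned int gfp_t;
#define GFP_HIGHUSER		0x1u
#define GFP_HIGHUSER_MOVABLE	0x2u

struct hstate { bool migration_supported; };

static int hugepages_treat_as_movable;

/* Stub for hugepage_migration_support(): per-hstate, arch-dependent. */
static bool hugepage_migration_support(const struct hstate *h)
{
	return h->migration_supported;
}

/*
 * Mirrors the decision in the patch: migratable hugepages always use the
 * MOVABLE mask; otherwise the sysctl decides.
 */
static gfp_t htlb_alloc_mask(const struct hstate *h)
{
	if (hugepages_treat_as_movable || hugepage_migration_support(h))
		return GFP_HIGHUSER_MOVABLE;
	return GFP_HIGHUSER;
}

int main(void)
{
	struct hstate migratable = { .migration_supported = true };
	struct hstate pinned = { .migration_supported = false };

	for (hugepages_treat_as_movable = 0;
	     hugepages_treat_as_movable <= 1;
	     hugepages_treat_as_movable++) {
		printf("sysctl=%d migratable->%s pinned->%s\n",
		       hugepages_treat_as_movable,
		       htlb_alloc_mask(&migratable) == GFP_HIGHUSER_MOVABLE ?
				"MOVABLE" : "HIGHUSER",
		       htlb_alloc_mask(&pinned) == GFP_HIGHUSER_MOVABLE ?
				"MOVABLE" : "HIGHUSER");
	}
	return 0;
}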