diff options
-rw-r--r-- | Documentation/sysctl/vm.txt | 30 | ||||
-rw-r--r-- | kernel/sysctl.c | 2 | ||||
-rw-r--r-- | mm/hugetlb.c | 32 |
3 files changed, 34 insertions, 30 deletions
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 36ecc26c7433..79a797eb3e87 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt | |||
@@ -200,17 +200,25 @@ fragmentation index is <= extfrag_threshold. The default value is 500. | |||
200 | 200 | ||
201 | hugepages_treat_as_movable | 201 | hugepages_treat_as_movable |
202 | 202 | ||
203 | This parameter is only useful when kernelcore= is specified at boot time to | 203 | This parameter controls whether we can allocate hugepages from ZONE_MOVABLE |
204 | create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages | 204 | or not. If set to non-zero, hugepages can be allocated from ZONE_MOVABLE. |
205 | are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero | 205 | ZONE_MOVABLE is created when kernel boot parameter kernelcore= is specified, |
206 | value written to hugepages_treat_as_movable allows huge pages to be allocated | 206 | so this parameter has no effect if used without kernelcore=. |
207 | from ZONE_MOVABLE. | 207 | |
208 | 208 | Hugepage migration is now available in some situations which depend on the | |
209 | Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge | 209 | architecture and/or the hugepage size. If a hugepage supports migration, |
210 | pages pool can easily grow or shrink within. Assuming that applications are | 210 | allocation from ZONE_MOVABLE is always enabled for the hugepage regardless |
211 | not running that mlock() a lot of memory, it is likely the huge pages pool | 211 | of the value of this parameter. |
212 | can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value | 212 | IOW, this parameter affects only non-migratable hugepages. |
213 | into nr_hugepages and triggering page reclaim. | 213 | |
214 | Assuming that hugepages are not migratable in your system, one use case of | ||
215 | this parameter is that users can make the hugepage pool more extensible by | ||
216 | enabling the allocation from ZONE_MOVABLE. This is because on ZONE_MOVABLE | ||
217 | page reclaim/migration/compaction are more effective, so you are more | ||
218 | likely to get contiguous memory. Note that using ZONE_MOVABLE for non-migratable | ||
219 | hugepages can do harm to other features like memory hotremove (because | ||
220 | memory hotremove expects that memory blocks on ZONE_MOVABLE are always | ||
221 | removable,) so it's a trade-off for which the users are responsible. | ||
214 | 222 | ||
215 | ============================================================== | 223 | ============================================================== |
216 | 224 | ||
diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 07f6fc468e17..dc69093a8ec4 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c | |||
@@ -1225,7 +1225,7 @@ static struct ctl_table vm_table[] = { | |||
1225 | .data = &hugepages_treat_as_movable, | 1225 | .data = &hugepages_treat_as_movable, |
1226 | .maxlen = sizeof(int), | 1226 | .maxlen = sizeof(int), |
1227 | .mode = 0644, | 1227 | .mode = 0644, |
1228 | .proc_handler = hugetlb_treat_movable_handler, | 1228 | .proc_handler = proc_dointvec, |
1229 | }, | 1229 | }, |
1230 | { | 1230 | { |
1231 | .procname = "nr_overcommit_hugepages", | 1231 | .procname = "nr_overcommit_hugepages", |
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fb4293b93fd0..b49579c7f2a5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include "internal.h" | 34 | #include "internal.h" |
35 | 35 | ||
36 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; | 36 | const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL; |
37 | static gfp_t htlb_alloc_mask = GFP_HIGHUSER; | ||
38 | unsigned long hugepages_treat_as_movable; | 37 | unsigned long hugepages_treat_as_movable; |
39 | 38 | ||
40 | int hugetlb_max_hstate __read_mostly; | 39 | int hugetlb_max_hstate __read_mostly; |
@@ -539,6 +538,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid) | |||
539 | return page; | 538 | return page; |
540 | } | 539 | } |
541 | 540 | ||
541 | /* Movability of hugepages depends on migration support. */ | ||
542 | static inline gfp_t htlb_alloc_mask(struct hstate *h) | ||
543 | { | ||
544 | if (hugepages_treat_as_movable || hugepage_migration_support(h)) | ||
545 | return GFP_HIGHUSER_MOVABLE; | ||
546 | else | ||
547 | return GFP_HIGHUSER; | ||
548 | } | ||
549 | |||
542 | static struct page *dequeue_huge_page_vma(struct hstate *h, | 550 | static struct page *dequeue_huge_page_vma(struct hstate *h, |
543 | struct vm_area_struct *vma, | 551 | struct vm_area_struct *vma, |
544 | unsigned long address, int avoid_reserve, | 552 | unsigned long address, int avoid_reserve, |
@@ -568,11 +576,11 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, | |||
568 | retry_cpuset: | 576 | retry_cpuset: |
569 | cpuset_mems_cookie = get_mems_allowed(); | 577 | cpuset_mems_cookie = get_mems_allowed(); |
570 | zonelist = huge_zonelist(vma, address, | 578 | zonelist = huge_zonelist(vma, address, |
571 | htlb_alloc_mask, &mpol, &nodemask); | 579 | htlb_alloc_mask(h), &mpol, &nodemask); |
572 | 580 | ||
573 | for_each_zone_zonelist_nodemask(zone, z, zonelist, | 581 | for_each_zone_zonelist_nodemask(zone, z, zonelist, |
574 | MAX_NR_ZONES - 1, nodemask) { | 582 | MAX_NR_ZONES - 1, nodemask) { |
575 | if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) { | 583 | if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) { |
576 | page = dequeue_huge_page_node(h, zone_to_nid(zone)); | 584 | page = dequeue_huge_page_node(h, zone_to_nid(zone)); |
577 | if (page) { | 585 | if (page) { |
578 | if (avoid_reserve) | 586 | if (avoid_reserve) |
@@ -738,7 +746,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) | |||
738 | return NULL; | 746 | return NULL; |
739 | 747 | ||
740 | page = alloc_pages_exact_node(nid, | 748 | page = alloc_pages_exact_node(nid, |
741 | htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| | 749 | htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| |
742 | __GFP_REPEAT|__GFP_NOWARN, | 750 | __GFP_REPEAT|__GFP_NOWARN, |
743 | huge_page_order(h)); | 751 | huge_page_order(h)); |
744 | if (page) { | 752 | if (page) { |
@@ -965,12 +973,12 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid) | |||
965 | spin_unlock(&hugetlb_lock); | 973 | spin_unlock(&hugetlb_lock); |
966 | 974 | ||
967 | if (nid == NUMA_NO_NODE) | 975 | if (nid == NUMA_NO_NODE) |
968 | page = alloc_pages(htlb_alloc_mask|__GFP_COMP| | 976 | page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP| |
969 | __GFP_REPEAT|__GFP_NOWARN, | 977 | __GFP_REPEAT|__GFP_NOWARN, |
970 | huge_page_order(h)); | 978 | huge_page_order(h)); |
971 | else | 979 | else |
972 | page = alloc_pages_exact_node(nid, | 980 | page = alloc_pages_exact_node(nid, |
973 | htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| | 981 | htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE| |
974 | __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); | 982 | __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h)); |
975 | 983 | ||
976 | if (page && arch_prepare_hugepage(page)) { | 984 | if (page && arch_prepare_hugepage(page)) { |
@@ -2117,18 +2125,6 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write, | |||
2117 | } | 2125 | } |
2118 | #endif /* CONFIG_NUMA */ | 2126 | #endif /* CONFIG_NUMA */ |
2119 | 2127 | ||
2120 | int hugetlb_treat_movable_handler(struct ctl_table *table, int write, | ||
2121 | void __user *buffer, | ||
2122 | size_t *length, loff_t *ppos) | ||
2123 | { | ||
2124 | proc_dointvec(table, write, buffer, length, ppos); | ||
2125 | if (hugepages_treat_as_movable) | ||
2126 | htlb_alloc_mask = GFP_HIGHUSER_MOVABLE; | ||
2127 | else | ||
2128 | htlb_alloc_mask = GFP_HIGHUSER; | ||
2129 | return 0; | ||
2130 | } | ||
2131 | |||
2132 | int hugetlb_overcommit_handler(struct ctl_table *table, int write, | 2128 | int hugetlb_overcommit_handler(struct ctl_table *table, int write, |
2133 | void __user *buffer, | 2129 | void __user *buffer, |
2134 | size_t *length, loff_t *ppos) | 2130 | size_t *length, loff_t *ppos) |