author		Lee Schermerhorn <lee.schermerhorn@hp.com>	2009-12-14 20:58:21 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-12-15 11:53:12 -0500
commit		06808b0827e1cd14eedc96bac2655d5b37ac246c (patch)
tree		8f7b52a4af1532ed414631f68b99a059e299d83f /include
parent		c1e6c8d074ea3621106548654cc244d2edc12ead (diff)
hugetlb: derive huge pages nodes allowed from task mempolicy
This patch derives a "nodes_allowed" node mask from the numa mempolicy of
the task that is modifying the number of persistent huge pages, and uses it
to control the allocation, freeing and adjusting of surplus huge pages when
the pool page count is modified via the new sysctl or sysfs attribute
"nr_hugepages_mempolicy".

The nodes_allowed mask is derived as follows:

* For "default" [NULL] task mempolicy, a NULL nodemask_t pointer is
  produced.  This will cause the hugetlb subsystem to use node_online_map
  as the "nodes_allowed".  This preserves the behavior before this patch.
* For "preferred" mempolicy, including explicit local allocation, a
  nodemask with the single preferred node will be produced.  "local"
  policy will NOT track any internode migrations of the task adjusting
  nr_hugepages.
* For "bind" and "interleave" policy, the mempolicy's nodemask will be
  used.
* Other than to inform the construction of the nodes_allowed node mask,
  the actual mempolicy mode is ignored.  That is, all modes behave like
  interleave over the resulting nodes_allowed mask with no "fallback".

See the updated documentation [next patch] for more information about the
implications of this patch.

Examples:

Starting with:

	Node 0 HugePages_Total: 0
	Node 1 HugePages_Total: 0
	Node 2 HugePages_Total: 0
	Node 3 HugePages_Total: 0

Default behavior [with or without this patch] balances persistent hugepage
allocation across nodes [given sufficient contiguous memory]:

	sysctl vm.nr_hugepages[_mempolicy]=32

yields:

	Node 0 HugePages_Total: 8
	Node 1 HugePages_Total: 8
	Node 2 HugePages_Total: 8
	Node 3 HugePages_Total: 8

Of course, we only have nr_hugepages_mempolicy with the patch, but with
default mempolicy, nr_hugepages_mempolicy behaves the same as nr_hugepages.

Applying a mempolicy, e.g. with numactl [using '-m' a.k.a. '--membind'
because it allows multiple nodes to be specified and it's easy to type],
we can allocate huge pages on individual nodes or sets of nodes.  So,
starting from the condition above, with 8 huge pages per node, add 8 more
to node 2 using:

	numactl -m 2 sysctl vm.nr_hugepages_mempolicy=40

This yields:

	Node 0 HugePages_Total: 8
	Node 1 HugePages_Total: 8
	Node 2 HugePages_Total: 16
	Node 3 HugePages_Total: 8

The incremental 8 huge pages were restricted to node 2 by the specified
mempolicy.

Similarly, we can use mempolicy to free persistent huge pages from
specified nodes:

	numactl -m 0,1 sysctl vm.nr_hugepages_mempolicy=32

yields:

	Node 0 HugePages_Total: 4
	Node 1 HugePages_Total: 4
	Node 2 HugePages_Total: 16
	Node 3 HugePages_Total: 8

The 8 huge pages freed were balanced over nodes 0 and 1.

[rientjes@google.com: accommodate reworked NODEMASK_ALLOC]
Signed-off-by: David Rientjes <rientjes@google.com>
Signed-off-by: Lee Schermerhorn <lee.schermerhorn@hp.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Andi Kleen <andi@firstfloor.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Randy Dunlap <randy.dunlap@oracle.com>
Cc: Nishanth Aravamudan <nacc@us.ibm.com>
Cc: Adam Litke <agl@us.ibm.com>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Eric Whitney <eric.whitney@hp.com>
Cc: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
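To make the derivation rules above concrete, here is a minimal C sketch of
how a helper like init_nodemask_of_mempolicy() can map the calling task's
mempolicy onto a nodemask.  It is illustrative only: the mempolicy field
names (mode, flags, v.preferred_node, v.nodes) and the MPOL_F_LOCAL test
are assumptions about the mm/mempolicy.c internals of this era, not a copy
of the patch, and real code would also take task_lock() around the policy
access.

	/*
	 * Sketch: derive a nodemask from current's mempolicy.
	 * Returns false for default [NULL] policy, so the caller
	 * falls back to node_online_map.
	 */
	bool init_nodemask_of_mempolicy(nodemask_t *mask)
	{
		struct mempolicy *pol = current->mempolicy;
		int nid;

		if (!mask || !pol)
			return false;	/* default policy: no restriction */

		switch (pol->mode) {
		case MPOL_PREFERRED:
			/* explicit "local" allocation prefers the current node */
			if (pol->flags & MPOL_F_LOCAL)
				nid = numa_node_id();
			else
				nid = pol->v.preferred_node;
			init_nodemask_of_node(mask, nid);
			break;
		case MPOL_BIND:
		case MPOL_INTERLEAVE:
			/* use the policy's own nodemask directly */
			*mask = pol->v.nodes;
			break;
		default:
			return false;
		}
		return true;
	}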
Diffstat (limited to 'include')
-rw-r--r--	include/linux/hugetlb.h	6
-rw-r--r--	include/linux/mempolicy.h	3
2 files changed, 9 insertions, 0 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 41a59afc70fa..78b4bc64c006 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -23,6 +23,12 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
 int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
 int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+
+#ifdef CONFIG_NUMA
+int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
+			void __user *, size_t *, loff_t *);
+#endif
+
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			struct page **, struct vm_area_struct **,
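The handler declared above is only useful once it is wired up to the new
"nr_hugepages_mempolicy" sysctl.  A plausible vm_table entry in
kernel/sysctl.c, guarded the same way, might look like the sketch below;
this entry is a hypothetical illustration of the standard ctl_table
registration pattern, not part of this diff:

	#ifdef CONFIG_NUMA
	{
		.procname	= "nr_hugepages_mempolicy",
		.data		= NULL,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= &hugetlb_mempolicy_sysctl_handler,
	},
	#endif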
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 085c903fe0f1..1cc966cd3e5f 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -201,6 +201,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 				unsigned long addr, gfp_t gfp_flags,
 				struct mempolicy **mpol, nodemask_t **nodemask);
+extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
 extern unsigned slab_node(struct mempolicy *policy);
 
 extern enum zone_type policy_zone;
@@ -328,6 +329,8 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 	return node_zonelist(0, gfp_flags);
 }
 
+static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; }
+
 static inline int do_migrate_pages(struct mm_struct *mm,
 			const nodemask_t *from_nodes,
 			const nodemask_t *to_nodes, int flags)
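The !CONFIG_NUMA stub above always returns false, which is what lets the
caller fall back to node_online_map, preserving the pre-patch behavior.
The sketch below shows the intended call pattern in the nr_hugepages write
path; the obey_mempolicy flag, the set_max_huge_pages() call, and the
NODEMASK_ALLOC gfp argument [per the "reworked NODEMASK_ALLOC" note in the
changelog] are assumptions for illustration, not lines from this diff:

	/* on-stack nodemasks can be large, so allocate one */
	NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);

	if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) {
		/*
		 * Default policy, !NUMA, or allocation failure:
		 * fall back to all online nodes.
		 */
		NODEMASK_FREE(nodes_allowed);
		nodes_allowed = &node_online_map;
	}

	h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);

	if (nodes_allowed != &node_online_map)
		NODEMASK_FREE(nodes_allowed);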