aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorKen Chen <kenchen@google.com>2007-05-09 05:33:34 -0400
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-05-09 15:30:49 -0400
commit8a63011275e1a0ec9389e8c7d9b08caab8957ca0 (patch)
treeae631cb1206c60a8b375384a880cabf7dc156d00 /mm
parent85badbdf5120d246ce2bb3f1a7689a805f9c9006 (diff)
pretend cpuset has some form of hugetlb page reservation
When cpuset is configured, it breaks the strict hugetlb page reservation as the accounting is done on a global variable. Such reservation is completely rubbish in the presence of cpuset because the reservation is not checked against page availability for the current cpuset. Application can still potentially OOM'ed by kernel with lack of free htlb page in cpuset that the task is in. Attempt to enforce strict accounting with cpuset is almost impossible (or too ugly) because cpuset is too fluid that task or memory node can be dynamically moved between cpusets. The change of semantics for shared hugetlb mapping with cpuset is undesirable. However, in order to preserve some of the semantics, we fall back to check against current free page availability as a best attempt and hopefully to minimize the impact of changing semantics that cpuset has on hugetlb. Signed-off-by: Ken Chen <kenchen@google.com> Cc: Paul Jackson <pj@sgi.com> Cc: Christoph Lameter <clameter@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/hugetlb.c31
1 files changed, 31 insertions, 0 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 88e708be1f64..eb7180db3033 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -174,6 +174,17 @@ static int __init hugetlb_setup(char *s)
174} 174}
175__setup("hugepages=", hugetlb_setup); 175__setup("hugepages=", hugetlb_setup);
176 176
177static unsigned int cpuset_mems_nr(unsigned int *array)
178{
179 int node;
180 unsigned int nr = 0;
181
182 for_each_node_mask(node, cpuset_current_mems_allowed)
183 nr += array[node];
184
185 return nr;
186}
187
177#ifdef CONFIG_SYSCTL 188#ifdef CONFIG_SYSCTL
178static void update_and_free_page(struct page *page) 189static void update_and_free_page(struct page *page)
179{ 190{
@@ -819,6 +830,26 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
819 chg = region_chg(&inode->i_mapping->private_list, from, to); 830 chg = region_chg(&inode->i_mapping->private_list, from, to);
820 if (chg < 0) 831 if (chg < 0)
821 return chg; 832 return chg;
833 /*
834 * When cpuset is configured, it breaks the strict hugetlb page
835 * reservation as the accounting is done on a global variable. Such
836 * reservation is completely rubbish in the presence of cpuset because
837 * the reservation is not checked against page availability for the
838 * current cpuset. Application can still potentially OOM'ed by kernel
839 * with lack of free htlb page in cpuset that the task is in.
840 * Attempt to enforce strict accounting with cpuset is almost
841 * impossible (or too ugly) because cpuset is too fluid that
842 * task or memory node can be dynamically moved between cpusets.
843 *
844 * The change of semantics for shared hugetlb mapping with cpuset is
845 * undesirable. However, in order to preserve some of the semantics,
846 * we fall back to check against current free page availability as
847 * a best attempt and hopefully to minimize the impact of changing
848 * semantics that cpuset has.
849 */
850 if (chg > cpuset_mems_nr(free_huge_pages_node))
851 return -ENOMEM;
852
822 ret = hugetlb_acct_memory(chg); 853 ret = hugetlb_acct_memory(chg);
823 if (ret < 0) 854 if (ret < 0)
824 return ret; 855 return ret;