diff options
-rw-r--r-- | mm/hugetlb.c | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 88e708be1f64..eb7180db3033 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -174,6 +174,17 @@ static int __init hugetlb_setup(char *s) | |||
174 | } | 174 | } |
175 | __setup("hugepages=", hugetlb_setup); | 175 | __setup("hugepages=", hugetlb_setup); |
176 | 176 | ||
177 | static unsigned int cpuset_mems_nr(unsigned int *array) | ||
178 | { | ||
179 | int node; | ||
180 | unsigned int nr = 0; | ||
181 | |||
182 | for_each_node_mask(node, cpuset_current_mems_allowed) | ||
183 | nr += array[node]; | ||
184 | |||
185 | return nr; | ||
186 | } | ||
187 | |||
177 | #ifdef CONFIG_SYSCTL | 188 | #ifdef CONFIG_SYSCTL |
178 | static void update_and_free_page(struct page *page) | 189 | static void update_and_free_page(struct page *page) |
179 | { | 190 | { |
@@ -819,6 +830,26 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to) | |||
819 | chg = region_chg(&inode->i_mapping->private_list, from, to); | 830 | chg = region_chg(&inode->i_mapping->private_list, from, to); |
820 | if (chg < 0) | 831 | if (chg < 0) |
821 | return chg; | 832 | return chg; |
833 | /* | ||
834 | * When cpuset is configured, it breaks the strict hugetlb page | ||
835 | * reservation as the accounting is done on a global variable. Such | ||
836 | * reservation is completely rubbish in the presence of cpuset because | ||
837 | * the reservation is not checked against page availability for the | ||
838 | * current cpuset. An application can still potentially be OOM'ed by the | ||
839 | * kernel for lack of free htlb pages in the cpuset that the task is in. | ||
840 | * Attempting to enforce strict accounting with cpuset is almost | ||
841 | * impossible (or too ugly) because cpuset is so fluid that | ||
842 | * tasks or memory nodes can be dynamically moved between cpusets. | ||
843 | * | ||
844 | * The change of semantics for shared hugetlb mapping with cpuset is | ||
845 | * undesirable. However, in order to preserve some of the semantics, | ||
846 | * we fall back to checking against current free page availability as | ||
847 | * a best effort, hopefully minimizing the impact of the change in | ||
848 | * semantics that cpuset introduces. | ||
849 | */ | ||
850 | if (chg > cpuset_mems_nr(free_huge_pages_node)) | ||
851 | return -ENOMEM; | ||
852 | |||
822 | ret = hugetlb_acct_memory(chg); | 853 | ret = hugetlb_acct_memory(chg); |
823 | if (ret < 0) | 854 | if (ret < 0) |
824 | return ret; | 855 | return ret; |