author		Nishanth Aravamudan <nacc@us.ibm.com>	2008-03-04 17:29:42 -0500
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2008-03-04 19:35:18 -0500
commit		348e1e04b5229a481891699ce86da009b793f29e (patch)
tree		481caab1f0178e64ace723fc9bd7e36627525e39 /mm
parent		ac09b3a15154af5f081fed509c6c3662e79de785 (diff)
hugetlb: fix pool shrinking while in restricted cpuset
Adam Litke noticed that currently we grow the hugepage pool independent of any
cpuset the running process may be in, but when shrinking the pool, the cpuset
is checked. This leads to inconsistency when shrinking the pool in a
restricted cpuset -- an administrator may have been able to grow the pool on a
node restricted by a containing cpuset, but they cannot shrink it there.
There are two options: either prevent growing of the pool outside of the
cpuset or allow shrinking outside of the cpuset. From previous discussions
on linux-mm, /proc/sys/vm/nr_hugepages is an administrative interface that
should not be restricted by cpusets. So allow shrinking the pool by removing
pages from nodes outside of current's cpuset.
Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com>
Acked-by: Adam Litke <agl@us.ibm.com>
Cc: William Irwin <wli@holomorphy.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Paul Jackson <pj@sgi.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--	mm/hugetlb.c	26
1 file changed, 22 insertions(+), 4 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 20e04c64468d..dcacc811e70e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -71,7 +71,25 @@ static void enqueue_huge_page(struct page *page)
 	free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+static struct page *dequeue_huge_page(void)
+{
+	int nid;
+	struct page *page = NULL;
+
+	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
+		if (!list_empty(&hugepage_freelists[nid])) {
+			page = list_entry(hugepage_freelists[nid].next,
+					  struct page, lru);
+			list_del(&page->lru);
+			free_huge_pages--;
+			free_huge_pages_node[nid]--;
+			break;
+		}
+	}
+	return page;
+}
+
+static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 				unsigned long address)
 {
 	int nid;
@@ -410,7 +428,7 @@ static struct page *alloc_huge_page_shared(struct vm_area_struct *vma,
 	struct page *page;
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page(vma, addr);
+	page = dequeue_huge_page_vma(vma, addr);
 	spin_unlock(&hugetlb_lock);
 	return page ? page : ERR_PTR(-VM_FAULT_OOM);
 }
@@ -425,7 +443,7 @@ static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
 
 	spin_lock(&hugetlb_lock);
 	if (free_huge_pages > resv_huge_pages)
-		page = dequeue_huge_page(vma, addr);
+		page = dequeue_huge_page_vma(vma, addr);
 	spin_unlock(&hugetlb_lock);
 	if (!page) {
 		page = alloc_buddy_huge_page(vma, addr);
@@ -578,7 +596,7 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	min_count = max(count, min_count);
 	try_to_free_low(min_count);
 	while (min_count < persistent_huge_pages) {
-		struct page *page = dequeue_huge_page(NULL, 0);
+		struct page *page = dequeue_huge_page();
 		if (!page)
 			break;
 		update_and_free_page(page);
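For context, a minimal user-space sketch (not part of this commit) of the administrative interface the commit message refers to: resizing the persistent hugepage pool by writing to /proc/sys/vm/nr_hugepages. The helper name set_nr_hugepages and the pool sizes are illustrative only, and the program must run as root.

/*
 * Illustrative only -- not part of the commit. Resizes the persistent
 * hugepage pool via /proc/sys/vm/nr_hugepages; with this patch, shrinking
 * the pool can also free pages on nodes outside the caller's cpuset.
 */
#include <stdio.h>

static int set_nr_hugepages(unsigned long count)	/* hypothetical helper */
{
	FILE *f = fopen("/proc/sys/vm/nr_hugepages", "w");

	if (!f)
		return -1;
	fprintf(f, "%lu\n", count);
	return fclose(f);
}

int main(void)
{
	set_nr_hugepages(64);	/* grow the pool (never cpuset-restricted) */
	set_nr_hugepages(16);	/* shrink it back */
	return 0;
}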