aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAdam Litke <agl@us.ibm.com>2007-11-14 19:59:37 -0500
committerLinus Torvalds <torvalds@woody.linux-foundation.org>2007-11-14 21:45:39 -0500
commit348ea204cc23cda35faf962414b674c57da647d7 (patch)
treefb27a17c13ca745bd3f0fb15d0d967bc5d5bc088
parent6c55be8b962f1bdc592d579e81fc27b11ea53dfc (diff)
hugetlb: split alloc_huge_page into private and shared components
Hugetlbfs implements a quota system which can limit the amount of memory that can be used by the filesystem. Before allocating a new huge page for a file, the quota is checked and debited. The quota is then credited when truncating the file. I found a few bugs in the code for both MAP_PRIVATE and MAP_SHARED mappings. Before detailing the problems and my proposed solutions, we should agree on a definition of quotas that properly addresses both private and shared pages. Since the purpose of quotas is to limit total memory consumption on a per-filesystem basis, I argue that all pages allocated by the fs (private and shared) should be charged against quota. Private Mappings ================ The current code will debit quota for private pages sometimes, but will never credit it. At a minimum, this causes a leak in the quota accounting which renders the accounting essentially useless as it is. Shared pages have a one to one mapping with a hugetlbfs file and are easy to account by debiting on allocation and crediting on truncate. Private pages are anonymous in nature and have a many to one relationship with their hugetlbfs files (due to copy on write). Because private pages are not indexed by the mapping's radix tree, their quota cannot be credited at file truncation time. Crediting must be done when the page is unmapped and freed. Shared Pages ============ I discovered an issue concerning the interaction between the MAP_SHARED reservation system and quotas. Since quota is not checked until page instantiation, an over-quota mmap/reservation will initially succeed. When instantiating the first over-quota page, the program will receive SIGBUS. This is inconsistent since the reservation is supposed to be a guarantee. The solution is to debit the full amount of quota at reservation time and credit the unused portion when the reservation is released. 
This patch series brings quotas back in line by making the following modifications: * Private pages - Debit quota in alloc_huge_page() - Credit quota in free_huge_page() * Shared pages - Debit quota for entire reservation at mmap time - Credit quota for instantiated pages in free_huge_page() - Credit quota for unused reservation at munmap time This patch: The shared page reservation and dynamic pool resizing features have made the allocation of private vs. shared huge pages quite different. By splitting out the private/shared-specific portions of the process into their own functions, readability is greatly improved. alloc_huge_page now calls the proper helper and performs common operations. [akpm@linux-foundation.org: coding-style cleanups] Signed-off-by: Adam Litke <agl@us.ibm.com> Cc: Ken Chen <kenchen@google.com> Cc: Andy Whitcroft <apw@shadowen.org> Cc: Dave Hansen <haveblue@us.ibm.com> Cc: David Gibson <hermes@gibson.dropbear.id.au> Cc: William Lee Irwin III <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/hugetlb.c46
1 files changed, 27 insertions, 19 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e2c80631d36a..f43b3dca12b5 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -353,35 +353,43 @@ void return_unused_surplus_pages(unsigned long unused_resv_pages)
353 } 353 }
354} 354}
355 355
356static struct page *alloc_huge_page(struct vm_area_struct *vma, 356
357 unsigned long addr) 357static struct page *alloc_huge_page_shared(struct vm_area_struct *vma,
358 unsigned long addr)
358{ 359{
359 struct page *page = NULL; 360 struct page *page;
360 int use_reserved_page = vma->vm_flags & VM_MAYSHARE;
361 361
362 spin_lock(&hugetlb_lock); 362 spin_lock(&hugetlb_lock);
363 if (!use_reserved_page && (free_huge_pages <= resv_huge_pages))
364 goto fail;
365
366 page = dequeue_huge_page(vma, addr); 363 page = dequeue_huge_page(vma, addr);
367 if (!page)
368 goto fail;
369
370 spin_unlock(&hugetlb_lock); 364 spin_unlock(&hugetlb_lock);
371 set_page_refcounted(page);
372 return page; 365 return page;
366}
373 367
374fail: 368static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
375 spin_unlock(&hugetlb_lock); 369 unsigned long addr)
370{
371 struct page *page = NULL;
376 372
377 /* 373 spin_lock(&hugetlb_lock);
378 * Private mappings do not use reserved huge pages so the allocation 374 if (free_huge_pages > resv_huge_pages)
379 * may have failed due to an undersized hugetlb pool. Try to grab a 375 page = dequeue_huge_page(vma, addr);
380 * surplus huge page from the buddy allocator. 376 spin_unlock(&hugetlb_lock);
381 */ 377 if (!page)
382 if (!use_reserved_page)
383 page = alloc_buddy_huge_page(vma, addr); 378 page = alloc_buddy_huge_page(vma, addr);
379 return page;
380}
384 381
382static struct page *alloc_huge_page(struct vm_area_struct *vma,
383 unsigned long addr)
384{
385 struct page *page;
386
387 if (vma->vm_flags & VM_MAYSHARE)
388 page = alloc_huge_page_shared(vma, addr);
389 else
390 page = alloc_huge_page_private(vma, addr);
391 if (page)
392 set_page_refcounted(page);
385 return page; 393 return page;
386} 394}
387 395