aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndy Whitcroft <apw@shadowen.org>2008-07-24 00:27:30 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-24 13:47:16 -0400
commitc37f9fb11c976ffc08200d631dada6dcbfd07ea4 (patch)
tree2598f0713e4e61ed90335db1745decf2da6086a4
parent9682290484370ce68ba23cd2ec2838e301934199 (diff)
hugetlb: allow huge page mappings to be created without reservations
By default all shared mappings and most private mappings now have reservations associated with them. This improves semantics by providing allocation guarantees to the mapper. However, a small number of applications may attempt to make very large sparse mappings; with these strict reservations the system will never be able to honour the mapping. This patch set brings MAP_NORESERVE support to hugetlb files. This allows new mappings to be made to hugetlbfs files without an associated reservation, for both shared and private mappings. This allows applications which want to create very sparse mappings to opt out of the reservation system. Obviously, as there is no reservation they are liable to fault at runtime if the huge page pool becomes exhausted; buyer beware. Signed-off-by: Andy Whitcroft <apw@shadowen.org> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Adam Litke <agl@us.ibm.com> Cc: Johannes Weiner <hannes@saeurebad.de> Cc: Andy Whitcroft <apw@shadowen.org> Cc: William Lee Irwin III <wli@holomorphy.com> Cc: Hugh Dickins <hugh@veritas.com> Cc: Michael Kerrisk <mtk.manpages@googlemail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/hugetlb.c58
1 files changed, 53 insertions, 5 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 05bc9af4fca9..72acbb29d2cc 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -247,6 +247,9 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
247/* Decrement the reserved pages in the hugepage pool by one */ 247/* Decrement the reserved pages in the hugepage pool by one */
248static void decrement_hugepage_resv_vma(struct vm_area_struct *vma) 248static void decrement_hugepage_resv_vma(struct vm_area_struct *vma)
249{ 249{
250 if (vma->vm_flags & VM_NORESERVE)
251 return;
252
250 if (vma->vm_flags & VM_SHARED) { 253 if (vma->vm_flags & VM_SHARED) {
251 /* Shared mappings always use reserves */ 254 /* Shared mappings always use reserves */
252 resv_huge_pages--; 255 resv_huge_pages--;
@@ -720,25 +723,65 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
720 } 723 }
721} 724}
722 725
726/*
727 * Determine if the huge page at addr within the vma has an associated
728 * reservation. Where it does not we will need to logically increase
729 * reservation and actually increase quota before an allocation can occur.
730 * Where any new reservation would be required the reservation change is
731 * prepared, but not committed. Once the page has been quota'd, allocated
732 * and instantiated the change should be committed via vma_commit_reservation.
733 * No action is required on failure.
734 */
735static int vma_needs_reservation(struct vm_area_struct *vma, unsigned long addr)
736{
737 struct address_space *mapping = vma->vm_file->f_mapping;
738 struct inode *inode = mapping->host;
739
740 if (vma->vm_flags & VM_SHARED) {
741 pgoff_t idx = vma_pagecache_offset(vma, addr);
742 return region_chg(&inode->i_mapping->private_list,
743 idx, idx + 1);
744
745 } else {
746 if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER))
747 return 1;
748 }
749
750 return 0;
751}
752static void vma_commit_reservation(struct vm_area_struct *vma,
753 unsigned long addr)
754{
755 struct address_space *mapping = vma->vm_file->f_mapping;
756 struct inode *inode = mapping->host;
757
758 if (vma->vm_flags & VM_SHARED) {
759 pgoff_t idx = vma_pagecache_offset(vma, addr);
760 region_add(&inode->i_mapping->private_list, idx, idx + 1);
761 }
762}
763
723static struct page *alloc_huge_page(struct vm_area_struct *vma, 764static struct page *alloc_huge_page(struct vm_area_struct *vma,
724 unsigned long addr, int avoid_reserve) 765 unsigned long addr, int avoid_reserve)
725{ 766{
726 struct page *page; 767 struct page *page;
727 struct address_space *mapping = vma->vm_file->f_mapping; 768 struct address_space *mapping = vma->vm_file->f_mapping;
728 struct inode *inode = mapping->host; 769 struct inode *inode = mapping->host;
729 unsigned int chg = 0; 770 unsigned int chg;
730 771
731 /* 772 /*
732 * Processes that did not create the mapping will have no reserves and 773 * Processes that did not create the mapping will have no reserves and
733 * will not have accounted against quota. Check that the quota can be 774 * will not have accounted against quota. Check that the quota can be
734 * made before satisfying the allocation 775 * made before satisfying the allocation
776 * MAP_NORESERVE mappings may also need pages and quota allocated
777 * if no reserve mapping overlaps.
735 */ 778 */
736 if (!(vma->vm_flags & VM_SHARED) && 779 chg = vma_needs_reservation(vma, addr);
737 !is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { 780 if (chg < 0)
738 chg = 1; 781 return ERR_PTR(chg);
782 if (chg)
739 if (hugetlb_get_quota(inode->i_mapping, chg)) 783 if (hugetlb_get_quota(inode->i_mapping, chg))
740 return ERR_PTR(-ENOSPC); 784 return ERR_PTR(-ENOSPC);
741 }
742 785
743 spin_lock(&hugetlb_lock); 786 spin_lock(&hugetlb_lock);
744 page = dequeue_huge_page_vma(vma, addr, avoid_reserve); 787 page = dequeue_huge_page_vma(vma, addr, avoid_reserve);
@@ -755,6 +798,8 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
755 set_page_refcounted(page); 798 set_page_refcounted(page);
756 set_page_private(page, (unsigned long) mapping); 799 set_page_private(page, (unsigned long) mapping);
757 800
801 vma_commit_reservation(vma, addr);
802
758 return page; 803 return page;
759} 804}
760 805
@@ -1560,6 +1605,9 @@ int hugetlb_reserve_pages(struct inode *inode,
1560{ 1605{
1561 long ret, chg; 1606 long ret, chg;
1562 1607
1608 if (vma && vma->vm_flags & VM_NORESERVE)
1609 return 0;
1610
1563 /* 1611 /*
1564 * Shared mappings base their reservation on the number of pages that 1612 * Shared mappings base their reservation on the number of pages that
1565 * are already allocated on behalf of the file. Private mappings need 1613 * are already allocated on behalf of the file. Private mappings need