aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux/hugetlb.h
diff options
context:
space:
mode:
author: David Gibson <david@gibson.dropbear.id.au> 2006-03-22 03:08:55 -0500
committer: Linus Torvalds <torvalds@g5.osdl.org> 2006-03-22 10:54:03 -0500
commit: b45b5bd65f668a665db40d093e4e1fe563533608 (patch)
tree: aa3806bd87fd7aa719b561e4d468c779f6adb31b /include/linux/hugetlb.h
parent: 3935baa9bcda3ccaee4f7849f5157d316e34412e (diff)
[PATCH] hugepage: Strict page reservation for hugepage inodes
These days, hugepages are demand-allocated at first fault time. There's a somewhat dubious (and racy) heuristic when making a new mmap() to check if there are enough available hugepages to fully satisfy that mapping. A particularly obvious case where the heuristic breaks down is where a process maps its hugepages not as a single chunk, but as a bunch of individually mmap()ed (or shmat()ed) blocks without touching and instantiating the pages in between allocations. In this case the size of each block is compared against the total number of available hugepages. It's thus easy for the process to become overcommitted, because each block mapping will succeed, although the total number of hugepages required by all blocks exceeds the number available. In particular, this defeats a program which detects a mapping failure and adjusts its hugepage usage downward accordingly. The patch below addresses this problem, by strictly reserving a number of physical hugepages for hugepage inodes which have been mapped, but not instantiated. MAP_SHARED mappings are thus "safe" - they will fail on mmap(), not later with an OOM SIGKILL. MAP_PRIVATE mappings can still trigger an OOM. (Actually SHARED mappings can technically still OOM, but only if the sysadmin explicitly reduces the hugepage pool between mapping and instantiation.) This patch appears to address the problem at hand - it allows DB2 to start correctly, for instance, which previously suffered the failure described above. This patch causes no regressions on the libhugetlbfs testsuite, and makes a test (designed to catch this problem) pass which previously failed (ppc64, POWER5). Signed-off-by: David Gibson <dwg@au1.ibm.com> Cc: William Lee Irwin III <wli@holomorphy.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include/linux/hugetlb.h')
-rw-r--r-- include/linux/hugetlb.h 8
1 files changed, 6 insertions, 2 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index fa83836b63d2..cafe73eecb05 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -20,7 +20,6 @@ void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long)
20int hugetlb_prefault(struct address_space *, struct vm_area_struct *); 20int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
21int hugetlb_report_meminfo(char *); 21int hugetlb_report_meminfo(char *);
22int hugetlb_report_node_meminfo(int, char *); 22int hugetlb_report_node_meminfo(int, char *);
23int is_hugepage_mem_enough(size_t);
24unsigned long hugetlb_total_pages(void); 23unsigned long hugetlb_total_pages(void);
25struct page *alloc_huge_page(struct vm_area_struct *, unsigned long); 24struct page *alloc_huge_page(struct vm_area_struct *, unsigned long);
26void free_huge_page(struct page *); 25void free_huge_page(struct page *);
@@ -89,7 +88,6 @@ static inline unsigned long hugetlb_total_pages(void)
89#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) 88#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
90#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) 89#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
91#define unmap_hugepage_range(vma, start, end) BUG() 90#define unmap_hugepage_range(vma, start, end) BUG()
92#define is_hugepage_mem_enough(size) 0
93#define hugetlb_report_meminfo(buf) 0 91#define hugetlb_report_meminfo(buf) 0
94#define hugetlb_report_node_meminfo(n, buf) 0 92#define hugetlb_report_node_meminfo(n, buf) 0
95#define follow_huge_pmd(mm, addr, pmd, write) NULL 93#define follow_huge_pmd(mm, addr, pmd, write) NULL
@@ -132,6 +130,8 @@ struct hugetlbfs_sb_info {
132 130
133struct hugetlbfs_inode_info { 131struct hugetlbfs_inode_info {
134 struct shared_policy policy; 132 struct shared_policy policy;
133 /* Protected by the (global) hugetlb_lock */
134 unsigned long prereserved_hpages;
135 struct inode vfs_inode; 135 struct inode vfs_inode;
136}; 136};
137 137
@@ -148,6 +148,10 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
148extern struct file_operations hugetlbfs_file_operations; 148extern struct file_operations hugetlbfs_file_operations;
149extern struct vm_operations_struct hugetlb_vm_ops; 149extern struct vm_operations_struct hugetlb_vm_ops;
150struct file *hugetlb_zero_setup(size_t); 150struct file *hugetlb_zero_setup(size_t);
151int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info,
152 unsigned long atleast_hpages);
153void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info,
154 unsigned long atmost_hpages);
151int hugetlb_get_quota(struct address_space *mapping); 155int hugetlb_get_quota(struct address_space *mapping);
152void hugetlb_put_quota(struct address_space *mapping); 156void hugetlb_put_quota(struct address_space *mapping);
153 157