diff options
author | David Gibson <david@gibson.dropbear.id.au> | 2006-03-22 03:08:55 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2006-03-22 10:54:03 -0500 |
commit | b45b5bd65f668a665db40d093e4e1fe563533608 (patch) | |
tree | aa3806bd87fd7aa719b561e4d468c779f6adb31b /include/linux | |
parent | 3935baa9bcda3ccaee4f7849f5157d316e34412e (diff) |
[PATCH] hugepage: Strict page reservation for hugepage inodes
These days, hugepages are demand-allocated at first fault time. There's a
somewhat dubious (and racy) heuristic when making a new mmap() to check if
there are enough available hugepages to fully satisfy that mapping.
A particularly obvious case where the heuristic breaks down is where a
process maps its hugepages not as a single chunk, but as a bunch of
individually mmap()ed (or shmat()ed) blocks without touching and
instantiating the pages in between allocations. In this case the size of
each block is compared against the total number of available hugepages.
It's thus easy for the process to become overcommitted, because each block
mapping will succeed, although the total number of hugepages required by
all blocks exceeds the number available. In particular, this defeats any
program that detects a mapping failure and adjusts its hugepage usage
downward accordingly.
The patch below addresses this problem, by strictly reserving a number of
physical hugepages for hugepage inodes which have been mapped, but not
instantiated. MAP_SHARED mappings are thus "safe" - they will fail on
mmap(), not later with an OOM SIGKILL. MAP_PRIVATE mappings can still
trigger an OOM. (Actually SHARED mappings can technically still OOM, but
only if the sysadmin explicitly reduces the hugepage pool between mapping
and instantiation.)
This patch appears to address the problem at hand - it allows DB2 to start
correctly, for instance, which previously suffered the failure described
above.
This patch causes no regressions on the libhugetlbfs testsuite, and makes a
test (designed to catch this problem) pass which previously failed (ppc64,
POWER5).
Signed-off-by: David Gibson <dwg@au1.ibm.com>
Cc: William Lee Irwin III <wli@holomorphy.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'include/linux')
-rw-r--r-- | include/linux/hugetlb.h | 8 |
1 files changed, 6 insertions, 2 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fa83836b63d2..cafe73eecb05 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h | |||
@@ -20,7 +20,6 @@ void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long) | |||
20 | int hugetlb_prefault(struct address_space *, struct vm_area_struct *); | 20 | int hugetlb_prefault(struct address_space *, struct vm_area_struct *); |
21 | int hugetlb_report_meminfo(char *); | 21 | int hugetlb_report_meminfo(char *); |
22 | int hugetlb_report_node_meminfo(int, char *); | 22 | int hugetlb_report_node_meminfo(int, char *); |
23 | int is_hugepage_mem_enough(size_t); | ||
24 | unsigned long hugetlb_total_pages(void); | 23 | unsigned long hugetlb_total_pages(void); |
25 | struct page *alloc_huge_page(struct vm_area_struct *, unsigned long); | 24 | struct page *alloc_huge_page(struct vm_area_struct *, unsigned long); |
26 | void free_huge_page(struct page *); | 25 | void free_huge_page(struct page *); |
@@ -89,7 +88,6 @@ static inline unsigned long hugetlb_total_pages(void) | |||
89 | #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) | 88 | #define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; }) |
90 | #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) | 89 | #define hugetlb_prefault(mapping, vma) ({ BUG(); 0; }) |
91 | #define unmap_hugepage_range(vma, start, end) BUG() | 90 | #define unmap_hugepage_range(vma, start, end) BUG() |
92 | #define is_hugepage_mem_enough(size) 0 | ||
93 | #define hugetlb_report_meminfo(buf) 0 | 91 | #define hugetlb_report_meminfo(buf) 0 |
94 | #define hugetlb_report_node_meminfo(n, buf) 0 | 92 | #define hugetlb_report_node_meminfo(n, buf) 0 |
95 | #define follow_huge_pmd(mm, addr, pmd, write) NULL | 93 | #define follow_huge_pmd(mm, addr, pmd, write) NULL |
@@ -132,6 +130,8 @@ struct hugetlbfs_sb_info { | |||
132 | 130 | ||
133 | struct hugetlbfs_inode_info { | 131 | struct hugetlbfs_inode_info { |
134 | struct shared_policy policy; | 132 | struct shared_policy policy; |
133 | /* Protected by the (global) hugetlb_lock */ | ||
134 | unsigned long prereserved_hpages; | ||
135 | struct inode vfs_inode; | 135 | struct inode vfs_inode; |
136 | }; | 136 | }; |
137 | 137 | ||
@@ -148,6 +148,10 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) | |||
148 | extern struct file_operations hugetlbfs_file_operations; | 148 | extern struct file_operations hugetlbfs_file_operations; |
149 | extern struct vm_operations_struct hugetlb_vm_ops; | 149 | extern struct vm_operations_struct hugetlb_vm_ops; |
150 | struct file *hugetlb_zero_setup(size_t); | 150 | struct file *hugetlb_zero_setup(size_t); |
151 | int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info, | ||
152 | unsigned long atleast_hpages); | ||
153 | void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info, | ||
154 | unsigned long atmost_hpages); | ||
151 | int hugetlb_get_quota(struct address_space *mapping); | 155 | int hugetlb_get_quota(struct address_space *mapping); |
152 | void hugetlb_put_quota(struct address_space *mapping); | 156 | void hugetlb_put_quota(struct address_space *mapping); |
153 | 157 | ||