author		Mel Gorman <mel@csn.ul.ie>	2009-02-10 09:02:27 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-02-10 13:48:42 -0500
commit		5a6fe125950676015f5108fb71b2a67441755003
tree		c985fac46de39392466c4917c497b50bdc9c0757 /mm/hugetlb.c
parent		4c098bcd55fad34dcf224bf8343db6a9ac58fc68
Do not account for the address space used by hugetlbfs using VM_ACCOUNT
When overcommit is disabled, the core VM accounts for pages used by anonymous shared, private mappings and special mappings. It keeps track of VMAs that should be accounted for with VM_ACCOUNT and VMAs that never had a reserve with VM_NORESERVE.

Overcommit for hugetlbfs is much riskier than overcommit for base pages due to contiguity requirements. It avoids overcommitting on both shared and private mappings using reservation counters that are checked and updated during mmap(). This ensures (within limits) that hugepages exist in the future when faults occur; without it, applications are too easily SIGKILLed.

As hugetlbfs makes its own reservations in a different unit from the base page size, VM_ACCOUNT should never be set. Even if the units were correct, the usage would be double-accounted by the core VM and hugetlbfs. VM_NORESERVE may be set, because an application can request that no reserves be made for hugetlbfs at the risk of getting killed later.

With commit fc8744adc870a8d4366908221508bb113d8b72ee, VM_NORESERVE and VM_ACCOUNT are unconditionally set for hugetlbfs-backed mappings. This breaks the accounting for both the core VM and hugetlbfs: it can trigger an OOM storm when hugepage pools are too small, and lockups and corrupted counters otherwise.

This patch brings hugetlbfs more in line with how the core VM treats VM_NORESERVE, but prevents VM_ACCOUNT from being set.

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
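For reference, the no-reserve behaviour described above is what an application requests by passing MAP_NORESERVE to mmap() on a hugetlbfs file. Below is a minimal userspace sketch; the mount point /dev/hugepages, the file name, and the 2MB hugepage size are assumptions for illustration, not part of this patch:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

#define LENGTH (4UL * 1024 * 1024)	/* two 2MB hugepages (assumed size) */

int main(void)
{
	/* /dev/hugepages is an assumed hugetlbfs mount point */
	int fd = open("/dev/hugepages/example", O_CREAT | O_RDWR, 0600);
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	/*
	 * MAP_NORESERVE asks the kernel to skip the hugepage
	 * reservation at mmap() time; the mapping is created, but a
	 * later fault can fail if the hugepage pool is exhausted.
	 */
	char *addr = mmap(NULL, LENGTH, PROT_READ | PROT_WRITE,
			  MAP_SHARED | MAP_NORESERVE, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return EXIT_FAILURE;
	}

	addr[0] = 1;	/* touch the mapping to fault in a hugepage */

	munmap(addr, LENGTH);
	close(fd);
	unlink("/dev/hugepages/example");
	return 0;
}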
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	39
1 file changed, 25 insertions(+), 14 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 618e9830408..20746420954 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2269,14 +2269,12 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 
 int hugetlb_reserve_pages(struct inode *inode,
 					long from, long to,
-					struct vm_area_struct *vma)
+					struct vm_area_struct *vma,
+					int acctflag)
 {
-	long ret, chg;
+	long ret = 0, chg;
 	struct hstate *h = hstate_inode(inode);
 
-	if (vma && vma->vm_flags & VM_NORESERVE)
-		return 0;
-
 	/*
 	 * Shared mappings base their reservation on the number of pages that
 	 * are already allocated on behalf of the file. Private mappings need
@@ -2285,22 +2283,25 @@ int hugetlb_reserve_pages(struct inode *inode,
 	 */
 	if (!vma || vma->vm_flags & VM_SHARED)
 		chg = region_chg(&inode->i_mapping->private_list, from, to);
-	else {
-		struct resv_map *resv_map = resv_map_alloc();
-		if (!resv_map)
-			return -ENOMEM;
-
+	else
 		chg = to - from;
 
-		set_vma_resv_map(vma, resv_map);
-		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
-	}
-
 	if (chg < 0)
 		return chg;
 
 	if (hugetlb_get_quota(inode->i_mapping, chg))
 		return -ENOSPC;
+
+	/*
+	 * Only apply hugepage reservation if asked. We still have to
+	 * take the filesystem quota because it is an upper limit
+	 * defined for the mount and not necessarily memory as a whole
+	 */
+	if (acctflag & VM_NORESERVE) {
+		reset_vma_resv_huge_pages(vma);
+		return 0;
+	}
+
 	ret = hugetlb_acct_memory(h, chg);
 	if (ret < 0) {
 		hugetlb_put_quota(inode->i_mapping, chg);
@@ -2308,6 +2309,16 @@ int hugetlb_reserve_pages(struct inode *inode,
 	}
 	if (!vma || vma->vm_flags & VM_SHARED)
 		region_add(&inode->i_mapping->private_list, from, to);
+	else {
+		struct resv_map *resv_map = resv_map_alloc();
+
+		if (!resv_map)
+			return -ENOMEM;
+
+		set_vma_resv_map(vma, resv_map);
+		set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
+	}
+
 	return 0;
 }
 
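For orientation, a sketch of how a caller forwards the new acctflag argument. This is modeled on hugetlbfs_file_mmap() in fs/hugetlbfs/inode.c and is paraphrased for illustration; the hunk is not part of this diff, which (per the diffstat) is limited to mm/hugetlb.c:

	/*
	 * Paraphrased call-site sketch (assumed shape): the vma's flags
	 * are forwarded so that hugetlb_reserve_pages() can test
	 * acctflag & VM_NORESERVE itself, after taking the filesystem
	 * quota, rather than the caller returning early and skipping
	 * the quota check entirely.
	 */
	if (hugetlb_reserve_pages(inode,
				  vma->vm_pgoff >> huge_page_order(h),
				  len >> huge_page_shift(h), vma,
				  vma->vm_flags))
		goto out;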