diff options
author | Mel Gorman <mel@csn.ul.ie> | 2009-02-10 09:02:27 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-02-10 13:48:42 -0500 |
commit | 5a6fe125950676015f5108fb71b2a67441755003 (patch) | |
tree | c985fac46de39392466c4917c497b50bdc9c0757 /mm/hugetlb.c | |
parent | 4c098bcd55fad34dcf224bf8343db6a9ac58fc68 (diff) |
Do not account for the address space used by hugetlbfs using VM_ACCOUNT
When overcommit is disabled, the core VM accounts for pages used by anonymous
shared, private mappings and special mappings. It keeps track of VMAs that
should be accounted for with VM_ACCOUNT and VMAs that never had a reserve
with VM_NORESERVE.
Overcommit for hugetlbfs is much riskier than overcommit for base pages
due to contiguity requirements. hugetlbfs avoids overcommitting on both shared
and private mappings using reservation counters that are checked and updated
during mmap(). This ensures (within limits) that hugepages exist in the
future when faults occur; otherwise it is too easy for applications to be SIGKILLed.
As hugetlbfs makes its own reservations of a different unit to the base page
size, VM_ACCOUNT should never be set. Even if the units were correct, we would
double account for the usage in the core VM and hugetlbfs. VM_NORESERVE may
be set because an application can request no reserves be made for hugetlbfs
at the risk of getting killed later.
With commit fc8744adc870a8d4366908221508bb113d8b72ee, VM_NORESERVE and
VM_ACCOUNT are getting unconditionally set for hugetlbfs-backed mappings. This
breaks the accounting for both the core VM and hugetlbfs: it can trigger an
OOM storm when hugepage pools are too small, and can cause lockups and
corrupted counters otherwise. This patch brings hugetlbfs more in line with
how the core VM treats VM_NORESERVE but prevents VM_ACCOUNT being set.
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 39 |
1 files changed, 25 insertions, 14 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 618e9830408..20746420954 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2269,14 +2269,12 @@ void hugetlb_change_protection(struct vm_area_struct *vma, | |||
2269 | 2269 | ||
2270 | int hugetlb_reserve_pages(struct inode *inode, | 2270 | int hugetlb_reserve_pages(struct inode *inode, |
2271 | long from, long to, | 2271 | long from, long to, |
2272 | struct vm_area_struct *vma) | 2272 | struct vm_area_struct *vma, |
2273 | int acctflag) | ||
2273 | { | 2274 | { |
2274 | long ret, chg; | 2275 | long ret = 0, chg; |
2275 | struct hstate *h = hstate_inode(inode); | 2276 | struct hstate *h = hstate_inode(inode); |
2276 | 2277 | ||
2277 | if (vma && vma->vm_flags & VM_NORESERVE) | ||
2278 | return 0; | ||
2279 | |||
2280 | /* | 2278 | /* |
2281 | * Shared mappings base their reservation on the number of pages that | 2279 | * Shared mappings base their reservation on the number of pages that |
2282 | * are already allocated on behalf of the file. Private mappings need | 2280 | * are already allocated on behalf of the file. Private mappings need |
@@ -2285,22 +2283,25 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
2285 | */ | 2283 | */ |
2286 | if (!vma || vma->vm_flags & VM_SHARED) | 2284 | if (!vma || vma->vm_flags & VM_SHARED) |
2287 | chg = region_chg(&inode->i_mapping->private_list, from, to); | 2285 | chg = region_chg(&inode->i_mapping->private_list, from, to); |
2288 | else { | 2286 | else |
2289 | struct resv_map *resv_map = resv_map_alloc(); | ||
2290 | if (!resv_map) | ||
2291 | return -ENOMEM; | ||
2292 | |||
2293 | chg = to - from; | 2287 | chg = to - from; |
2294 | 2288 | ||
2295 | set_vma_resv_map(vma, resv_map); | ||
2296 | set_vma_resv_flags(vma, HPAGE_RESV_OWNER); | ||
2297 | } | ||
2298 | |||
2299 | if (chg < 0) | 2289 | if (chg < 0) |
2300 | return chg; | 2290 | return chg; |
2301 | 2291 | ||
2302 | if (hugetlb_get_quota(inode->i_mapping, chg)) | 2292 | if (hugetlb_get_quota(inode->i_mapping, chg)) |
2303 | return -ENOSPC; | 2293 | return -ENOSPC; |
2294 | |||
2295 | /* | ||
2296 | * Only apply hugepage reservation if asked. We still have to | ||
2297 | * take the filesystem quota because it is an upper limit | ||
2298 | * defined for the mount and not necessarily memory as a whole | ||
2299 | */ | ||
2300 | if (acctflag & VM_NORESERVE) { | ||
2301 | reset_vma_resv_huge_pages(vma); | ||
2302 | return 0; | ||
2303 | } | ||
2304 | |||
2304 | ret = hugetlb_acct_memory(h, chg); | 2305 | ret = hugetlb_acct_memory(h, chg); |
2305 | if (ret < 0) { | 2306 | if (ret < 0) { |
2306 | hugetlb_put_quota(inode->i_mapping, chg); | 2307 | hugetlb_put_quota(inode->i_mapping, chg); |
@@ -2308,6 +2309,16 @@ int hugetlb_reserve_pages(struct inode *inode, | |||
2308 | } | 2309 | } |
2309 | if (!vma || vma->vm_flags & VM_SHARED) | 2310 | if (!vma || vma->vm_flags & VM_SHARED) |
2310 | region_add(&inode->i_mapping->private_list, from, to); | 2311 | region_add(&inode->i_mapping->private_list, from, to); |
2312 | else { | ||
2313 | struct resv_map *resv_map = resv_map_alloc(); | ||
2314 | |||
2315 | if (!resv_map) | ||
2316 | return -ENOMEM; | ||
2317 | |||
2318 | set_vma_resv_map(vma, resv_map); | ||
2319 | set_vma_resv_flags(vma, HPAGE_RESV_OWNER); | ||
2320 | } | ||
2321 | |||
2311 | return 0; | 2322 | return 0; |
2312 | } | 2323 | } |
2313 | 2324 | ||