aboutsummaryrefslogtreecommitdiffstats
path: root/ipc
diff options
context:
space:
mode:
author Mel Gorman <mel@csn.ul.ie> 2009-02-10 09:02:27 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2009-02-10 13:48:42 -0500
commit 5a6fe125950676015f5108fb71b2a67441755003 (patch)
tree c985fac46de39392466c4917c497b50bdc9c0757 /ipc
parent 4c098bcd55fad34dcf224bf8343db6a9ac58fc68 (diff)
Do not account for the address space used by hugetlbfs using VM_ACCOUNT
When overcommit is disabled, the core VM accounts for pages used by anonymous shared mappings, private mappings and special mappings. It keeps track of VMAs that should be accounted for with VM_ACCOUNT and VMAs that never had a reserve with VM_NORESERVE. Overcommit for hugetlbfs is much riskier than overcommit for base pages due to contiguity requirements. hugetlbfs avoids overcommitting on both shared and private mappings by using reservation counters that are checked and updated during mmap(). This ensures (within limits) that hugepages exist in the future when faults occur; otherwise it is too easy for applications to be SIGKILLed. As hugetlbfs makes its own reservations in a different unit from the base page size, VM_ACCOUNT should never be set. Even if the units were correct, we would double account for the usage in the core VM and hugetlbfs. VM_NORESERVE may be set because an application can request that no reserves be made for hugetlbfs, at the risk of getting killed later. With commit fc8744adc870a8d4366908221508bb113d8b72ee, VM_NORESERVE and VM_ACCOUNT are getting unconditionally set for hugetlbfs-backed mappings. This breaks the accounting for both the core VM and hugetlbfs: it can trigger an OOM storm when hugepage pools are too small, and lockups and corrupted counters otherwise. This patch brings hugetlbfs more in line with how the core VM treats VM_NORESERVE but prevents VM_ACCOUNT from being set. Signed-off-by: Mel Gorman <mel@csn.ul.ie> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'ipc')
-rw-r--r-- ipc/shm.c 8
1 files changed, 5 insertions, 3 deletions
diff --git a/ipc/shm.c b/ipc/shm.c
index f8f69fad3a2..05d51d2a792 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -340,6 +340,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
340 struct file * file; 340 struct file * file;
341 char name[13]; 341 char name[13];
342 int id; 342 int id;
343 int acctflag = 0;
343 344
344 if (size < SHMMIN || size > ns->shm_ctlmax) 345 if (size < SHMMIN || size > ns->shm_ctlmax)
345 return -EINVAL; 346 return -EINVAL;
@@ -364,11 +365,12 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
364 365
365 sprintf (name, "SYSV%08x", key); 366 sprintf (name, "SYSV%08x", key);
366 if (shmflg & SHM_HUGETLB) { 367 if (shmflg & SHM_HUGETLB) {
367 /* hugetlb_file_setup takes care of mlock user accounting */ 368 /* hugetlb_file_setup applies strict accounting */
368 file = hugetlb_file_setup(name, size); 369 if (shmflg & SHM_NORESERVE)
370 acctflag = VM_NORESERVE;
371 file = hugetlb_file_setup(name, size, acctflag);
369 shp->mlock_user = current_user(); 372 shp->mlock_user = current_user();
370 } else { 373 } else {
371 int acctflag = 0;
372 /* 374 /*
373 * Do not allow no accounting for OVERCOMMIT_NEVER, even 375 * Do not allow no accounting for OVERCOMMIT_NEVER, even
374 * if it's asked for. 376 * if it's asked for.