aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mmap.c
diff options
context:
space:
mode:
authorMel Gorman <mel@csn.ul.ie>2009-02-10 09:02:27 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2009-02-10 13:48:42 -0500
commit5a6fe125950676015f5108fb71b2a67441755003 (patch)
treec985fac46de39392466c4917c497b50bdc9c0757 /mm/mmap.c
parent4c098bcd55fad34dcf224bf8343db6a9ac58fc68 (diff)
Do not account for the address space used by hugetlbfs using VM_ACCOUNT
When overcommit is disabled, the core VM accounts for pages used by anonymous shared mappings, private mappings and special mappings. It keeps track of VMAs that should be accounted for with VM_ACCOUNT and VMAs that never had a reserve with VM_NORESERVE. Overcommit for hugetlbfs is much riskier than overcommit for base pages due to contiguity requirements. It avoids overcommitting on both shared and private mappings using reservation counters that are checked and updated during mmap(). This ensures (within limits) that hugepages exist in the future when faults occur; otherwise it would be too easy for applications to be SIGKILLed. As hugetlbfs makes its own reservations in a different unit from the base page size, VM_ACCOUNT should never be set. Even if the units were correct, we would double account for the usage in the core VM and hugetlbfs. VM_NORESERVE may be set because an application can request that no reserves be made for hugetlbfs at the risk of getting killed later. With commit fc8744adc870a8d4366908221508bb113d8b72ee, VM_NORESERVE and VM_ACCOUNT are getting unconditionally set for hugetlbfs-backed mappings. This breaks the accounting for both the core VM and hugetlbfs: it can trigger an OOM storm when hugepage pools are too small, and lockups and corrupted counters otherwise. This patch brings hugetlbfs more in line with how the core VM treats VM_NORESERVE but prevents VM_ACCOUNT being set. Signed-off-by: Mel Gorman <mel@csn.ul.ie> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--mm/mmap.c38
1 files changed, 22 insertions, 16 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index 214b6a258eeb..eb1270bebe67 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -918,7 +918,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
918 struct inode *inode; 918 struct inode *inode;
919 unsigned int vm_flags; 919 unsigned int vm_flags;
920 int error; 920 int error;
921 int accountable = 1;
922 unsigned long reqprot = prot; 921 unsigned long reqprot = prot;
923 922
924 /* 923 /*
@@ -1019,8 +1018,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1019 return -EPERM; 1018 return -EPERM;
1020 vm_flags &= ~VM_MAYEXEC; 1019 vm_flags &= ~VM_MAYEXEC;
1021 } 1020 }
1022 if (is_file_hugepages(file))
1023 accountable = 0;
1024 1021
1025 if (!file->f_op || !file->f_op->mmap) 1022 if (!file->f_op || !file->f_op->mmap)
1026 return -ENODEV; 1023 return -ENODEV;
@@ -1053,8 +1050,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
1053 if (error) 1050 if (error)
1054 return error; 1051 return error;
1055 1052
1056 return mmap_region(file, addr, len, flags, vm_flags, pgoff, 1053 return mmap_region(file, addr, len, flags, vm_flags, pgoff);
1057 accountable);
1058} 1054}
1059EXPORT_SYMBOL(do_mmap_pgoff); 1055EXPORT_SYMBOL(do_mmap_pgoff);
1060 1056
@@ -1092,17 +1088,23 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
1092 1088
1093/* 1089/*
1094 * We account for memory if it's a private writeable mapping, 1090 * We account for memory if it's a private writeable mapping,
1095 * and VM_NORESERVE wasn't set. 1091 * not hugepages and VM_NORESERVE wasn't set.
1096 */ 1092 */
1097static inline int accountable_mapping(unsigned int vm_flags) 1093static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
1098{ 1094{
1095 /*
1096 * hugetlb has its own accounting separate from the core VM
1097 * VM_HUGETLB may not be set yet so we cannot check for that flag.
1098 */
1099 if (file && is_file_hugepages(file))
1100 return 0;
1101
1099 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; 1102 return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
1100} 1103}
1101 1104
1102unsigned long mmap_region(struct file *file, unsigned long addr, 1105unsigned long mmap_region(struct file *file, unsigned long addr,
1103 unsigned long len, unsigned long flags, 1106 unsigned long len, unsigned long flags,
1104 unsigned int vm_flags, unsigned long pgoff, 1107 unsigned int vm_flags, unsigned long pgoff)
1105 int accountable)
1106{ 1108{
1107 struct mm_struct *mm = current->mm; 1109 struct mm_struct *mm = current->mm;
1108 struct vm_area_struct *vma, *prev; 1110 struct vm_area_struct *vma, *prev;
@@ -1128,18 +1130,22 @@ munmap_back:
1128 1130
1129 /* 1131 /*
1130 * Set 'VM_NORESERVE' if we should not account for the 1132 * Set 'VM_NORESERVE' if we should not account for the
1131 * memory use of this mapping. We only honor MAP_NORESERVE 1133 * memory use of this mapping.
1132 * if we're allowed to overcommit memory.
1133 */ 1134 */
1134 if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) 1135 if ((flags & MAP_NORESERVE)) {
1135 vm_flags |= VM_NORESERVE; 1136 /* We honor MAP_NORESERVE if allowed to overcommit */
1136 if (!accountable) 1137 if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
1137 vm_flags |= VM_NORESERVE; 1138 vm_flags |= VM_NORESERVE;
1139
1140 /* hugetlb applies strict overcommit unless MAP_NORESERVE */
1141 if (file && is_file_hugepages(file))
1142 vm_flags |= VM_NORESERVE;
1143 }
1138 1144
1139 /* 1145 /*
1140 * Private writable mapping: check memory availability 1146 * Private writable mapping: check memory availability
1141 */ 1147 */
1142 if (accountable_mapping(vm_flags)) { 1148 if (accountable_mapping(file, vm_flags)) {
1143 charged = len >> PAGE_SHIFT; 1149 charged = len >> PAGE_SHIFT;
1144 if (security_vm_enough_memory(charged)) 1150 if (security_vm_enough_memory(charged))
1145 return -ENOMEM; 1151 return -ENOMEM;