From de33c8db5910cda599899dd431cc30d7c1018cbf Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 29 Jan 2009 17:46:42 -0800 Subject: Fix OOPS in mmap_region() when merging adjacent VM_LOCKED file segments As of commit ba470de43188cdbff795b5da43a1474523c6c2fb ("map: handle mlocked pages during map, remap, unmap") we now use the 'vma' variable at the end of mmap_region() to handle the page-in of newly mapped mlocked pages. However, if we merged adjacent vma's together, the vma we're using may be stale. We historically consciously avoided using it after the merge operation, but that got overlooked when redoing the locked page handling. This commit simplifies mmap_region() by doing any vma merges early, avoiding the issue entirely, and 'vma' will always be valid. As pointed out by Hugh Dickins, this depends on any drivers that change the page offset of flags to have set one of the VM_SPECIAL bits (so that they cannot trigger the early merge logic), but that's true in general. Reported-and-tested-by: Maksim Yevmenkin Cc: Lee Schermerhorn Cc: Nick Piggin Cc: Andrew Morton Cc: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/mmap.c | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index 8d95902e9a38..d3fa10a726cf 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1134,16 +1134,11 @@ munmap_back: } /* - * Can we just expand an old private anonymous mapping? - * The VM_SHARED test is necessary because shmem_zero_setup - * will create the file object for a shared anonymous map below. + * Can we just expand an old mapping? */ - if (!file && !(vm_flags & VM_SHARED)) { - vma = vma_merge(mm, prev, addr, addr + len, vm_flags, - NULL, NULL, pgoff, NULL); - if (vma) - goto out; - } + vma = vma_merge(mm, prev, addr, addr + len, vm_flags, NULL, file, pgoff, NULL); + if (vma) + goto out; /* * Determine the object being mapped and call the appropriate @@ -1206,17 +1201,8 @@ munmap_back: if (vma_wants_writenotify(vma)) vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED); - if (file && vma_merge(mm, prev, addr, vma->vm_end, - vma->vm_flags, NULL, file, pgoff, vma_policy(vma))) { - mpol_put(vma_policy(vma)); - kmem_cache_free(vm_area_cachep, vma); - fput(file); - if (vm_flags & VM_EXECUTABLE) - removed_exe_file_vma(mm); - } else { - vma_link(mm, vma, prev, rb_link, rb_parent); - file = vma->vm_file; - } + vma_link(mm, vma, prev, rb_link, rb_parent); + file = vma->vm_file; /* Once vma denies write, undo our temporary denial count */ if (correct_wcount) -- cgit v1.2.2 From 33bfad54b58cf05cfe6678c3ec9235d4bc8db4c2 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Jan 2009 11:37:22 -0800 Subject: Allow opportunistic merging of VM_CAN_NONLINEAR areas Commit de33c8db5910cda599899dd431cc30d7c1018cbf ("Fix OOPS in mmap_region() when merging adjacent VM_LOCKED file segments") unified the vma merging of anonymous and file maps to just one place, which simplified the code and fixed a use-after-free bug that could cause an oops. But by doing the merge opportunistically before even having called ->mmap() on the file method, it now compares two different 'vm_flags' values: the pre-mmap() value of the new not-yet-formed vma, and previous mappings of the same file around it. And in doing so, it refused to merge the common file case, which adds a marker to say "I can be made non-linear". This fixes it by just adding a set of flags that don't have to match, because we know they are ok to merge. Currently it's only that single VM_CAN_NONLINEAR flag, but at least conceptually there could be others in the future. Reported-and-acked-by: Hugh Dickins Cc: Lee Schermerhorn Cc: Nick Piggin Cc: Andrew Morton Cc: Greg KH Signed-off-by: Linus Torvalds --- mm/mmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index d3fa10a726cf..c581df14d0de 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -658,6 +658,9 @@ again: remove_next = 1 + (end > next->vm_end); validate_mm(mm); } +/* Flags that can be inherited from an existing mapping when merging */ +#define VM_MERGEABLE_FLAGS (VM_CAN_NONLINEAR) + /* * If the vma has a ->close operation then the driver probably needs to release * per-vma resources, so we don't attempt to merge those. @@ -665,7 +668,7 @@ again: remove_next = 1 + (end > next->vm_end); static inline int is_mergeable_vma(struct vm_area_struct *vma, struct file *file, unsigned long vm_flags) { - if (vma->vm_flags != vm_flags) + if ((vma->vm_flags ^ vm_flags) & ~VM_MERGEABLE_FLAGS) return 0; if (vma->vm_file != file) return 0; -- cgit v1.2.2 From fc8744adc870a8d4366908221508bb113d8b72ee Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 31 Jan 2009 15:08:56 -0800 Subject: Stop playing silly games with the VM_ACCOUNT flag The mmap_region() code would temporarily set the VM_ACCOUNT flag for anonymous shared mappings just to inform shmem_zero_setup() that it should enable accounting for the resulting shm object. It would then clear the flag after calling ->mmap (for the /dev/zero case) or doing shmem_zero_setup() (for the MAP_ANON case). This just resulted in vma merge issues, but also made for just unnecessary confusion. Use the already-existing VM_NORESERVE flag for this instead, and let shmem_{zero|file}_setup() just figure it out from that. This also happens to make it obvious that the new DRI2 GEM layer uses a non-reserving backing store for its object allocation - which is quite possibly not intentional. But since I didn't want to change semantics in this patch, I left it alone, and just updated the caller to use the new flag semantics. Signed-off-by: Linus Torvalds --- mm/mmap.c | 48 +++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 23 deletions(-) (limited to 'mm/mmap.c') diff --git a/mm/mmap.c b/mm/mmap.c index c581df14d0de..214b6a258eeb 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1090,6 +1090,15 @@ int vma_wants_writenotify(struct vm_area_struct *vma) mapping_cap_account_dirty(vma->vm_file->f_mapping); } +/* + * We account for memory if it's a private writeable mapping, + * and VM_NORESERVE wasn't set. + */ +static inline int accountable_mapping(unsigned int vm_flags) +{ + return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; +} + unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, unsigned int vm_flags, unsigned long pgoff, @@ -1117,23 +1126,24 @@ munmap_back: if (!may_expand_vm(mm, len >> PAGE_SHIFT)) return -ENOMEM; - if (flags & MAP_NORESERVE) + /* + * Set 'VM_NORESERVE' if we should not account for the + * memory use of this mapping. We only honor MAP_NORESERVE + * if we're allowed to overcommit memory. + */ + if ((flags & MAP_NORESERVE) && sysctl_overcommit_memory != OVERCOMMIT_NEVER) + vm_flags |= VM_NORESERVE; + if (!accountable) vm_flags |= VM_NORESERVE; - if (accountable && (!(flags & MAP_NORESERVE) || - sysctl_overcommit_memory == OVERCOMMIT_NEVER)) { - if (vm_flags & VM_SHARED) { - /* Check memory availability in shmem_file_setup? */ - vm_flags |= VM_ACCOUNT; - } else if (vm_flags & VM_WRITE) { - /* - * Private writable mapping: check memory availability - */ - charged = len >> PAGE_SHIFT; - if (security_vm_enough_memory(charged)) - return -ENOMEM; - vm_flags |= VM_ACCOUNT; - } + /* + * Private writable mapping: check memory availability + */ + if (accountable_mapping(vm_flags)) { + charged = len >> PAGE_SHIFT; + if (security_vm_enough_memory(charged)) + return -ENOMEM; + vm_flags |= VM_ACCOUNT; } /* @@ -1184,14 +1194,6 @@ munmap_back: goto free_vma; } - /* We set VM_ACCOUNT in a shared mapping's vm_flags, to inform - * shmem_zero_setup (perhaps called through /dev/zero's ->mmap) - * that memory reservation must be checked; but that reservation - * belongs to shared memory object, not to vma: so now clear it. - */ - if ((vm_flags & (VM_SHARED|VM_ACCOUNT)) == (VM_SHARED|VM_ACCOUNT)) - vma->vm_flags &= ~VM_ACCOUNT; - /* Can addr have changed?? * * Answer: Yes, several device drivers can do it in their -- cgit v1.2.2