aboutsummaryrefslogtreecommitdiffstats
path: root/mm/mmap.c
diff options
context:
space:
mode:
authorKirill A. Shutemov <kirill.shutemov@linux.intel.com>2015-02-10 17:09:46 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-10 17:30:30 -0500
commitc8d78c1823f46519473949d33f0d1d33fe21ea16 (patch)
treef098d7732dfcfb494365f02cd325be8d97e9eb37 /mm/mmap.c
parent3c4868710951dd7a6b991d71ca5f46737c4acf28 (diff)
mm: replace remap_file_pages() syscall with emulation
remap_file_pages(2) was invented to be able to efficiently map parts of a huge file into a limited 32-bit virtual address space, such as in database workloads. Nonlinear mappings are a pain to support and it seems there are no legitimate use-cases nowadays since 64-bit systems are widely available. Let's drop it and get rid of all this special-cased code. The patch replaces the syscall with an emulation which creates a new VMA on each remap_file_pages() call, unless it can be merged with an adjacent one. I didn't find *any* real code that uses remap_file_pages(2) to test the emulation's impact on. I've checked Debian code search and the source of all packages in ALT Linux. No real users: libc wrappers, mentions in strace, gdb, valgrind and this kind of stuff. There are a few basic tests in LTP for the syscall. They work just fine with the emulation. To test the performance impact, I've written a small test case which demonstrates pretty much the worst-case scenario: map a 4G shmfs file, write the page's pgoff to the beginning of every page, remap the pages in reverse order, read every page. The test creates 1 million VMAs if the emulation is in use, so I had to set vm.max_map_count to 1100000 to avoid -ENOMEM. Before: 23.3 ( +- 4.31% ) seconds; After: 43.9 ( +- 0.85% ) seconds; Slowdown: 1.88x. I believe we can live with that.
Test case:

    #define _GNU_SOURCE
    #include <assert.h>
    #include <stdlib.h>
    #include <stdio.h>
    #include <sys/mman.h>

    #define MB (1024UL * 1024)
    #define SIZE (4096 * MB)

    int main(int argc, char **argv)
    {
        unsigned long *p;
        long i, pass;

        for (pass = 0; pass < 10; pass++) {
            p = mmap(NULL, SIZE, PROT_READ|PROT_WRITE,
                     MAP_SHARED | MAP_ANONYMOUS, -1, 0);
            if (p == MAP_FAILED) {
                perror("mmap");
                return -1;
            }

            for (i = 0; i < SIZE / 4096; i++)
                p[i * 4096 / sizeof(*p)] = i;

            for (i = 0; i < SIZE / 4096; i++) {
                if (remap_file_pages(p + i * 4096 / sizeof(*p), 4096,
                                     0, (SIZE - 4096 * (i + 1)) >> 12, 0)) {
                    perror("remap_file_pages");
                    return -1;
                }
            }

            for (i = SIZE / 4096 - 1; i >= 0; i--)
                assert(p[i * 4096 / sizeof(*p)] == SIZE / 4096 - i - 1);

            munmap(p, SIZE);
        }

        return 0;
    }

[akpm@linux-foundation.org: fix spello]
[sasha.levin@oracle.com: initialize populate before usage]
[sasha.levin@oracle.com: grab file ref to prevent race while mmaping]
Signed-off-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Dave Jones <davej@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Armin Rigo <arigo@tunes.org>
Signed-off-by: Sasha Levin <sasha.levin@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--mm/mmap.c69
1 files changed, 69 insertions, 0 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index 7f684d5a8087..e023dc5e59a8 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2634,6 +2634,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
2634 return vm_munmap(addr, len); 2634 return vm_munmap(addr, len);
2635} 2635}
2636 2636
2637
2638/*
2639 * Emulation of deprecated remap_file_pages() syscall.
 *
 * The request is carried out by calling do_mmap_pgoff() with MAP_FIXED over
 * [start, start + size) of an existing shared mapping, using that mapping's
 * file and @pgoff as the new file offset.
 *
 * @start: start address of the range; masked with PAGE_MASK before use.
 * @size:  length of the range; masked with PAGE_MASK before use. A size that
 *         is zero after masking, or that makes start + size wrap, is rejected.
 * @prot:  must be 0. The protection passed to do_mmap_pgoff() is rebuilt from
 *         the VM_READ/VM_WRITE/VM_EXEC flags of the existing VMA.
 * @pgoff: new file offset, in pages, for the range.
 * @flags: only MAP_NONBLOCK is kept; MAP_SHARED, MAP_FIXED and MAP_POPULATE
 *         are always added, plus MAP_LOCKED when the VMA is VM_LOCKED.
 *
 * The whole range must lie inside a single VM_SHARED VMA, otherwise the call
 * fails with -EINVAL.
 *
 * Returns 0 on success (including when the range already maps @pgoff and
 * nothing has to be done), -EINVAL when validation fails, or the error value
 * returned by do_mmap_pgoff().
2640 */
2641SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
2642 unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
2643{
2644
2645 struct mm_struct *mm = current->mm;
2646 struct vm_area_struct *vma;
2647 unsigned long populate = 0;
2648 unsigned long ret = -EINVAL;
2649 struct file *file;
2650
 /* Logged once, naming the first task seen using the syscall. */
2651 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
2652 "See Documentation/vm/remap_file_pages.txt.\n",
2653 current->comm, current->pid);
2654
 /* A non-zero prot is rejected with -EINVAL. */
2655 if (prot)
2656 return ret;
2657 start = start & PAGE_MASK;
2658 size = size & PAGE_MASK;
2659
 /* Rejects both a zero size and an address range that wraps. */
2660 if (start + size <= start)
2661 return ret;
2662
2663 /* Does pgoff wrap? */
2664 if (pgoff + (size >> PAGE_SHIFT) < pgoff)
2665 return ret;
2666
2667 down_write(&mm->mmap_sem);
2668 vma = find_vma(mm, start);
2669
2670 if (!vma || !(vma->vm_flags & VM_SHARED))
2671 goto out;
2672
 /* The range must be fully contained in the VMA found for start. */
2673 if (start < vma->vm_start || start + size > vma->vm_end)
2674 goto out;
2675
 /* The range already maps the requested offset: succeed without remapping. */
2676 if (pgoff == linear_page_index(vma, start)) {
2677 ret = 0;
2678 goto out;
2679 }
2680
 /* prot is 0 here; rebuild it from the access flags of the existing VMA. */
2681 prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
2682 prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
2683 prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
2684
 /* Drop every caller-supplied flag except MAP_NONBLOCK. */
2685 flags &= MAP_NONBLOCK;
2686 flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
2687 if (vma->vm_flags & VM_LOCKED) {
2688 flags |= MAP_LOCKED;
2689 /* drop PG_Mlocked flag for over-mapped range */
2690 munlock_vma_pages_range(vma, start, start + size);
2691 }
2692
 /*
  * Hold a reference on the file across do_mmap_pgoff().
  * NOTE(review): presumably needed because the MAP_FIXED mapping replaces
  * the VMA the file pointer was read from - confirm.
  * NOTE(review): do_mmap_pgoff() is called directly and no LSM hook call is
  * visible in this function - confirm whether a security check is required
  * for the new mapping.
  */
2693 file = get_file(vma->vm_file);
2694 ret = do_mmap_pgoff(vma->vm_file, start, size,
2695 prot, flags, pgoff, &populate);
2696 fput(file);
2697out:
2698 up_write(&mm->mmap_sem);
 /* populate stays 0 on the early-exit paths, so this only runs after do_mmap_pgoff(). */
2699 if (populate)
2700 mm_populate(ret, populate);
 /* Collapse any non-error result to 0; error values are returned unchanged. */
2701 if (!IS_ERR_VALUE(ret))
2702 ret = 0;
2703 return ret;
2704}
2705
2637static inline void verify_mm_writelocked(struct mm_struct *mm) 2706static inline void verify_mm_writelocked(struct mm_struct *mm)
2638{ 2707{
2639#ifdef CONFIG_DEBUG_VM 2708#ifdef CONFIG_DEBUG_VM