author     Davidlohr Bueso <davidlohr@hp.com>              2014-04-07 18:37:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-04-07 19:35:53 -0400
commit     615d6e8756c87149f2d4c1b93d471bca002bd849 (patch)
tree       45b039ccafb606a30e53c1012775efe848e789ed
parent     d7c1755179b82d954f593ca5285b9360f2f62e9c (diff)
mm: per-thread vma caching
This patch is a continuation of efforts to optimize find_vma(), avoiding potentially expensive rbtree walks to locate a vma upon faults. The original approach (https://lkml.org/lkml/2013/11/1/410), where the largest vma was also cached, ended up being too specific and random, so further comparison with other approaches was needed.

There are two things to consider when dealing with this: the cache hit rate and the latency of find_vma(). Improving the hit rate does not necessarily translate into finding the vma any faster, as the overhead of any fancy caching scheme can be too high to be worthwhile.

We currently cache the last used vma for the whole address space, which provides a nice optimization, reducing the total cycles in find_vma() by a factor of up to 2.5 for workloads with good locality. On the other hand, this simple scheme is pretty much useless for workloads with poor locality. Analyzing ebizzy runs shows that, no matter how many threads are running, the mmap_cache hit rate is less than 2%, and in many situations below 1%.

The proposed approach replaces this scheme with a small per-thread cache, maximizing hit rates at a very low maintenance cost. Invalidations are performed by simply bumping a 32-bit sequence number. The only expensive operation is the rare case of a sequence number overflow, where all caches that share the same address space are flushed. Upon a miss, the replacement policy chooses the slot based on the page number that contains the virtual address in question.

Concretely, the following results are seen on an 80-core, 8-socket x86-64 box:

1) System bootup: most programs are single threaded, so the per-thread scheme improves on the ~50% baseline hit rate simply by adding a few more slots to the cache.

+----------------+----------+------------------+
| caching scheme | hit-rate | cycles (billion) |
+----------------+----------+------------------+
| baseline       | 50.61%   | 19.90            |
| patched        | 73.45%   | 13.58            |
+----------------+----------+------------------+

2) Kernel build: this one is already pretty good with the current approach, as we're dealing with good locality.

+----------------+----------+------------------+
| caching scheme | hit-rate | cycles (billion) |
+----------------+----------+------------------+
| baseline       | 75.28%   | 11.03            |
| patched        | 88.09%   |  9.31            |
+----------------+----------+------------------+

3) Oracle 11g Data Mining (4k pages): similar to the kernel build workload.

+----------------+----------+------------------+
| caching scheme | hit-rate | cycles (billion) |
+----------------+----------+------------------+
| baseline       | 70.66%   | 17.14            |
| patched        | 91.15%   | 12.57            |
+----------------+----------+------------------+

4) Ebizzy: there is a fair amount of variation from run to run, but this approach always shows nearly perfect hit rates, while the baseline's is just about non-existent. Total cycles can fluctuate anywhere from ~60 to ~116 billion under the baseline scheme; this approach reduces that considerably.
For instance, with 80 threads:

+----------------+----------+------------------+
| caching scheme | hit-rate | cycles (billion) |
+----------------+----------+------------------+
| baseline       | 1.06%    | 91.54            |
| patched        | 99.97%   | 14.18            |
+----------------+----------+------------------+

[akpm@linux-foundation.org: fix nommu build, per Davidlohr]
[akpm@linux-foundation.org: document vmacache_valid() logic]
[akpm@linux-foundation.org: attempt to untangle header files]
[akpm@linux-foundation.org: add vmacache_find() BUG_ON]
[hughd@google.com: add vmacache_valid_mm() (from Oleg)]
[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: adjust and enhance comments]
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Michel Lespinasse <walken@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Tested-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
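For reference, the scheme described above condenses into a short userspace sketch. This is not kernel code: the types and helper names below (mm, thread, cache_find and friends) are hypothetical stand-ins for mm_struct, task_struct and the vmacache_* functions this patch introduces, assuming 4 KiB pages and four cache slots.

/* Hedged userspace sketch of the per-thread vma cache; all names are
 * illustrative stand-ins, not the kernel API. Build: cc -o sketch sketch.c */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SHIFT      12                      /* assumes 4 KiB pages */
#define VMACACHE_SIZE   4
#define VMACACHE_HASH(addr) (((addr) >> PAGE_SHIFT) & (VMACACHE_SIZE - 1))

struct vma { unsigned long vm_start, vm_end; }; /* stand-in for vm_area_struct */
struct mm { uint32_t seqnum; };                 /* stand-in for mm_struct */
struct thread {                                 /* stand-in for task_struct */
        uint32_t seqnum;
        struct vma *cache[VMACACHE_SIZE];
};

/* Lookup: a seqnum mismatch means another thread changed the vma set,
 * so the stale slots are dropped before searching them. */
static struct vma *cache_find(struct thread *t, struct mm *mm, unsigned long addr)
{
        int i;

        if (t->seqnum != mm->seqnum) {
                t->seqnum = mm->seqnum;
                memset(t->cache, 0, sizeof(t->cache));
                return NULL;
        }
        for (i = 0; i < VMACACHE_SIZE; i++) {
                struct vma *v = t->cache[i];

                if (v && v->vm_start <= addr && v->vm_end > addr)
                        return v;
        }
        return NULL;
}

/* Update on a miss: the slot is chosen by the page number of the address. */
static void cache_update(struct thread *t, unsigned long addr, struct vma *v)
{
        t->cache[VMACACHE_HASH(addr)] = v;
}

/* Invalidation is just a seqnum bump; a wrap to 0 would additionally need a
 * full flush of every thread sharing the mm (elided here; the patch does
 * this in vmacache_flush_all() below). */
static void cache_invalidate(struct mm *mm)
{
        mm->seqnum++;
}

int main(void)
{
        struct mm mm = { 0 };
        struct thread t = { 0 };
        struct vma v = { 0x1000, 0x3000 };

        cache_update(&t, 0x1000, &v);
        printf("hit: %d\n", cache_find(&t, &mm, 0x2000) != NULL);  /* 1 */
        cache_invalidate(&mm);
        printf("hit: %d\n", cache_find(&t, &mm, 0x2000) != NULL);  /* 0 */
        return 0;
}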
-rw-r--r--  arch/unicore32/include/asm/mmu_context.h |   4
-rw-r--r--  fs/exec.c                                |   5
-rw-r--r--  fs/proc/task_mmu.c                       |   3
-rw-r--r--  include/linux/mm_types.h                 |   4
-rw-r--r--  include/linux/sched.h                    |   7
-rw-r--r--  include/linux/vmacache.h                 |  38
-rw-r--r--  kernel/debug/debug_core.c                |  14
-rw-r--r--  kernel/fork.c                            |   7
-rw-r--r--  mm/Makefile                              |   2
-rw-r--r--  mm/mmap.c                                |  55
-rw-r--r--  mm/nommu.c                               |  24
-rw-r--r--  mm/vmacache.c                            | 112
12 files changed, 231 insertions, 44 deletions
diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h
index fb5e4c658f7a..ef470a7a3d0f 100644
--- a/arch/unicore32/include/asm/mmu_context.h
+++ b/arch/unicore32/include/asm/mmu_context.h
@@ -14,6 +14,8 @@
 
 #include <linux/compiler.h>
 #include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/io.h>
 
 #include <asm/cacheflush.h>
@@ -73,7 +75,7 @@ do { \
 		else \
 			mm->mmap = NULL; \
 		rb_erase(&high_vma->vm_rb, &mm->mm_rb); \
-		mm->mmap_cache = NULL; \
+		vmacache_invalidate(mm); \
 		mm->map_count--; \
 		remove_vma(high_vma); \
 	} \
diff --git a/fs/exec.c b/fs/exec.c
index 25dfeba6d55f..b60ccf969a8b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -26,6 +26,7 @@
 #include <linux/file.h>
 #include <linux/fdtable.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
 #include <linux/swap.h>
@@ -822,7 +823,7 @@ EXPORT_SYMBOL(read_code);
 static int exec_mmap(struct mm_struct *mm)
 {
 	struct task_struct *tsk;
-	struct mm_struct * old_mm, *active_mm;
+	struct mm_struct *old_mm, *active_mm;
 
 	/* Notify parent that we're no longer interested in the old VM */
 	tsk = current;
@@ -848,6 +849,8 @@ static int exec_mmap(struct mm_struct *mm)
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	activate_mm(active_mm, mm);
+	tsk->mm->vmacache_seqnum = 0;
+	vmacache_flush(tsk);
 	task_unlock(tsk);
 	if (old_mm) {
 		up_read(&old_mm->mmap_sem);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index fb52b548080d..442177b1119a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1,4 +1,5 @@
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/hugetlb.h>
 #include <linux/huge_mm.h>
 #include <linux/mount.h>
@@ -152,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos)
 
 	/*
 	 * We remember last_addr rather than next_addr to hit with
-	 * mmap_cache most of the time. We have zero last_addr at
+	 * vmacache most of the time. We have zero last_addr at
 	 * the beginning and also after lseek. We will have -1 last_addr
 	 * after the end of the vmas.
 	 */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 290901a8c1de..2b58d192ea24 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -342,9 +342,9 @@ struct mm_rss_stat {
 
 struct kioctx_table;
 struct mm_struct {
-	struct vm_area_struct * mmap;		/* list of VMAs */
+	struct vm_area_struct *mmap;		/* list of VMAs */
 	struct rb_root mm_rb;
-	struct vm_area_struct * mmap_cache;	/* last find_vma result */
+	u32 vmacache_seqnum;			/* per-thread vmacache */
 #ifdef CONFIG_MMU
 	unsigned long (*get_unmapped_area) (struct file *filp,
 				unsigned long addr, unsigned long len,
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7cb07fd26680..642477dd814a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -132,6 +132,10 @@ struct perf_event_context;
 struct blk_plug;
 struct filename;
 
+#define VMACACHE_BITS 2
+#define VMACACHE_SIZE (1U << VMACACHE_BITS)
+#define VMACACHE_MASK (VMACACHE_SIZE - 1)
+
 /*
  * List of flags we want to share for kernel threads,
  * if only because they are not used by them anyway.
@@ -1235,6 +1239,9 @@ struct task_struct {
 #ifdef CONFIG_COMPAT_BRK
 	unsigned brk_randomized:1;
 #endif
+	/* per-thread vma caching */
+	u32 vmacache_seqnum;
+	struct vm_area_struct *vmacache[VMACACHE_SIZE];
 #if defined(SPLIT_RSS_COUNTING)
 	struct task_rss_stat rss_stat;
 #endif
diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h
new file mode 100644
index 000000000000..c3fa0fd43949
--- /dev/null
+++ b/include/linux/vmacache.h
@@ -0,0 +1,38 @@
+#ifndef __LINUX_VMACACHE_H
+#define __LINUX_VMACACHE_H
+
+#include <linux/sched.h>
+#include <linux/mm.h>
+
+/*
+ * Hash based on the page number. Provides a good hit rate for
+ * workloads with good locality and those with random accesses as well.
+ */
+#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK)
+
+static inline void vmacache_flush(struct task_struct *tsk)
+{
+	memset(tsk->vmacache, 0, sizeof(tsk->vmacache));
+}
+
+extern void vmacache_flush_all(struct mm_struct *mm);
+extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma);
+extern struct vm_area_struct *vmacache_find(struct mm_struct *mm,
+					    unsigned long addr);
+
+#ifndef CONFIG_MMU
+extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+						  unsigned long start,
+						  unsigned long end);
+#endif
+
+static inline void vmacache_invalidate(struct mm_struct *mm)
+{
+	mm->vmacache_seqnum++;
+
+	/* deal with overflows */
+	if (unlikely(mm->vmacache_seqnum == 0))
+		vmacache_flush_all(mm);
+}
+
+#endif /* __LINUX_VMACACHE_H */
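A quick sanity check of the hash above, assuming PAGE_SHIFT is 12 (4 KiB pages): consecutive pages map to consecutive slots, so both sequential scans and scattered accesses spread their vmas over all four entries rather than fighting over one. A minimal standalone check (the macros are copied out of the header; the surrounding program is illustrative only):

/* Standalone check of VMACACHE_HASH; assumes PAGE_SHIFT == 12. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define VMACACHE_BITS	2
#define VMACACHE_SIZE	(1U << VMACACHE_BITS)
#define VMACACHE_MASK	(VMACACHE_SIZE - 1)
#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK)

int main(void)
{
	unsigned long base = 0x7f0000000000UL;
	int i;

	/* Five consecutive pages: slots 0, 1, 2, 3, then 0 again. */
	for (i = 0; i < 5; i++)
		printf("addr %#lx -> slot %lu\n",
		       base + i * 4096UL,
		       VMACACHE_HASH(base + i * 4096UL));
	return 0;
}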
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 99982a70ddad..2956c8da1605 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -49,6 +49,7 @@
 #include <linux/pid.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/rcupdate.h>
 
 #include <asm/cacheflush.h>
@@ -224,10 +225,17 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
 	if (!CACHE_FLUSH_IS_SAFE)
 		return;
 
-	if (current->mm && current->mm->mmap_cache) {
-		flush_cache_range(current->mm->mmap_cache,
-				  addr, addr + BREAK_INSTR_SIZE);
+	if (current->mm) {
+		int i;
+
+		for (i = 0; i < VMACACHE_SIZE; i++) {
+			if (!current->vmacache[i])
+				continue;
+			flush_cache_range(current->vmacache[i],
+					  addr, addr + BREAK_INSTR_SIZE);
+		}
 	}
+
 	/* Force flush instruction cache if it was outside the mm */
 	flush_icache_range(addr, addr + BREAK_INSTR_SIZE);
 }
diff --git a/kernel/fork.c b/kernel/fork.c
index e40c0a01d5a6..bc0e96b78dfd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -28,6 +28,8 @@
 #include <linux/mman.h>
 #include <linux/mmu_notifier.h>
 #include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/nsproxy.h>
 #include <linux/capability.h>
 #include <linux/cpu.h>
@@ -364,7 +366,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 
 	mm->locked_vm = 0;
 	mm->mmap = NULL;
-	mm->mmap_cache = NULL;
+	mm->vmacache_seqnum = 0;
 	mm->map_count = 0;
 	cpumask_clear(mm_cpumask(mm));
 	mm->mm_rb = RB_ROOT;
@@ -882,6 +884,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 	if (!oldmm)
 		return 0;
 
+	/* initialize the new vmacache entries */
+	vmacache_flush(tsk);
+
 	if (clone_flags & CLONE_VM) {
 		atomic_inc(&oldmm->mm_users);
 		mm = oldmm;
diff --git a/mm/Makefile b/mm/Makefile
index cdd741519ee0..23a6f7e23019 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -16,7 +16,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
 			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
-			   compaction.o balloon_compaction.o \
+			   compaction.o balloon_compaction.o vmacache.o \
 			   interval_tree.o list_lru.o workingset.o $(mmu-y)
 
 obj-y += init-mm.o
diff --git a/mm/mmap.c b/mm/mmap.c
index 46433e137abc..b1202cf81f4b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
@@ -681,8 +682,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
 	prev->vm_next = next = vma->vm_next;
 	if (next)
 		next->vm_prev = prev;
-	if (mm->mmap_cache == vma)
-		mm->mmap_cache = prev;
+
+	/* Kill the cache */
+	vmacache_invalidate(mm);
 }
 
 /*
@@ -1989,34 +1991,33 @@ EXPORT_SYMBOL(get_unmapped_area);
 /* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
-	struct vm_area_struct *vma = NULL;
+	struct rb_node *rb_node;
+	struct vm_area_struct *vma;
 
 	/* Check the cache first. */
-	/* (Cache hit rate is typically around 35%.) */
-	vma = ACCESS_ONCE(mm->mmap_cache);
-	if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
-		struct rb_node *rb_node;
+	vma = vmacache_find(mm, addr);
+	if (likely(vma))
+		return vma;
 
-		rb_node = mm->mm_rb.rb_node;
-		vma = NULL;
+	rb_node = mm->mm_rb.rb_node;
+	vma = NULL;
 
-		while (rb_node) {
-			struct vm_area_struct *vma_tmp;
+	while (rb_node) {
+		struct vm_area_struct *tmp;
 
-			vma_tmp = rb_entry(rb_node,
-					   struct vm_area_struct, vm_rb);
+		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
 
-			if (vma_tmp->vm_end > addr) {
-				vma = vma_tmp;
-				if (vma_tmp->vm_start <= addr)
-					break;
-				rb_node = rb_node->rb_left;
-			} else
-				rb_node = rb_node->rb_right;
-		}
-		if (vma)
-			mm->mmap_cache = vma;
+		if (tmp->vm_end > addr) {
+			vma = tmp;
+			if (tmp->vm_start <= addr)
+				break;
+			rb_node = rb_node->rb_left;
+		} else
+			rb_node = rb_node->rb_right;
 	}
+
+	if (vma)
+		vmacache_update(addr, vma);
 	return vma;
 }
 
@@ -2388,7 +2389,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	} else
 		mm->highest_vm_end = prev ? prev->vm_end : 0;
 	tail_vma->vm_next = NULL;
-	mm->mmap_cache = NULL;		/* Kill the cache. */
+
+	/* Kill the cache */
+	vmacache_invalidate(mm);
 }
 
 /*
diff --git a/mm/nommu.c b/mm/nommu.c
index e19482533ce3..5d3f3524bbdc 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -15,6 +15,7 @@
 
 #include <linux/export.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
 #include <linux/file.h>
@@ -768,16 +769,23 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
  */
 static void delete_vma_from_mm(struct vm_area_struct *vma)
 {
+	int i;
 	struct address_space *mapping;
 	struct mm_struct *mm = vma->vm_mm;
+	struct task_struct *curr = current;
 
 	kenter("%p", vma);
 
 	protect_vma(vma, 0);
 
 	mm->map_count--;
-	if (mm->mmap_cache == vma)
-		mm->mmap_cache = NULL;
+	for (i = 0; i < VMACACHE_SIZE; i++) {
+		/* if the vma is cached, invalidate the entire cache */
+		if (curr->vmacache[i] == vma) {
+			vmacache_invalidate(curr->mm);
+			break;
+		}
+	}
 
 	/* remove the VMA from the mapping */
 	if (vma->vm_file) {
@@ -825,8 +833,8 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 	struct vm_area_struct *vma;
 
 	/* check the cache first */
-	vma = ACCESS_ONCE(mm->mmap_cache);
-	if (vma && vma->vm_start <= addr && vma->vm_end > addr)
+	vma = vmacache_find(mm, addr);
+	if (likely(vma))
 		return vma;
 
 	/* trawl the list (there may be multiple mappings in which addr
@@ -835,7 +843,7 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 		if (vma->vm_start > addr)
 			return NULL;
 		if (vma->vm_end > addr) {
-			mm->mmap_cache = vma;
+			vmacache_update(addr, vma);
 			return vma;
 		}
 	}
@@ -874,8 +882,8 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
 	unsigned long end = addr + len;
 
 	/* check the cache first */
-	vma = mm->mmap_cache;
-	if (vma && vma->vm_start == addr && vma->vm_end == end)
+	vma = vmacache_find_exact(mm, addr, end);
+	if (vma)
 		return vma;
 
 	/* trawl the list (there may be multiple mappings in which addr
@@ -886,7 +894,7 @@ static struct vm_area_struct *find_vma_exact(struct mm_struct *mm,
 		if (vma->vm_start > addr)
 			return NULL;
 		if (vma->vm_end == end) {
-			mm->mmap_cache = vma;
+			vmacache_update(addr, vma);
 			return vma;
 		}
 	}
diff --git a/mm/vmacache.c b/mm/vmacache.c
new file mode 100644
index 000000000000..d4224b397c0e
--- /dev/null
+++ b/mm/vmacache.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2014 Davidlohr Bueso.
+ */
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/vmacache.h>
+
+/*
+ * Flush vma caches for threads that share a given mm.
+ *
+ * The operation is safe because the caller holds the mmap_sem
+ * exclusively and other threads accessing the vma cache will
+ * have mmap_sem held at least for read, so no extra locking
+ * is required to maintain the vma cache.
+ */
+void vmacache_flush_all(struct mm_struct *mm)
+{
+	struct task_struct *g, *p;
+
+	rcu_read_lock();
+	for_each_process_thread(g, p) {
+		/*
+		 * Only flush the vmacache pointers as the
+		 * mm seqnum is already set and curr's will
+		 * be set upon invalidation when the next
+		 * lookup is done.
+		 */
+		if (mm == p->mm)
+			vmacache_flush(p);
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * This task may be accessing a foreign mm via (for example)
+ * get_user_pages()->find_vma(). The vmacache is task-local and this
+ * task's vmacache pertains to a different mm (ie, its own). There is
+ * nothing we can do here.
+ *
+ * Also handle the case where a kernel thread has adopted this mm via use_mm().
+ * That kernel thread's vmacache is not applicable to this mm.
+ */
+static bool vmacache_valid_mm(struct mm_struct *mm)
+{
+	return current->mm == mm && !(current->flags & PF_KTHREAD);
+}
+
+void vmacache_update(unsigned long addr, struct vm_area_struct *newvma)
+{
+	if (vmacache_valid_mm(newvma->vm_mm))
+		current->vmacache[VMACACHE_HASH(addr)] = newvma;
+}
+
+static bool vmacache_valid(struct mm_struct *mm)
+{
+	struct task_struct *curr;
+
+	if (!vmacache_valid_mm(mm))
+		return false;
+
+	curr = current;
+	if (mm->vmacache_seqnum != curr->vmacache_seqnum) {
+		/*
+		 * First attempt will always be invalid, initialize
+		 * the new cache for this task here.
+		 */
+		curr->vmacache_seqnum = mm->vmacache_seqnum;
+		vmacache_flush(curr);
+		return false;
+	}
+	return true;
+}
+
+struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
+{
+	int i;
+
+	if (!vmacache_valid(mm))
+		return NULL;
+
+	for (i = 0; i < VMACACHE_SIZE; i++) {
+		struct vm_area_struct *vma = current->vmacache[i];
+
+		if (vma && vma->vm_start <= addr && vma->vm_end > addr) {
+			BUG_ON(vma->vm_mm != mm);
+			return vma;
+		}
+	}
+
+	return NULL;
+}
+
+#ifndef CONFIG_MMU
+struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
+					   unsigned long start,
+					   unsigned long end)
+{
+	int i;
+
+	if (!vmacache_valid(mm))
+		return NULL;
+
+	for (i = 0; i < VMACACHE_SIZE; i++) {
+		struct vm_area_struct *vma = current->vmacache[i];
+
+		if (vma && vma->vm_start == start && vma->vm_end == end)
+			return vma;
+	}
+
+	return NULL;
+}
+#endif
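The subtle part of vmacache_valid() above is the overflow case: if the 32-bit seqnum wrapped back to a value some idle thread still holds, that thread's stale entries would look valid again. That is why vmacache_invalidate() flushes every sharer when the counter hits zero. A hedged userspace illustration of the hazard (stand-in types, not kernel code):

/* Why the wrap-to-zero flush matters; types are illustrative stand-ins. */
#include <assert.h>
#include <stdint.h>

struct mm { uint32_t seqnum; };
struct thread { uint32_t seqnum; int cache_valid; };

static void invalidate(struct mm *mm, struct thread *threads, int n)
{
	if (++mm->seqnum == 0) {	/* 2^32 invalidations: seqnums alias */
		int i;

		for (i = 0; i < n; i++)	/* cf. vmacache_flush_all() */
			threads[i].cache_valid = 0;
	}
}

int main(void)
{
	struct mm mm = { UINT32_MAX };	/* one bump away from wrapping */
	struct thread t = { 0, 1 };	/* stale entries kept since seqnum 0 */

	invalidate(&mm, &t, 1);		/* seqnum wraps to 0 */

	/* Without the flush, t.seqnum == mm.seqnum would revalidate the
	 * stale cache; the flush closes that window. */
	assert(mm.seqnum == 0 && t.seqnum == 0 && !t.cache_valid);
	return 0;
}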