author     Davidlohr Bueso <davidlohr@hp.com>                2014-04-07 18:37:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2014-04-07 19:35:53 -0400
commit     615d6e8756c87149f2d4c1b93d471bca002bd849 (patch)
tree       45b039ccafb606a30e53c1012775efe848e789ed /mm/mmap.c
parent     d7c1755179b82d954f593ca5285b9360f2f62e9c (diff)
mm: per-thread vma caching
This patch is a continuation of efforts trying to optimize find_vma(), avoiding potentially expensive rbtree walks to locate a vma upon faults. The original approach (https://lkml.org/lkml/2013/11/1/410), where the largest vma was also cached, ended up being too specific and random, so further comparison with other approaches was needed.

There are two things to consider when dealing with this: the cache hit rate and the latency of find_vma(). Improving the hit rate does not necessarily translate into finding the vma any faster, as the overhead of any fancy caching scheme can be too high to be worthwhile.

We currently cache the last used vma for the whole address space, which provides a nice optimization, reducing the total cycles in find_vma() by up to 250% for workloads with good locality. On the other hand, this simple scheme is pretty much useless for workloads with poor locality. Analyzing ebizzy runs shows that, no matter how many threads are running, the mmap_cache hit rate is less than 2%, and in many situations below 1%.

The proposed approach is to replace this scheme with a small per-thread cache, maximizing hit rates at a very low maintenance cost. Invalidations are performed by simply bumping up a 32-bit sequence number; the only expensive operation is the rare case of a sequence number overflow, where all caches that share the same address space are flushed. Upon a miss, the proposed replacement policy is based on the page number that contains the virtual address in question.

Concretely, the following results are seen on an 80-core, 8-socket x86-64 box:

1) System bootup: Most programs are single threaded, so the per-thread
   scheme improves the ~50% hit rate by just adding a few more slots to
   the cache.

+----------------+----------+------------------+
| caching scheme | hit-rate | cycles (billion) |
+----------------+----------+------------------+
| baseline       | 50.61%   | 19.90            |
| patched        | 73.45%   | 13.58            |
+----------------+----------+------------------+

2) Kernel build: This one is already pretty good with the current
   approach, as we're dealing with good locality.

+----------------+----------+------------------+
| caching scheme | hit-rate | cycles (billion) |
+----------------+----------+------------------+
| baseline       | 75.28%   | 11.03            |
| patched        | 88.09%   |  9.31            |
+----------------+----------+------------------+

3) Oracle 11g Data Mining (4k pages): Similar to the kernel build
   workload.

+----------------+----------+------------------+
| caching scheme | hit-rate | cycles (billion) |
+----------------+----------+------------------+
| baseline       | 70.66%   | 17.14            |
| patched        | 91.15%   | 12.57            |
+----------------+----------+------------------+

4) Ebizzy: There's a fair amount of variation from run to run, but this
   approach always shows nearly perfect hit rates, while the baseline's
   are just about non-existent. Baseline cycle counts fluctuate anywhere
   from ~60 to ~116 billion, which this approach reduces considerably.
   For instance, with 80 threads:

+----------------+----------+------------------+
| caching scheme | hit-rate | cycles (billion) |
+----------------+----------+------------------+
| baseline       |  1.06%   | 91.54            |
| patched        | 99.97%   | 14.18            |
+----------------+----------+------------------+

[akpm@linux-foundation.org: fix nommu build, per Davidlohr]
[akpm@linux-foundation.org: document vmacache_valid() logic]
[akpm@linux-foundation.org: attempt to untangle header files]
[akpm@linux-foundation.org: add vmacache_find() BUG_ON]
[hughd@google.com: add vmacache_valid_mm() (from Oleg)]
[akpm@linux-foundation.org: coding-style fixes]
[akpm@linux-foundation.org: adjust and enhance comments]
Signed-off-by: Davidlohr Bueso <davidlohr@hp.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Michel Lespinasse <walken@google.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Tested-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
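Editor's note: the helpers this change calls (vmacache_find(), vmacache_update() and vmacache_invalidate()) are implemented outside mm/mmap.c and are therefore not part of this diffstat. As a rough illustration of the scheme the commit message describes, here is a minimal user-space sketch in C, assuming a four-slot per-thread cache hashed by page number. The struct layout, field names and sizes below are illustrative assumptions, not the kernel's actual definitions; in particular, the real helpers take only (mm, addr) and use the current task implicitly, while this sketch passes the per-thread state explicitly so it stays self-contained.

/*
 * Minimal user-space model of a per-thread vma cache with
 * sequence-number invalidation.  Illustrative only.
 */
#include <stddef.h>
#include <string.h>

#define PAGE_SHIFT      12
#define VMACACHE_SIZE   4   /* assumed: a handful of per-thread slots */
#define VMACACHE_HASH(addr) (((addr) >> PAGE_SHIFT) & (VMACACHE_SIZE - 1))

struct vm_area_struct {
        unsigned long vm_start, vm_end;         /* range [vm_start, vm_end) */
};

struct mm_struct {
        unsigned int vmacache_seqnum;           /* bumped to invalidate */
};

/* Per-thread state; the kernel keeps the equivalent in task_struct. */
struct vma_cache {
        unsigned int seqnum;                    /* last mm seqnum we saw */
        struct vm_area_struct *slot[VMACACHE_SIZE];
};

/* O(1) invalidation: every sharing thread's cache goes stale at once. */
static void vmacache_invalidate(struct mm_struct *mm)
{
        mm->vmacache_seqnum++;
        /* The rare 32-bit overflow would flush all sharers' caches eagerly. */
}

static struct vm_area_struct *
vmacache_find(struct vma_cache *tc, struct mm_struct *mm, unsigned long addr)
{
        size_t i;

        if (tc->seqnum != mm->vmacache_seqnum) {
                /* Stale: resync the seqnum and clear the local slots lazily. */
                tc->seqnum = mm->vmacache_seqnum;
                memset(tc->slot, 0, sizeof(tc->slot));
                return NULL;
        }

        for (i = 0; i < VMACACHE_SIZE; i++) {
                struct vm_area_struct *vma = tc->slot[i];

                if (vma && vma->vm_start <= addr && vma->vm_end > addr)
                        return vma;
        }
        return NULL;
}

/* Replacement policy: slot chosen by the page number containing addr. */
static void vmacache_update(struct vma_cache *tc, unsigned long addr,
                            struct vm_area_struct *vma)
{
        tc->slot[VMACACHE_HASH(addr)] = vma;
}

In this shape, a find_vma() caller tries the per-thread slots first and only falls back to the rbtree walk on a miss, recording the result with vmacache_update() on the way out, which is exactly the structure of the mm/mmap.c hunk below.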
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--  mm/mmap.c  55
1 file changed, 29 insertions(+), 26 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 46433e137abc..b1202cf81f4b 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/backing-dev.h>
 #include <linux/mm.h>
+#include <linux/vmacache.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
 #include <linux/pagemap.h>
@@ -681,8 +682,9 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
 	prev->vm_next = next = vma->vm_next;
 	if (next)
 		next->vm_prev = prev;
-	if (mm->mmap_cache == vma)
-		mm->mmap_cache = prev;
+
+	/* Kill the cache */
+	vmacache_invalidate(mm);
 }
 
 /*
@@ -1989,34 +1991,33 @@ EXPORT_SYMBOL(get_unmapped_area);
 /* Look up the first VMA which satisfies  addr < vm_end,  NULL if none. */
 struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
 {
-	struct vm_area_struct *vma = NULL;
+	struct rb_node *rb_node;
+	struct vm_area_struct *vma;
 
 	/* Check the cache first. */
-	/* (Cache hit rate is typically around 35%.) */
-	vma = ACCESS_ONCE(mm->mmap_cache);
-	if (!(vma && vma->vm_end > addr && vma->vm_start <= addr)) {
-		struct rb_node *rb_node;
+	vma = vmacache_find(mm, addr);
+	if (likely(vma))
+		return vma;
 
-		rb_node = mm->mm_rb.rb_node;
-		vma = NULL;
+	rb_node = mm->mm_rb.rb_node;
+	vma = NULL;
 
-		while (rb_node) {
-			struct vm_area_struct *vma_tmp;
+	while (rb_node) {
+		struct vm_area_struct *tmp;
 
-			vma_tmp = rb_entry(rb_node,
-					   struct vm_area_struct, vm_rb);
+		tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
 
-			if (vma_tmp->vm_end > addr) {
-				vma = vma_tmp;
-				if (vma_tmp->vm_start <= addr)
-					break;
-				rb_node = rb_node->rb_left;
-			} else
-				rb_node = rb_node->rb_right;
-		}
-		if (vma)
-			mm->mmap_cache = vma;
-	}
+		if (tmp->vm_end > addr) {
+			vma = tmp;
+			if (tmp->vm_start <= addr)
+				break;
+			rb_node = rb_node->rb_left;
+		} else
+			rb_node = rb_node->rb_right;
+	}
+
+	if (vma)
+		vmacache_update(addr, vma);
 	return vma;
 }
 
@@ -2388,7 +2389,9 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 	} else
 		mm->highest_vm_end = prev ? prev->vm_end : 0;
 	tail_vma->vm_next = NULL;
-	mm->mmap_cache = NULL;		/* Kill the cache. */
+
+	/* Kill the cache */
+	vmacache_invalidate(mm);
 }
 
 /*