author		David Rientjes <rientjes@google.com>	2018-08-17 18:49:58 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-08-17 19:20:32 -0400
commit		ddbf369c0a33924f76d092985bd20d9310f43d7f
tree		cd24a886c4e71a26ad3f51e830a5e33e1b488b32	/mm/vmacache.c
parent		6b51e88199ca4f75ff647eff28efd30bfcb08dc4
mm, vmacache: hash addresses based on pmd
When perf profiling a wide variety of different workloads, it was found that vmacache_find() had higher than expected cost: up to 0.08% of cpu utilization in some cases. This was found to rival other core VM functions such as alloc_pages_vma() with thp enabled and default mempolicy, and the conditionals in __get_vma_policy().

VMACACHE_HASH() determines which of the four per-task_struct slots a vma is cached in for a particular address. This currently depends on the pfn, so pfn 5212 occupies a different vmacache slot than its neighboring pfn 5213.

vmacache_find() iterates through all four of current's vmacache slots when looking up an address. Hashing based on pfn, an address has ~1/VMACACHE_SIZE chance of being cached in the first vmacache slot, or about 25%, *if* the vma is cached.

This patch hashes an address by its pmd instead of pte to optimize for workloads with good spatial locality. This results in a higher probability of vmas being cached in the first slot that is checked: normally ~70% on the same workloads instead of 25%.

[rientjes@google.com: various updates]
Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1807231532290.109445@chino.kir.corp.google.com
Link: http://lkml.kernel.org/r/alpine.DEB.2.21.1807091749150.114630@chino.kir.corp.google.com
Signed-off-by: David Rientjes <rientjes@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
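[Editorial illustration, not part of the patch.] The effect of switching the hash shift is easy to see in isolation. The user-space sketch below restates the slot-selection arithmetic with hard-coded assumptions (four slots, 4KB pages, a 2MB pmd as on x86-64; the helper names hash_by_page()/hash_by_pmd() are invented for the example). It shows that neighboring pages hash to different slots under a page-based hash but share one slot under a pmd-based hash:

/*
 * Stand-alone sketch of VMACACHE_HASH() before and after the patch.
 * All constants are assumptions for illustration, not kernel headers.
 */
#include <stdio.h>

#define VMACACHE_SIZE	4			/* four per-task slots */
#define VMACACHE_MASK	(VMACACHE_SIZE - 1)
#define PAGE_SHIFT	12			/* assumed 4KB pages */
#define PMD_SHIFT	21			/* assumed 2MB pmd (x86-64) */

/* Old behaviour: slot depends on the pfn, so adjacent pages hash apart. */
static unsigned int hash_by_page(unsigned long addr)
{
	return (addr >> PAGE_SHIFT) & VMACACHE_MASK;
}

/* New behaviour: slot depends on the pmd, so a 2MB region shares one slot. */
static unsigned int hash_by_pmd(unsigned long addr)
{
	return (addr >> PMD_SHIFT) & VMACACHE_MASK;
}

int main(void)
{
	unsigned long base = 0x7f0000000000UL;
	int i;

	for (i = 0; i < 4; i++) {
		unsigned long addr = base + i * 4096UL;	/* neighboring pages */

		printf("addr %#lx: page-based slot %u, pmd-based slot %u\n",
		       addr, hash_by_page(addr), hash_by_pmd(addr));
	}
	return 0;
}

Run, this spreads the four neighboring addresses across slots 0-3 with the page-based hash but keeps them all in slot 0 with the pmd-based hash, which is the spatial-locality effect the patch exploits.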
Diffstat (limited to 'mm/vmacache.c')
-rw-r--r--	mm/vmacache.c	38
1 file changed, 29 insertions, 9 deletions
diff --git a/mm/vmacache.c b/mm/vmacache.c
index db7596eb6132..ea517bef7dc5 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -6,6 +6,18 @@
 #include <linux/sched/task.h>
 #include <linux/mm.h>
 #include <linux/vmacache.h>
+#include <asm/pgtable.h>
+
+/*
+ * Hash based on the pmd of addr if configured with MMU, which provides a good
+ * hit rate for workloads with spatial locality. Otherwise, use pages.
+ */
+#ifdef CONFIG_MMU
+#define VMACACHE_SHIFT	PMD_SHIFT
+#else
+#define VMACACHE_SHIFT	PAGE_SHIFT
+#endif
+#define VMACACHE_HASH(addr) ((addr >> VMACACHE_SHIFT) & VMACACHE_MASK)
 
 /*
  * Flush vma caches for threads that share a given mm.
@@ -87,6 +99,7 @@ static bool vmacache_valid(struct mm_struct *mm)
 
 struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 {
+	int idx = VMACACHE_HASH(addr);
 	int i;
 
 	count_vm_vmacache_event(VMACACHE_FIND_CALLS);
@@ -95,16 +108,20 @@ struct vm_area_struct *vmacache_find(struct mm_struct *mm, unsigned long addr)
 		return NULL;
 
 	for (i = 0; i < VMACACHE_SIZE; i++) {
-		struct vm_area_struct *vma = current->vmacache.vmas[i];
+		struct vm_area_struct *vma = current->vmacache.vmas[idx];
 
-		if (!vma)
-			continue;
-		if (WARN_ON_ONCE(vma->vm_mm != mm))
-			break;
-		if (vma->vm_start <= addr && vma->vm_end > addr) {
-			count_vm_vmacache_event(VMACACHE_FIND_HITS);
-			return vma;
+		if (vma) {
+#ifdef CONFIG_DEBUG_VM_VMACACHE
+			if (WARN_ON_ONCE(vma->vm_mm != mm))
+				break;
+#endif
+			if (vma->vm_start <= addr && vma->vm_end > addr) {
+				count_vm_vmacache_event(VMACACHE_FIND_HITS);
+				return vma;
+			}
 		}
+		if (++idx == VMACACHE_SIZE)
+			idx = 0;
 	}
 
 	return NULL;
@@ -115,6 +132,7 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
 					   unsigned long start,
 					   unsigned long end)
 {
+	int idx = VMACACHE_HASH(start);
 	int i;
 
 	count_vm_vmacache_event(VMACACHE_FIND_CALLS);
@@ -123,12 +141,14 @@ struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm,
 		return NULL;
 
 	for (i = 0; i < VMACACHE_SIZE; i++) {
-		struct vm_area_struct *vma = current->vmacache.vmas[i];
+		struct vm_area_struct *vma = current->vmacache.vmas[idx];
 
 		if (vma && vma->vm_start == start && vma->vm_end == end) {
 			count_vm_vmacache_event(VMACACHE_FIND_HITS);
 			return vma;
 		}
+		if (++idx == VMACACHE_SIZE)
+			idx = 0;
 	}
 
 	return NULL;
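
[Editorial note, not part of the patch.] Both rewritten loops still visit every slot, but they now start at the hashed slot and wrap around, so a vma cached at its hashed slot is found on the first iteration. A stand-alone sketch of that probe order (the starting index 2 is made up for the example):

/* Demonstrates the rotated probe order used by the lookup loops above. */
#include <stdio.h>

#define VMACACHE_SIZE	4

int main(void)
{
	int idx = 2;	/* pretend VMACACHE_HASH(addr) returned 2 */
	int i;

	printf("probe order:");
	for (i = 0; i < VMACACHE_SIZE; i++) {
		printf(" %d", idx);
		if (++idx == VMACACHE_SIZE)	/* wrap around, as in the patch */
			idx = 0;
	}
	printf("\n");	/* prints: probe order: 2 3 0 1 */
	return 0;
}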