author		Dave Hansen <dave.hansen@linux.intel.com>	2014-07-31 11:40:55 -0400
committer	H. Peter Anvin <hpa@linux.intel.com>	2014-07-31 11:48:50 -0400
commit		e9f4e0a9fe2723078b7a1a1169828dd46a7b2f9e (patch)
tree		c16300d2f05f2fce6b7b70b2c6fed1ac58486129 /arch/x86/mm/tlb.c
parent		4995ab9cf512e9a6cc07dfd6b1d4e2fc48ce7fef (diff)
x86/mm: Rip out complicated, out-of-date, buggy TLB flushing
I think the flush_tlb_mm_range() code that tries to tune the flush sizes based on the CPU needs to get ripped out for several reasons:

1. It is obviously buggy. It uses mm->total_vm to judge the task's footprint in the TLB. It should certainly be using some measure of RSS, *NOT* ->total_vm, since only resident memory can populate the TLB.

2. Haswell and several other CPUs are missing from the intel_tlb_flushall_shift_set() function. Thus, it has been demonstrated to bitrot quickly in practice.

3. It is plain wrong in my VM:

	[    0.037444] Last level iTLB entries: 4KB 0, 2MB 0, 4MB 0
	[    0.037444] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0
	[    0.037444] tlb_flushall_shift: 6

   which leads it to never use invlpg.

4. The assumptions about TLB refill costs are wrong:
	http://lkml.kernel.org/r/1337782555-8088-3-git-send-email-alex.shi@intel.com
   (more on this in later patches)

5. I cannot reproduce the original data: https://lkml.org/lkml/2012/5/17/59
   I believe the sample times were too short. Running the benchmark in a loop yields times that vary quite a bit.

Note that this leaves us with a static ceiling of 1 page. This is a conservative, dumb setting, and will be revised in a later patch.

This also removes the code which attempts to predict whether we are flushing data or instructions. We expect instruction flushes to be relatively rare and not worth tuning for explicitly.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: http://lkml.kernel.org/r/20140731154055.ABC88E89@viggo.jf.intel.com
Acked-by: Rik van Riel <riel@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
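In effect, the flush-size decision collapses to one comparison against the new ceiling. A minimal sketch of the resulting policy (names taken from the patch below; local_flush_tlb() and __flush_tlb_single() are the existing x86 helpers):

	if ((end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
		/* range larger than the ceiling: flush the whole TLB */
		local_flush_tlb();
	} else {
		/* small range: one invlpg per 4K page */
		for (addr = start; addr < end; addr += PAGE_SIZE)
			__flush_tlb_single(addr);
	}

With the ceiling at 1 and 4K pages, only single-page ranges take the invlpg path; anything larger falls back to a full flush.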
Diffstat (limited to 'arch/x86/mm/tlb.c')
-rw-r--r--	arch/x86/mm/tlb.c	87
1 file changed, 11 insertions(+), 76 deletions(-)
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 378fbef279d2..dff6ddebc45f 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -158,13 +158,14 @@ void flush_tlb_current_task(void)
 	preempt_enable();
 }
 
+/* in units of pages */
+unsigned long tlb_single_page_flush_ceiling = 1;
+
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				unsigned long end, unsigned long vmflag)
 {
-	bool need_flush_others_all = true;
+	int need_flush_others_all = 1;
 	unsigned long addr;
-	unsigned act_entries, tlb_entries = 0;
-	unsigned long nr_base_pages;
 
 	preempt_disable();
 	if (current->active_mm != mm)
@@ -175,29 +176,16 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 		goto out;
 	}
 
-	if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1
-					|| vmflag & VM_HUGETLB) {
+	if (end == TLB_FLUSH_ALL || vmflag & VM_HUGETLB) {
 		local_flush_tlb();
 		goto out;
 	}
 
-	/* In modern CPU, last level tlb used for both data/ins */
-	if (vmflag & VM_EXEC)
-		tlb_entries = tlb_lli_4k[ENTRIES];
-	else
-		tlb_entries = tlb_lld_4k[ENTRIES];
-
-	/* Assume all of TLB entries was occupied by this task */
-	act_entries = tlb_entries >> tlb_flushall_shift;
-	act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
-	nr_base_pages = (end - start) >> PAGE_SHIFT;
-
-	/* tlb_flushall_shift is on balance point, details in commit log */
-	if (nr_base_pages > act_entries) {
+	if ((end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
 		count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
 		local_flush_tlb();
 	} else {
-		need_flush_others_all = false;
+		need_flush_others_all = 0;
 		/* flush range by one by one 'invlpg' */
 		for (addr = start; addr < end; addr += PAGE_SIZE) {
 			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
@@ -259,68 +247,15 @@ static void do_kernel_range_flush(void *info)
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	unsigned act_entries;
-	struct flush_tlb_info info;
-
-	/* In modern CPU, last level tlb used for both data/ins */
-	act_entries = tlb_lld_4k[ENTRIES];
 
 	/* Balance as user space task's flush, a bit conservative */
-	if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 ||
-		(end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
-
+	if (end == TLB_FLUSH_ALL ||
+	    (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
 		on_each_cpu(do_flush_tlb_all, NULL, 1);
-	else {
+	} else {
+		struct flush_tlb_info info;
 		info.flush_start = start;
 		info.flush_end = end;
 		on_each_cpu(do_kernel_range_flush, &info, 1);
 	}
 }
-
-#ifdef CONFIG_DEBUG_TLBFLUSH
-static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
-				size_t count, loff_t *ppos)
-{
-	char buf[32];
-	unsigned int len;
-
-	len = sprintf(buf, "%hd\n", tlb_flushall_shift);
-	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
-}
-
-static ssize_t tlbflush_write_file(struct file *file,
-		 const char __user *user_buf, size_t count, loff_t *ppos)
-{
-	char buf[32];
-	ssize_t len;
-	s8 shift;
-
-	len = min(count, sizeof(buf) - 1);
-	if (copy_from_user(buf, user_buf, len))
-		return -EFAULT;
-
-	buf[len] = '\0';
-	if (kstrtos8(buf, 0, &shift))
-		return -EINVAL;
-
-	if (shift < -1 || shift >= BITS_PER_LONG)
-		return -EINVAL;
-
-	tlb_flushall_shift = shift;
-	return count;
-}
-
-static const struct file_operations fops_tlbflush = {
-	.read = tlbflush_read_file,
-	.write = tlbflush_write_file,
-	.llseek = default_llseek,
-};
-
-static int __init create_tlb_flushall_shift(void)
-{
-	debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
-			arch_debugfs_dir, NULL, &fops_tlbflush);
-	return 0;
-}
-late_initcall(create_tlb_flushall_shift);
-#endif