author	Alex Shi <alex.shi@intel.com>	2012-06-27 21:02:22 -0400
committer	H. Peter Anvin <hpa@zytor.com>	2012-06-27 22:29:11 -0400
commit	611ae8e3f5204f7480b3b405993b3352cfa16662 (patch)
tree	fc8d829c331eafccc0939f2ed10655f605bac8c7 /arch/x86/mm
parent	597e1c3580b7cfd95bb0f3167e2b297bf8a5a3ae (diff)
x86/tlb: enable tlb flush range support for x86
Not every tlb_flush really needs to evacuate all TLB entries. In munmap, for example, a few 'invlpg' instructions are better for overall process performance, since they leave most TLB entries in place for later accesses.

This patch also rewrites flush_tlb_range for two purposes:
1. split it out to get a flush_tlb_mm_range function.
2. clean it up to reduce line breaking, thanks to Borislav's input.

My 'munmap' micro benchmark http://lkml.org/lkml/2012/5/17/59 shows that random memory access on the other CPUs gets a 0~50% speedup on a 2P * 4 cores * HT NHM EP machine while doing 'munmap'.

Thanks to Yongjie for testing this patch:
-------------
I used Linux 3.4-RC6 w/ and w/o his patches as the Xen dom0 and guest
kernel. After running two benchmarks in a Xen HVM guest, I found his
patches brought about a 1%~3% performance gain in the 'kernel build'
and 'netperf' testing, though the gain was not very stable in the
'kernel build' testing.

Some detailed testing results are below.

Testing Environment:
	Hardware: Romley-EP platform
	Xen version: latest upstream
	Linux kernel: 3.4-RC6
	Guest vCPU number: 8
	NIC: Intel 82599 (10GB bandwidth)

In 'kernel build' testing in the guest:
	Command line	| performance gain
	make -j 4	| 3.81%
	make -j 8	| 0.37%
	make -j 16	| -0.52%

In 'netperf' testing, we tested TCP_STREAM with the default socket size,
using 16384-byte packets as the large packet and 64-byte packets as the
small packet. I used several clients to add networking pressure, and the
'netperf' server automatically spawned several threads to respond to
them. Both large-size and small-size packets were used in the testing.

	Packet size	| Thread number	| performance gain
	16384 bytes	| 4		| 0.02%
	16384 bytes	| 8		| 2.21%
	16384 bytes	| 16		| 2.04%
	64 bytes	| 4		| 1.07%
	64 bytes	| 8		| 3.31%
	64 bytes	| 16		| 0.71%

Signed-off-by: Alex Shi <alex.shi@intel.com>
Link: http://lkml.kernel.org/r/1340845344-27557-8-git-send-email-alex.shi@intel.com
Tested-by: Ren, Yongjie <yongjie.ren@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
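To make the balance point concrete, below is a minimal user-space sketch of the decision the patch adds: flush the range one page at a time only when the number of pages to flush is no larger than the task's plausible share of TLB entries shifted by tlb_flushall_shift; otherwise drop the whole TLB. The DTLB size constant, the helper name flush_range_is_cheaper() and the example numbers are assumptions for illustration; the real logic lives in flush_tlb_mm_range() in the diff below.

/*
 * Illustrative model of the range-flush heuristic; not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT		12
#define TLB_LLD_4K_ENTRIES	512	/* assumed last-level DTLB size */

/*
 * Return true when flushing [start, end) with per-page 'invlpg' is
 * expected to be cheaper than a full TLB flush, given how many entries
 * the task can realistically occupy and the tuned balance shift.
 */
static bool flush_range_is_cheaper(unsigned long start, unsigned long end,
				   unsigned long total_vm_pages,
				   int tlb_flushall_shift)
{
	unsigned long act_entries;

	if (tlb_flushall_shift == -1)	/* range flushing disabled */
		return false;

	/* Assume at most total_vm_pages of the TLB belong to this task. */
	act_entries = total_vm_pages < TLB_LLD_4K_ENTRIES ?
			total_vm_pages : TLB_LLD_4K_ENTRIES;

	return ((end - start) >> PAGE_SHIFT) <=
			(act_entries >> tlb_flushall_shift);
}

int main(void)
{
	/* A 16-page munmap on a task with 10000 mapped pages, shift = 5. */
	unsigned long start = 0x400000, end = start + (16UL << PAGE_SHIFT);

	printf("use invlpg range flush: %s\n",
	       flush_range_is_cheaper(start, end, 10000, 5) ? "yes" : "no");
	return 0;
}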
Diffstat (limited to 'arch/x86/mm')
-rw-r--r--	arch/x86/mm/tlb.c	112
1 file changed, 46 insertions, 66 deletions
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 5911f61e300e..481737def84a 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -301,23 +301,10 @@ void flush_tlb_current_task(void)
 	preempt_enable();
 }
 
-void flush_tlb_mm(struct mm_struct *mm)
-{
-	preempt_disable();
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			local_flush_tlb();
-		else
-			leave_mm(smp_processor_id());
-	}
-	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
-
-	preempt_enable();
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ * It can find out the THP large page, or
+ * HUGETLB page in tlb_flush when THP disabled
+ */
 static inline unsigned long has_large_page(struct mm_struct *mm,
 						unsigned long start, unsigned long end)
 {
@@ -339,68 +326,61 @@ static inline unsigned long has_large_page(struct mm_struct *mm,
 	}
 	return 0;
 }
-#else
-static inline unsigned long has_large_page(struct mm_struct *mm,
-						unsigned long start, unsigned long end)
-{
-	return 0;
-}
-#endif
-void flush_tlb_range(struct vm_area_struct *vma,
-				   unsigned long start, unsigned long end)
-{
-	struct mm_struct *mm;
 
-	if (vma->vm_flags & VM_HUGETLB || tlb_flushall_shift == -1) {
-flush_all:
-		flush_tlb_mm(vma->vm_mm);
-		return;
-	}
+void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+				unsigned long end, unsigned long vmflag)
+{
+	unsigned long addr;
+	unsigned act_entries, tlb_entries = 0;
 
 	preempt_disable();
-	mm = vma->vm_mm;
-	if (current->active_mm == mm) {
-		if (current->mm) {
-			unsigned long addr, vmflag = vma->vm_flags;
-			unsigned act_entries, tlb_entries = 0;
+	if (current->active_mm != mm)
+		goto flush_all;
 
-			if (vmflag & VM_EXEC)
-				tlb_entries = tlb_lli_4k[ENTRIES];
-			else
-				tlb_entries = tlb_lld_4k[ENTRIES];
-
-			act_entries = tlb_entries > mm->total_vm ?
-					mm->total_vm : tlb_entries;
+	if (!current->mm) {
+		leave_mm(smp_processor_id());
+		goto flush_all;
+	}
 
-			if ((end - start) >> PAGE_SHIFT >
-					act_entries >> tlb_flushall_shift)
-				local_flush_tlb();
-			else {
-				if (has_large_page(mm, start, end)) {
-					preempt_enable();
-					goto flush_all;
-				}
-				for (addr = start; addr < end;
-						addr += PAGE_SIZE)
-					__flush_tlb_single(addr);
+	if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1
+					|| vmflag == VM_HUGETLB) {
+		local_flush_tlb();
+		goto flush_all;
+	}
 
-				if (cpumask_any_but(mm_cpumask(mm),
-						smp_processor_id()) < nr_cpu_ids)
-					flush_tlb_others(mm_cpumask(mm), mm,
-								start, end);
-				preempt_enable();
-				return;
-			}
-		} else {
-			leave_mm(smp_processor_id());
+	/* In modern CPU, last level tlb used for both data/ins */
+	if (vmflag & VM_EXEC)
+		tlb_entries = tlb_lli_4k[ENTRIES];
+	else
+		tlb_entries = tlb_lld_4k[ENTRIES];
+	/* Assume all of TLB entries was occupied by this task */
+	act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm;
+
+	/* tlb_flushall_shift is on balance point, details in commit log */
+	if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
+		local_flush_tlb();
+	else {
+		if (has_large_page(mm, start, end)) {
+			local_flush_tlb();
+			goto flush_all;
 		}
+		/* flush range by one by one 'invlpg' */
+		for (addr = start; addr < end; addr += PAGE_SIZE)
+			__flush_tlb_single(addr);
+
+		if (cpumask_any_but(mm_cpumask(mm),
+				smp_processor_id()) < nr_cpu_ids)
+			flush_tlb_others(mm_cpumask(mm), mm, start, end);
+		preempt_enable();
+		return;
 	}
+
+flush_all:
 	if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
 		flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
 	preempt_enable();
 }
 
-
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 {
 	struct mm_struct *mm = vma->vm_mm;