author	Alex Shi <alex.shi@intel.com>	2012-06-27 21:02:19 -0400
committer	H. Peter Anvin <hpa@zytor.com>	2012-06-27 22:29:10 -0400
commit	c4211f42d3e66875298a5e26a75109878c80f15b (patch)
tree	5f4db23b52be8eb74f95c35621373df790eacdd2
parent	d8dfe60d6dcad5989c4558b753b98d657e2813c0 (diff)
x86/tlb: add tlb_flushall_shift for specific CPU
Testing shows that different CPU types (micro-architectures and NUMA modes) have different balance points between the full TLB flush and multiple invlpg calls, and that in some cases the invlpg-based range flush does not help at all. This patch adds an interface so x86 vendor developers can set a different shift for each CPU type: on the machines at hand, the balance point is 16 entries on Romely-EP, 8 entries on Bloomfield NHM-EP, and 256 on an IVB mobile CPU, while on a model 15 Core2 Xeon using invlpg does not help at all. Untested machines get a conservative setting, the same as NHM CPUs.

Signed-off-by: Alex Shi <alex.shi@intel.com>
Link: http://lkml.kernel.org/r/1340845344-27557-5-git-send-email-alex.shi@intel.com
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
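The per-CPU shifts chosen in this patch reproduce the quoted balance points if one assumes a TLB with roughly 512 active entries. A stand-alone sketch of that arithmetic, with hypothetical values (the entry count and labels are assumptions, not taken from the patch):

#include <stdio.h>

int main(void)
{
	long act_entries = 512;			/* assumed active TLB entries */
	int shifts[] = { 6, 5, 1 };		/* NHM, SNB, IVB values below */
	const char *names[] = { "NHM-EP", "Romely-EP", "IVB mobile" };

	/* Ranges larger than act_entries >> shift fall back to a full
	 * flush, so a smaller shift tolerates longer invlpg loops. */
	for (int i = 0; i < 3; i++)
		printf("%s: flush all above %ld pages\n",
		       names[i], act_entries >> shifts[i]);
	return 0;
}

With 512 entries this prints thresholds of 8, 16, and 256 pages, matching the balance points quoted above.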
-rw-r--r--	arch/x86/include/asm/processor.h	2
-rw-r--r--	arch/x86/kernel/cpu/common.c	14
-rw-r--r--	arch/x86/kernel/cpu/intel.c	34
-rw-r--r--	arch/x86/mm/tlb.c	7
-rw-r--r--	include/asm-generic/tlb.h	3
5 files changed, 53 insertions, 7 deletions
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 39b2bd48dfbc..d048cad9bcad 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -72,6 +72,8 @@ extern u16 __read_mostly tlb_lli_4m[NR_INFO];
 extern u16 __read_mostly tlb_lld_4k[NR_INFO];
 extern u16 __read_mostly tlb_lld_2m[NR_INFO];
 extern u16 __read_mostly tlb_lld_4m[NR_INFO];
+extern s8  __read_mostly tlb_flushall_shift;
+
 /*
  * CPU type and hardware bug flags. Kept separately for each CPU.
  * Members of this structure are referenced in head.S, so think twice
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index b2016df00813..7595552600b8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -459,16 +459,26 @@ u16 __read_mostly tlb_lld_4k[NR_INFO];
 u16 __read_mostly tlb_lld_2m[NR_INFO];
 u16 __read_mostly tlb_lld_4m[NR_INFO];
 
+/*
+ * tlb_flushall_shift shows the balance point in replacing cr3 write
+ * with multiple 'invlpg'. It will do this replacement when
+ *   flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
+ * If tlb_flushall_shift is -1, means the replacement will be disabled.
+ */
+s8  __read_mostly tlb_flushall_shift = -1;
+
 void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
 {
 	if (this_cpu->c_detect_tlb)
 		this_cpu->c_detect_tlb(c);
 
 	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
-		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n",
+		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n"	\
+		"tlb_flushall_shift is 0x%x\n",
 		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
 		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
-		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES]);
+		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
+		tlb_flushall_shift);
 }
 
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
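One detail worth noting in the new printk above: tlb_flushall_shift is an s8, and default argument promotion widens it to int before the 0x%x conversion, so the disabled value -1 comes out as 0xffffffff rather than as a small hex number. A quick user-space check of that behavior (illustration only, not part of the patch):

#include <stdio.h>

int main(void)
{
	signed char tlb_flushall_shift = -1;	/* the "disabled" value */

	/* Promotion to int sign-extends, so -1 prints as 0xffffffff. */
	printf("tlb_flushall_shift is 0x%x\n", tlb_flushall_shift);
	return 0;
}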
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index ed0d512cf51b..0a4ce2980a5a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -610,6 +610,39 @@ static void __cpuinit intel_tlb_lookup(const unsigned char desc)
 	}
 }
 
+static void __cpuinit intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
+{
+	if (!cpu_has_invlpg) {
+		tlb_flushall_shift = -1;
+		return;
+	}
+	switch ((c->x86 << 8) + c->x86_model) {
+	case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+	case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+	case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+	case 0x61d: /* six-core 45 nm xeon "Dunnington" */
+		tlb_flushall_shift = -1;
+		break;
+	case 0x61a: /* 45 nm nehalem, "Bloomfield" */
+	case 0x61e: /* 45 nm nehalem, "Lynnfield" */
+	case 0x625: /* 32 nm nehalem, "Clarkdale" */
+	case 0x62c: /* 32 nm nehalem, "Gulftown" */
+	case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
+	case 0x62f: /* 32 nm Xeon E7 */
+		tlb_flushall_shift = 6;
+		break;
+	case 0x62a: /* SandyBridge */
+	case 0x62d: /* SandyBridge, "Romely-EP" */
+		tlb_flushall_shift = 5;
+		break;
+	case 0x63a: /* Ivybridge */
+		tlb_flushall_shift = 1;
+		break;
+	default:
+		tlb_flushall_shift = 6;
+	}
+}
+
 static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
 {
 	int i, j, n;
@@ -630,6 +663,7 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
 		for (j = 1 ; j < 16 ; j++)
 			intel_tlb_lookup(desc[j]);
 	}
+	intel_tlb_flushall_shift_set(c);
 }
 
 static const struct cpu_dev __cpuinitconst intel_cpu_dev = {
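The switch key in intel_tlb_flushall_shift_set() packs the CPUID family into the high byte and the model into the low byte, so family 6, model 0x2a (SandyBridge) matches case 0x62a. A tiny sketch of the encoding (the helper name fam_model is hypothetical):

#include <stdio.h>

/* Same packing as the switch key: (c->x86 << 8) + c->x86_model. */
static unsigned int fam_model(unsigned int family, unsigned int model)
{
	return (family << 8) + model;
}

int main(void)
{
	printf("0x%x\n", fam_model(6, 0x2a));	/* prints 0x62a */
	return 0;
}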
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 184a02a4d871..2939f2f9edbb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -316,8 +316,6 @@ void flush_tlb_mm(struct mm_struct *mm)
 	preempt_enable();
 }
 
-#define FLUSHALL_BAR	16
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline unsigned long has_large_page(struct mm_struct *mm,
 					   unsigned long start, unsigned long end)
@@ -352,7 +350,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm;
 
-	if (!cpu_has_invlpg || vma->vm_flags & VM_HUGETLB) {
+	if (vma->vm_flags & VM_HUGETLB || tlb_flushall_shift == -1) {
 flush_all:
 		flush_tlb_mm(vma->vm_mm);
 		return;
@@ -373,7 +371,8 @@ flush_all:
 		act_entries = tlb_entries > mm->total_vm ?
 					mm->total_vm : tlb_entries;
 
-		if ((end - start)/PAGE_SIZE > act_entries/FLUSHALL_BAR)
+		if ((end - start) >> PAGE_SHIFT >
+					act_entries >> tlb_flushall_shift)
 			local_flush_tlb();
 		else {
 			if (has_large_page(mm, start, end)) {
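To see the behavioral change in flush_tlb_range(): the old code divided the page count by the fixed FLUSHALL_BAR of 16, while the new code shifts by the per-CPU value. A user-space sketch comparing the two rules for a 16-page flush (the entry count and the NHM shift of 6 are assumptions):

#include <stdio.h>

#define PAGE_SHIFT	12
#define FLUSHALL_BAR	16	/* the old fixed divisor */

int main(void)
{
	unsigned long start = 0, end = 16UL << PAGE_SHIFT;	/* 16 pages */
	unsigned long act_entries = 512;			/* assumed */
	int tlb_flushall_shift = 6;				/* NHM setting */

	/* Old rule: flush all once the range exceeds 512 / 16 = 32 pages. */
	int old_rule = (end - start) / (1UL << PAGE_SHIFT) >
					act_entries / FLUSHALL_BAR;
	/* New rule: flush all once the range exceeds 512 >> 6 = 8 pages. */
	int new_rule = ((end - start) >> PAGE_SHIFT) >
					(act_entries >> tlb_flushall_shift);

	printf("old: %s, new: %s\n", old_rule ? "flush all" : "invlpg loop",
	       new_rule ? "flush all" : "invlpg loop");
	return 0;
}

Here the old rule would still loop over 16 invlpg calls, while the new per-CPU shift gives up sooner and writes cr3, matching the measured 8-entry balance point on Nehalem.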
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index f96a5b58a975..75e888b3cfd2 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -113,7 +113,8 @@ static inline int tlb_fast_mode(struct mmu_gather *tlb)
 
 void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm);
 void tlb_flush_mmu(struct mmu_gather *tlb);
-void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end);
+void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
+							unsigned long end);
 int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
 
 /* tlb_remove_page