aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/mm
diff options
context:
space:
mode:
author: Chen, Kenneth W <kenneth.w.chen@intel.com> 2006-10-13 13:08:13 -0400
committer: Tony Luck <tony.luck@intel.com> 2007-02-06 18:04:48 -0500
commit: 00b65985fb2fc542b855b03fcda0d0f2bab4f442 (patch)
tree: dc9372aced10184945862b9adf0848da3e0e946f /arch/ia64/mm
parent: a0776ec8e97bf109e7d973d09fc3e1814eb32bfb (diff)
[IA64] relax per-cpu TLB requirement to DTC
Instead of pinning the per-cpu TLB entry into a DTR, use a DTC. This frees up one TLB entry for applications, or even for the kernel if the access pattern to the per-cpu data area has high temporal locality. Since the per-cpu area is mapped at the top of the region 7 address space, we just need to add a special case in alt_dtlb_miss. The physical address of the per-cpu data is already conveniently stored in IA64_KR(PER_CPU_DATA). Latency for alt_dtlb_miss is not affected, as we can hide all the latency: the alt_dtlb_miss handler was measured at 23 cycles of latency both before and after the patch. The performance effect is massive for applications that put a lot of TLB pressure on the CPU. Workload environments such as database online transaction processing, or applications that use terabytes of memory, would benefit the most. Measurements with an industry-standard database benchmark showed a gain of upwards of 1.6%, while smaller workloads such as cpu and java also showed a small improvement. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/mm')
-rw-r--r-- arch/ia64/mm/init.c 11
1 file changed, 1 insertion, 10 deletions
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 1373fae7657f..07d82cd7cbdd 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -337,7 +337,7 @@ setup_gate (void)
337void __devinit 337void __devinit
338ia64_mmu_init (void *my_cpu_data) 338ia64_mmu_init (void *my_cpu_data)
339{ 339{
340 unsigned long psr, pta, impl_va_bits; 340 unsigned long pta, impl_va_bits;
341 extern void __devinit tlb_init (void); 341 extern void __devinit tlb_init (void);
342 342
343#ifdef CONFIG_DISABLE_VHPT 343#ifdef CONFIG_DISABLE_VHPT
@@ -346,15 +346,6 @@ ia64_mmu_init (void *my_cpu_data)
346# define VHPT_ENABLE_BIT 1 346# define VHPT_ENABLE_BIT 1
347#endif 347#endif
348 348
349 /* Pin mapping for percpu area into TLB */
350 psr = ia64_clear_ic();
351 ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
352 pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
353 PERCPU_PAGE_SHIFT);
354
355 ia64_set_psr(psr);
356 ia64_srlz_i();
357
358 /* 349 /*
359 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped 350 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
360 * address space. The IA-64 architecture guarantees that at least 50 bits of 351 * address space. The IA-64 architecture guarantees that at least 50 bits of