aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel/mca_asm.S
diff options
context:
space:
mode:
authorChen, Kenneth W <kenneth.w.chen@intel.com>2006-10-13 13:08:13 -0400
committerTony Luck <tony.luck@intel.com>2007-02-06 18:04:48 -0500
commit00b65985fb2fc542b855b03fcda0d0f2bab4f442 (patch)
treedc9372aced10184945862b9adf0848da3e0e946f /arch/ia64/kernel/mca_asm.S
parenta0776ec8e97bf109e7d973d09fc3e1814eb32bfb (diff)
[IA64] relax per-cpu TLB requirement to DTC
Instead of pinning per-cpu TLB into a DTR, use DTC. This will free up one TLB entry for application, or even kernel if access pattern to per-cpu data area has high temporal locality. Since per-cpu is mapped at the top of region 7 address, we just need to add special case in alt_dtlb_miss. The physical address of per-cpu data is already conveniently stored in IA64_KR(PER_CPU_DATA). Latency for alt_dtlb_miss is not affected as we can hide all the latency. It was measured that alt_dtlb_miss handler has 23 cycles latency before and after the patch. The performance effect is massive for applications that put lots of tlb pressure on CPU. Workload environment like database online transaction processing or application uses tera-byte of memory would benefit the most. Measurement with industry standard database benchmark shown an upward of 1.6% gain. While smaller workloads like cpu, java also showing small improvement. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca_asm.S')
-rw-r--r--arch/ia64/kernel/mca_asm.S24
1 files changed, 0 insertions, 24 deletions
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
index c6b607c00dee..8c9c26aa6ae0 100644
--- a/arch/ia64/kernel/mca_asm.S
+++ b/arch/ia64/kernel/mca_asm.S
@@ -101,14 +101,6 @@ ia64_do_tlb_purge:
101 ;; 101 ;;
102 srlz.d 102 srlz.d
103 ;; 103 ;;
104 // 2. Purge DTR for PERCPU data.
105 movl r16=PERCPU_ADDR
106 mov r18=PERCPU_PAGE_SHIFT<<2
107 ;;
108 ptr.d r16,r18
109 ;;
110 srlz.d
111 ;;
112 // 3. Purge ITR for PAL code. 104 // 3. Purge ITR for PAL code.
113 GET_THIS_PADDR(r2, ia64_mca_pal_base) 105 GET_THIS_PADDR(r2, ia64_mca_pal_base)
114 ;; 106 ;;
@@ -196,22 +188,6 @@ ia64_reload_tr:
196 srlz.i 188 srlz.i
197 srlz.d 189 srlz.d
198 ;; 190 ;;
199 // 2. Reload DTR register for PERCPU data.
200 GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
201 ;;
202 movl r16=PERCPU_ADDR // vaddr
203 movl r18=PERCPU_PAGE_SHIFT<<2
204 ;;
205 mov cr.itir=r18
206 mov cr.ifa=r16
207 ;;
208 ld8 r18=[r2] // load per-CPU PTE
209 mov r16=IA64_TR_PERCPU_DATA;
210 ;;
211 itr.d dtr[r16]=r18
212 ;;
213 srlz.d
214 ;;
215 // 3. Reload ITR for PAL code. 191 // 3. Reload ITR for PAL code.
216 GET_THIS_PADDR(r2, ia64_mca_pal_pte) 192 GET_THIS_PADDR(r2, ia64_mca_pal_pte)
217 ;; 193 ;;