aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel/ivt.S
diff options
context:
space:
mode:
authorChen, Kenneth W <kenneth.w.chen@intel.com>2006-10-13 13:08:13 -0400
committerTony Luck <tony.luck@intel.com>2007-02-06 18:04:48 -0500
commit00b65985fb2fc542b855b03fcda0d0f2bab4f442 (patch)
treedc9372aced10184945862b9adf0848da3e0e946f /arch/ia64/kernel/ivt.S
parenta0776ec8e97bf109e7d973d09fc3e1814eb32bfb (diff)
[IA64] relax per-cpu TLB requirement to DTC
Instead of pinning the per-cpu TLB entry into a DTR, use a DTC. This frees up one TLB entry for the application, or even for the kernel if the access pattern to the per-cpu data area has high temporal locality. Since per-cpu data is mapped at the top of the region 7 address space, we just need to add a special case in alt_dtlb_miss. The physical address of the per-cpu data is already conveniently stored in IA64_KR(PER_CPU_DATA). Latency for alt_dtlb_miss is not affected, as we can hide all of the added latency: the alt_dtlb_miss handler was measured at 23 cycles of latency both before and after the patch. The performance effect is massive for applications that put a lot of TLB pressure on the CPU. Workload environments such as database online transaction processing, or applications that use terabytes of memory, would benefit the most. Measurements with an industry-standard database benchmark showed a gain of upward of 1.6%, while smaller workloads such as cpu and java also showed a small improvement. Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/ivt.S')
-rw-r--r--arch/ia64/kernel/ivt.S19
1 files changed, 14 insertions, 5 deletions
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 6b7fcbd3f6f1..34f44d8be00d 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -374,6 +374,7 @@ ENTRY(alt_dtlb_miss)
374 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) 374 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
375 mov r21=cr.ipsr 375 mov r21=cr.ipsr
376 mov r31=pr 376 mov r31=pr
377 mov r24=PERCPU_ADDR
377 ;; 378 ;;
378#ifdef CONFIG_DISABLE_VHPT 379#ifdef CONFIG_DISABLE_VHPT
379 shr.u r22=r16,61 // get the region number into r21 380 shr.u r22=r16,61 // get the region number into r21
@@ -386,22 +387,30 @@ ENTRY(alt_dtlb_miss)
386(p8) mov r29=b0 // save b0 387(p8) mov r29=b0 // save b0
387(p8) br.cond.dptk dtlb_fault 388(p8) br.cond.dptk dtlb_fault
388#endif 389#endif
390 cmp.ge p10,p11=r16,r24 // access to per_cpu_data?
391 tbit.z p12,p0=r16,61 // access to region 6?
392 mov r25=PERCPU_PAGE_SHIFT << 2
393 mov r26=PERCPU_PAGE_SIZE
394 nop.m 0
395 nop.b 0
396 ;;
397(p10) mov r19=IA64_KR(PER_CPU_DATA)
398(p11) and r19=r19,r16 // clear non-ppn fields
389 extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl 399 extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
390 and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field 400 and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
391 tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? 401 tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
392 shr.u r18=r16,57 // move address bit 61 to bit 4
393 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
394 tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? 402 tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
395 ;; 403 ;;
396 andcm r18=0x10,r18 // bit 4=~address-bit(61) 404(p10) sub r19=r19,r26
405(p10) mov cr.itir=r25
397 cmp.ne p8,p0=r0,r23 406 cmp.ne p8,p0=r0,r23
398(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field 407(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
408(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr
399(p8) br.cond.spnt page_fault 409(p8) br.cond.spnt page_fault
400 410
401 dep r21=-1,r21,IA64_PSR_ED_BIT,1 411 dep r21=-1,r21,IA64_PSR_ED_BIT,1
402 or r19=r19,r17 // insert PTE control bits into r19
403 ;; 412 ;;
404 or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 413 or r19=r19,r17 // insert PTE control bits into r19
405(p6) mov cr.ipsr=r21 414(p6) mov cr.ipsr=r21
406 ;; 415 ;;
407(p7) itc.d r19 // insert the TLB entry 416(p7) itc.d r19 // insert the TLB entry