aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorHidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>2007-07-13 19:21:44 -0400
committerTony Luck <tony.luck@intel.com>2007-07-13 19:21:44 -0400
commit829a9996259e4d0b20ce7b94c49b985d6ba6b760 (patch)
tree0ed75d44d279dc233b217a407c7b7f571846235e /arch
parent83e12a076e3587d60cfbe65a761ef54e14a264e3 (diff)
[IA64] ar.itc access must really be after xtime_lock.sequence has been read
The ".acq" semantics of the load only apply w.r.t. other data access. Reading the clock (ar.itc) isn't a data access so strange things can happen here. Specifically the read of ar.itc can be launched as soon as the read of xtime_lock.sequence is ISSUED. Since this may cache miss, and that might cause a thread switch, and there may be cache contention for the line containing xtime_lock, it may be a long time before the actual value is returned, so the ar.itc value may be very stale. Move the consumption of r28 up before the read of ar.itc to make sure that we really have got the current value of xtime_lock.sequence before look at ar.itc. Signed-off-by: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/kernel/fsys.S4
1 files changed, 3 insertions, 1 deletions
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 8589e84a27c6..3f926c2dc708 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -247,6 +247,9 @@ ENTRY(fsys_gettimeofday)
247.time_redo: 247.time_redo:
248 .pred.rel.mutex p8,p9,p10 248 .pred.rel.mutex p8,p9,p10
249 ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes 249 ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes
250 ;;
251 and r28 = ~1,r28 // Make sequence even to force retry if odd
252 ;;
250(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! 253(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
251 add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20 254 add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
252(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues.. 255(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues..
@@ -284,7 +287,6 @@ EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare
284(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET 287(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
285(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo 288(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo
286 // simulate tbit.nz.or p7,p0 = r28,0 289 // simulate tbit.nz.or p7,p0 = r28,0
287 and r28 = ~1,r28 // Make sequence even to force retry if odd
288 getf.sig r2 = f8 290 getf.sig r2 = f8
289 mf 291 mf
290 add r8 = r8,r18 // Add time interpolator offset 292 add r8 = r8,r18 // Add time interpolator offset