about | summary | refs | log | tree | commit | diff | stats
path: root/arch
diff options
context:
space:
mode:
author Tony Luck <tony.luck@intel.com> 2008-04-17 13:12:07 -0400
committer Tony Luck <tony.luck@intel.com> 2008-04-17 13:12:07 -0400
commit 2a467d5f7d6bdc90c365db167a10022dd8351894 (patch)
tree 687214aea0fd9a8dc716e14ca8becaeee2836a96 /arch
parent 4b119e21d0c66c22e8ca03df05d9de623d0eb50f (diff)
parent 4fe01c68eba53c3f324807faff71535218c41e9c (diff)
Pull fsys_gettimeofday into release branch
Diffstat (limited to 'arch')
-rw-r--r-- arch/ia64/kernel/fsys.S 28
-rw-r--r-- arch/ia64/kernel/patch.c 8
2 files changed, 16 insertions, 20 deletions
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 44841971f077..6a72db7ddecc 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -210,27 +210,25 @@ ENTRY(fsys_gettimeofday)
210 // Note that instructions are optimized for McKinley. McKinley can 210 // Note that instructions are optimized for McKinley. McKinley can
211 // process two bundles simultaneously and therefore we continuously 211 // process two bundles simultaneously and therefore we continuously
212 // try to feed the CPU two bundles and then a stop. 212 // try to feed the CPU two bundles and then a stop.
213 // 213
214 // Additional note that code has changed a lot. Optimization is TBD.
215 // Comments begin with "?" are maybe outdated.
216 tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle
217 mov pr = r30,0xc000 // Set predicates according to function
218 add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 214 add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
215 tnat.nz p6,p0 = r31 // guard against Nat argument
216(p6) br.cond.spnt.few .fail_einval
219 movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address 217 movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
220 ;; 218 ;;
219 ld4 r2 = [r2] // process work pending flags
221 movl r29 = itc_jitter_data // itc_jitter 220 movl r29 = itc_jitter_data // itc_jitter
222 add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time 221 add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
223 ld4 r2 = [r2] // process work pending flags
224 ;;
225(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
226 add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 222 add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
227 add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 223 mov pr = r30,0xc000 // Set predicates according to function
224 ;;
228 and r2 = TIF_ALLWORK_MASK,r2 225 and r2 = TIF_ALLWORK_MASK,r2
229(p6) br.cond.spnt.few .fail_einval // ? deferred branch 226 add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
227(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
230 ;; 228 ;;
231 add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last 229 add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
232 cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled 230 cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
233(p6) br.cond.spnt.many fsys_fallback_syscall 231(p6) br.cond.spnt.many fsys_fallback_syscall
234 ;; 232 ;;
235 // Begin critical section 233 // Begin critical section
236.time_redo: 234.time_redo:
@@ -258,7 +256,6 @@ ENTRY(fsys_gettimeofday)
258(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! 256(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
259(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. 257(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
260(p13) ld8 r25 = [r19] // get itc_lastcycle value 258(p13) ld8 r25 = [r19] // get itc_lastcycle value
261 ;; // ? could be removed by moving the last add upward
262 ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec 259 ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
263 ;; 260 ;;
264 ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec 261 ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
@@ -285,13 +282,12 @@ ENTRY(fsys_gettimeofday)
285EX(.fail_efault, probe.w.fault r31, 3) 282EX(.fail_efault, probe.w.fault r31, 3)
286 xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) 283 xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
287 ;; 284 ;;
288 // ? simulate tbit.nz.or p7,p0 = r28,0
289 getf.sig r2 = f8 285 getf.sig r2 = f8
290 mf 286 mf
291 ;; 287 ;;
292 ld4 r10 = [r20] // gtod_lock.sequence 288 ld4 r10 = [r20] // gtod_lock.sequence
293 shr.u r2 = r2,r23 // shift by factor 289 shr.u r2 = r2,r23 // shift by factor
294 ;; // ? overloaded 3 bundles! 290 ;;
295 add r8 = r8,r2 // Add xtime.nsecs 291 add r8 = r8,r2 // Add xtime.nsecs
296 cmp4.ne p7,p0 = r28,r10 292 cmp4.ne p7,p0 = r28,r10
297(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo 293(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
@@ -319,9 +315,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
319EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles 315EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
320(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it 316(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
321 ;; 317 ;;
322 mov r8 = r0
323(p14) getf.sig r2 = f8 318(p14) getf.sig r2 = f8
324 ;; 319 ;;
320 mov r8 = r0
325(p14) shr.u r21 = r2, 4 321(p14) shr.u r21 = r2, 4
326 ;; 322 ;;
327EX(.fail_efault, st8 [r31] = r9) 323EX(.fail_efault, st8 [r31] = r9)
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
index 2cb9425e0421..e0dca8743dbb 100644
--- a/arch/ia64/kernel/patch.c
+++ b/arch/ia64/kernel/patch.c
@@ -135,10 +135,10 @@ ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
135 135
136 while (offp < (s32 *) end) { 136 while (offp < (s32 *) end) {
137 wp = (u64 *) ia64_imva((char *) offp + *offp); 137 wp = (u64 *) ia64_imva((char *) offp + *offp);
138 wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ 138 wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
139 wp[1] = 0x0004000000000200UL; 139 wp[1] = 0x0084006880000200UL;
140 wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ 140 wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
141 wp[3] = 0x0084006880000200UL; 141 wp[3] = 0x0004000000000200UL;
142 ia64_fc(wp); ia64_fc(wp + 2); 142 ia64_fc(wp); ia64_fc(wp + 2);
143 ++offp; 143 ++offp;
144 } 144 }