aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel/fsys.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/kernel/fsys.S')
-rw-r--r--arch/ia64/kernel/fsys.S88
1 files changed, 68 insertions, 20 deletions
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
index 44841971f077..c1625c7e1779 100644
--- a/arch/ia64/kernel/fsys.S
+++ b/arch/ia64/kernel/fsys.S
@@ -61,13 +61,29 @@ ENTRY(fsys_getpid)
61 .prologue 61 .prologue
62 .altrp b6 62 .altrp b6
63 .body 63 .body
64 add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
65 ;;
66 ld8 r17=[r17] // r17 = current->group_leader
64 add r9=TI_FLAGS+IA64_TASK_SIZE,r16 67 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
65 ;; 68 ;;
66 ld4 r9=[r9] 69 ld4 r9=[r9]
67 add r8=IA64_TASK_TGID_OFFSET,r16 70 add r17=IA64_TASK_TGIDLINK_OFFSET,r17
68 ;; 71 ;;
69 and r9=TIF_ALLWORK_MASK,r9 72 and r9=TIF_ALLWORK_MASK,r9
70 ld4 r8=[r8] // r8 = current->tgid 73 ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid
74 ;;
75 add r8=IA64_PID_LEVEL_OFFSET,r17
76 ;;
77 ld4 r8=[r8] // r8 = pid->level
78 add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
79 ;;
80 shl r8=r8,IA64_UPID_SHIFT
81 ;;
82 add r17=r17,r8 // r17 = &pid->numbers[pid->level]
83 ;;
84 ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
85 ;;
86 mov r17=0
71 ;; 87 ;;
72 cmp.ne p8,p0=0,r9 88 cmp.ne p8,p0=0,r9
73(p8) br.spnt.many fsys_fallback_syscall 89(p8) br.spnt.many fsys_fallback_syscall
@@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address)
126 .altrp b6 142 .altrp b6
127 .body 143 .body
128 add r9=TI_FLAGS+IA64_TASK_SIZE,r16 144 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
145 add r17=IA64_TASK_TGIDLINK_OFFSET,r16
129 ;; 146 ;;
130 ld4 r9=[r9] 147 ld4 r9=[r9]
131 tnat.z p6,p7=r32 // check argument register for being NaT 148 tnat.z p6,p7=r32 // check argument register for being NaT
149 ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid
132 ;; 150 ;;
133 and r9=TIF_ALLWORK_MASK,r9 151 and r9=TIF_ALLWORK_MASK,r9
134 add r8=IA64_TASK_PID_OFFSET,r16 152 add r8=IA64_PID_LEVEL_OFFSET,r17
135 add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 153 add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
136 ;; 154 ;;
137 ld4 r8=[r8] 155 ld4 r8=[r8] // r8 = pid->level
156 add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0]
157 ;;
158 shl r8=r8,IA64_UPID_SHIFT
159 ;;
160 add r17=r17,r8 // r17 = &pid->numbers[pid->level]
161 ;;
162 ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr
163 ;;
138 cmp.ne p8,p0=0,r9 164 cmp.ne p8,p0=0,r9
139 mov r17=-1 165 mov r17=-1
140 ;; 166 ;;
@@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday)
210 // Note that instructions are optimized for McKinley. McKinley can 236 // Note that instructions are optimized for McKinley. McKinley can
211 // process two bundles simultaneously and therefore we continuously 237 // process two bundles simultaneously and therefore we continuously
212 // try to feed the CPU two bundles and then a stop. 238 // try to feed the CPU two bundles and then a stop.
213 // 239
214 // Additional note that code has changed a lot. Optimization is TBD.
215 // Comments begin with "?" are maybe outdated.
216 tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle
217 mov pr = r30,0xc000 // Set predicates according to function
218 add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 240 add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
241 tnat.nz p6,p0 = r31 // guard against Nat argument
242(p6) br.cond.spnt.few .fail_einval
219 movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address 243 movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address
220 ;; 244 ;;
245 ld4 r2 = [r2] // process work pending flags
221 movl r29 = itc_jitter_data // itc_jitter 246 movl r29 = itc_jitter_data // itc_jitter
222 add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time 247 add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time
223 ld4 r2 = [r2] // process work pending flags
224 ;;
225(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
226 add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 248 add r21 = IA64_CLKSRC_MMIO_OFFSET,r20
227 add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 249 mov pr = r30,0xc000 // Set predicates according to function
250 ;;
228 and r2 = TIF_ALLWORK_MASK,r2 251 and r2 = TIF_ALLWORK_MASK,r2
229(p6) br.cond.spnt.few .fail_einval // ? deferred branch 252 add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29
253(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time
230 ;; 254 ;;
231 add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last 255 add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last
232 cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled 256 cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
233(p6) br.cond.spnt.many fsys_fallback_syscall 257(p6) br.cond.spnt.many fsys_fallback_syscall
234 ;; 258 ;;
235 // Begin critical section 259 // Begin critical section
236.time_redo: 260.time_redo:
@@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday)
258(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! 282(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
259(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. 283(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues..
260(p13) ld8 r25 = [r19] // get itc_lastcycle value 284(p13) ld8 r25 = [r19] // get itc_lastcycle value
261 ;; // ? could be removed by moving the last add upward
262 ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec 285 ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec
263 ;; 286 ;;
264 ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec 287 ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec
@@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday)
285EX(.fail_efault, probe.w.fault r31, 3) 308EX(.fail_efault, probe.w.fault r31, 3)
286 xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) 309 xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
287 ;; 310 ;;
288 // ? simulate tbit.nz.or p7,p0 = r28,0
289 getf.sig r2 = f8 311 getf.sig r2 = f8
290 mf 312 mf
291 ;; 313 ;;
292 ld4 r10 = [r20] // gtod_lock.sequence 314 ld4 r10 = [r20] // gtod_lock.sequence
293 shr.u r2 = r2,r23 // shift by factor 315 shr.u r2 = r2,r23 // shift by factor
294 ;; // ? overloaded 3 bundles! 316 ;;
295 add r8 = r8,r2 // Add xtime.nsecs 317 add r8 = r8,r2 // Add xtime.nsecs
296 cmp4.ne p7,p0 = r28,r10 318 cmp4.ne p7,p0 = r28,r10
297(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo 319(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo
@@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3)
319EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles 341EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
320(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it 342(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it
321 ;; 343 ;;
322 mov r8 = r0
323(p14) getf.sig r2 = f8 344(p14) getf.sig r2 = f8
324 ;; 345 ;;
346 mov r8 = r0
325(p14) shr.u r21 = r2, 4 347(p14) shr.u r21 = r2, 4
326 ;; 348 ;;
327EX(.fail_efault, st8 [r31] = r9) 349EX(.fail_efault, st8 [r31] = r9)
@@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down)
660 nop.i 0 682 nop.i 0
661 ;; 683 ;;
662 mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 684 mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0
685#ifdef CONFIG_VIRT_CPU_ACCOUNTING
686 mov.m r30=ar.itc // M get cycle for accounting
687#else
663 nop.m 0 688 nop.m 0
689#endif
664 nop.i 0 690 nop.i 0
665 ;; 691 ;;
666 mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore 692 mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore
@@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down)
682 cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 708 cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1
683 br.call.sptk.many b7=ia64_syscall_setup // B 709 br.call.sptk.many b7=ia64_syscall_setup // B
684 ;; 710 ;;
711#ifdef CONFIG_VIRT_CPU_ACCOUNTING
712 // mov.m r30=ar.itc is called in advance
713 add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2
714 add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2
715 ;;
716 ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel
717 ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel
718 ;;
719 ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime
720 ld8 r21=[r17] // cumulated utime
721 sub r22=r19,r18 // stime before leave kernel
722 ;;
723 st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp
724 sub r18=r30,r19 // elapsed time in user mode
725 ;;
726 add r20=r20,r22 // sum stime
727 add r21=r21,r18 // sum utime
728 ;;
729 st8 [r16]=r20 // update stime
730 st8 [r17]=r21 // update utime
731 ;;
732#endif
685 mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 733 mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0
686 mov rp=r14 // I0 set the real return addr 734 mov rp=r14 // I0 set the real return addr
687 and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A 735 and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A