diff options
Diffstat (limited to 'arch/ia64/kernel/fsys.S')
-rw-r--r-- | arch/ia64/kernel/fsys.S | 88 |
1 files changed, 68 insertions, 20 deletions
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index 44841971f077..c1625c7e1779 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S | |||
@@ -61,13 +61,29 @@ ENTRY(fsys_getpid) | |||
61 | .prologue | 61 | .prologue |
62 | .altrp b6 | 62 | .altrp b6 |
63 | .body | 63 | .body |
64 | add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 | ||
65 | ;; | ||
66 | ld8 r17=[r17] // r17 = current->group_leader | ||
64 | add r9=TI_FLAGS+IA64_TASK_SIZE,r16 | 67 | add r9=TI_FLAGS+IA64_TASK_SIZE,r16 |
65 | ;; | 68 | ;; |
66 | ld4 r9=[r9] | 69 | ld4 r9=[r9] |
67 | add r8=IA64_TASK_TGID_OFFSET,r16 | 70 | add r17=IA64_TASK_TGIDLINK_OFFSET,r17 |
68 | ;; | 71 | ;; |
69 | and r9=TIF_ALLWORK_MASK,r9 | 72 | and r9=TIF_ALLWORK_MASK,r9 |
70 | ld4 r8=[r8] // r8 = current->tgid | 73 | ld8 r17=[r17] // r17 = current->group_leader->pids[PIDTYPE_PID].pid |
74 | ;; | ||
75 | add r8=IA64_PID_LEVEL_OFFSET,r17 | ||
76 | ;; | ||
77 | ld4 r8=[r8] // r8 = pid->level | ||
78 | add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] | ||
79 | ;; | ||
80 | shl r8=r8,IA64_UPID_SHIFT | ||
81 | ;; | ||
82 | add r17=r17,r8 // r17 = &pid->numbers[pid->level] | ||
83 | ;; | ||
84 | ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr | ||
85 | ;; | ||
86 | mov r17=0 | ||
71 | ;; | 87 | ;; |
72 | cmp.ne p8,p0=0,r9 | 88 | cmp.ne p8,p0=0,r9 |
73 | (p8) br.spnt.many fsys_fallback_syscall | 89 | (p8) br.spnt.many fsys_fallback_syscall |
@@ -126,15 +142,25 @@ ENTRY(fsys_set_tid_address) | |||
126 | .altrp b6 | 142 | .altrp b6 |
127 | .body | 143 | .body |
128 | add r9=TI_FLAGS+IA64_TASK_SIZE,r16 | 144 | add r9=TI_FLAGS+IA64_TASK_SIZE,r16 |
145 | add r17=IA64_TASK_TGIDLINK_OFFSET,r16 | ||
129 | ;; | 146 | ;; |
130 | ld4 r9=[r9] | 147 | ld4 r9=[r9] |
131 | tnat.z p6,p7=r32 // check argument register for being NaT | 148 | tnat.z p6,p7=r32 // check argument register for being NaT |
149 | ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid | ||
132 | ;; | 150 | ;; |
133 | and r9=TIF_ALLWORK_MASK,r9 | 151 | and r9=TIF_ALLWORK_MASK,r9 |
134 | add r8=IA64_TASK_PID_OFFSET,r16 | 152 | add r8=IA64_PID_LEVEL_OFFSET,r17 |
135 | add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 | 153 | add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 |
136 | ;; | 154 | ;; |
137 | ld4 r8=[r8] | 155 | ld4 r8=[r8] // r8 = pid->level |
156 | add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] | ||
157 | ;; | ||
158 | shl r8=r8,IA64_UPID_SHIFT | ||
159 | ;; | ||
160 | add r17=r17,r8 // r17 = &pid->numbers[pid->level] | ||
161 | ;; | ||
162 | ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr | ||
163 | ;; | ||
138 | cmp.ne p8,p0=0,r9 | 164 | cmp.ne p8,p0=0,r9 |
139 | mov r17=-1 | 165 | mov r17=-1 |
140 | ;; | 166 | ;; |
@@ -210,27 +236,25 @@ ENTRY(fsys_gettimeofday) | |||
210 | // Note that instructions are optimized for McKinley. McKinley can | 236 | // Note that instructions are optimized for McKinley. McKinley can |
211 | // process two bundles simultaneously and therefore we continuously | 237 | // process two bundles simultaneously and therefore we continuously |
212 | // try to feed the CPU two bundles and then a stop. | 238 | // try to feed the CPU two bundles and then a stop. |
213 | // | 239 | |
214 | // Additional note that code has changed a lot. Optimization is TBD. | ||
215 | // Comments begin with "?" are maybe outdated. | ||
216 | tnat.nz p6,p0 = r31 // ? branch deferred to fit later bundle | ||
217 | mov pr = r30,0xc000 // Set predicates according to function | ||
218 | add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 | 240 | add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 |
241 | tnat.nz p6,p0 = r31 // guard against NaT argument | ||
242 | (p6) br.cond.spnt.few .fail_einval | ||
219 | movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address | 243 | movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address |
220 | ;; | 244 | ;; |
245 | ld4 r2 = [r2] // process work pending flags | ||
221 | movl r29 = itc_jitter_data // itc_jitter | 246 | movl r29 = itc_jitter_data // itc_jitter |
222 | add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time | 247 | add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time |
223 | ld4 r2 = [r2] // process work pending flags | ||
224 | ;; | ||
225 | (p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time | ||
226 | add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 | 248 | add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 |
227 | add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 | 249 | mov pr = r30,0xc000 // Set predicates according to function |
250 | ;; | ||
228 | and r2 = TIF_ALLWORK_MASK,r2 | 251 | and r2 = TIF_ALLWORK_MASK,r2 |
229 | (p6) br.cond.spnt.few .fail_einval // ? deferred branch | 252 | add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 |
253 | (p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time | ||
230 | ;; | 254 | ;; |
231 | add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last | 255 | add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last |
232 | cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled | 256 | cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled |
233 | (p6) br.cond.spnt.many fsys_fallback_syscall | 257 | (p6) br.cond.spnt.many fsys_fallback_syscall |
234 | ;; | 258 | ;; |
235 | // Begin critical section | 259 | // Begin critical section |
236 | .time_redo: | 260 | .time_redo: |
@@ -258,7 +282,6 @@ ENTRY(fsys_gettimeofday) | |||
258 | (p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! | 282 | (p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! |
259 | (p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. | 283 | (p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. |
260 | (p13) ld8 r25 = [r19] // get itc_lastcycle value | 284 | (p13) ld8 r25 = [r19] // get itc_lastcycle value |
261 | ;; // ? could be removed by moving the last add upward | ||
262 | ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec | 285 | ld8 r9 = [r22],IA64_TIMESPEC_TV_NSEC_OFFSET // tv_sec |
263 | ;; | 286 | ;; |
264 | ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec | 287 | ld8 r8 = [r22],-IA64_TIMESPEC_TV_NSEC_OFFSET // tv_nsec |
@@ -285,13 +308,12 @@ ENTRY(fsys_gettimeofday) | |||
285 | EX(.fail_efault, probe.w.fault r31, 3) | 308 | EX(.fail_efault, probe.w.fault r31, 3) |
286 | xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) | 309 | xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) |
287 | ;; | 310 | ;; |
288 | // ? simulate tbit.nz.or p7,p0 = r28,0 | ||
289 | getf.sig r2 = f8 | 311 | getf.sig r2 = f8 |
290 | mf | 312 | mf |
291 | ;; | 313 | ;; |
292 | ld4 r10 = [r20] // gtod_lock.sequence | 314 | ld4 r10 = [r20] // gtod_lock.sequence |
293 | shr.u r2 = r2,r23 // shift by factor | 315 | shr.u r2 = r2,r23 // shift by factor |
294 | ;; // ? overloaded 3 bundles! | 316 | ;; |
295 | add r8 = r8,r2 // Add xtime.nsecs | 317 | add r8 = r8,r2 // Add xtime.nsecs |
296 | cmp4.ne p7,p0 = r28,r10 | 318 | cmp4.ne p7,p0 = r28,r10 |
297 | (p7) br.cond.dpnt.few .time_redo // sequence number changed, redo | 319 | (p7) br.cond.dpnt.few .time_redo // sequence number changed, redo |
@@ -319,9 +341,9 @@ EX(.fail_efault, probe.w.fault r31, 3) | |||
319 | EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles | 341 | EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles |
320 | (p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it | 342 | (p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it |
321 | ;; | 343 | ;; |
322 | mov r8 = r0 | ||
323 | (p14) getf.sig r2 = f8 | 344 | (p14) getf.sig r2 = f8 |
324 | ;; | 345 | ;; |
346 | mov r8 = r0 | ||
325 | (p14) shr.u r21 = r2, 4 | 347 | (p14) shr.u r21 = r2, 4 |
326 | ;; | 348 | ;; |
327 | EX(.fail_efault, st8 [r31] = r9) | 349 | EX(.fail_efault, st8 [r31] = r9) |
@@ -660,7 +682,11 @@ GLOBAL_ENTRY(fsys_bubble_down) | |||
660 | nop.i 0 | 682 | nop.i 0 |
661 | ;; | 683 | ;; |
662 | mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 | 684 | mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 |
685 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
686 | mov.m r30=ar.itc // M get cycle for accounting | ||
687 | #else | ||
663 | nop.m 0 | 688 | nop.m 0 |
689 | #endif | ||
664 | nop.i 0 | 690 | nop.i 0 |
665 | ;; | 691 | ;; |
666 | mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore | 692 | mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore |
@@ -682,6 +708,28 @@ GLOBAL_ENTRY(fsys_bubble_down) | |||
682 | cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 | 708 | cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 |
683 | br.call.sptk.many b7=ia64_syscall_setup // B | 709 | br.call.sptk.many b7=ia64_syscall_setup // B |
684 | ;; | 710 | ;; |
711 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
712 | // r30 already holds ar.itc: mov.m r30=ar.itc was executed earlier, before the call to ia64_syscall_setup | ||
713 | add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 | ||
714 | add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 | ||
715 | ;; | ||
716 | ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel | ||
717 | ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel | ||
718 | ;; | ||
719 | ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime | ||
720 | ld8 r21=[r17] // cumulated utime | ||
721 | sub r22=r19,r18 // stime before leave kernel | ||
722 | ;; | ||
723 | st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp | ||
724 | sub r18=r30,r19 // elapsed time in user mode | ||
725 | ;; | ||
726 | add r20=r20,r22 // sum stime | ||
727 | add r21=r21,r18 // sum utime | ||
728 | ;; | ||
729 | st8 [r16]=r20 // update stime | ||
730 | st8 [r17]=r21 // update utime | ||
731 | ;; | ||
732 | #endif | ||
685 | mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 | 733 | mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 |
686 | mov rp=r14 // I0 set the real return addr | 734 | mov rp=r14 // I0 set the real return addr |
687 | and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A | 735 | and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A |