Diffstat (limited to 'arch/tile/kernel/intvec_32.S')
 -rw-r--r--  arch/tile/kernel/intvec_32.S  360
 1 file changed, 131 insertions(+), 229 deletions(-)
diff --git a/arch/tile/kernel/intvec_32.S b/arch/tile/kernel/intvec_32.S
index 8f58bdff20d7..72ade79b621b 100644
--- a/arch/tile/kernel/intvec_32.S
+++ b/arch/tile/kernel/intvec_32.S
@@ -32,10 +32,6 @@
 # error "No support for kernel preemption currently"
 #endif
 
-#if INT_INTCTRL_1 < 32 || INT_INTCTRL_1 >= 48
-# error INT_INTCTRL_1 coded to set high interrupt mask
-#endif
-
 #define PTREGS_PTR(reg, ptreg) addli reg, sp, C_ABI_SAVE_AREA_SIZE + (ptreg)
 
 #define PTREGS_OFFSET_SYSCALL PTREGS_OFFSET_REG(TREG_SYSCALL_NR)
@@ -132,8 +128,8 @@ intvec_\vecname:
 
 	/* Temporarily save a register so we have somewhere to work. */
 
-	mtspr   SYSTEM_SAVE_1_1, r0
-	mfspr   r0, EX_CONTEXT_1_1
+	mtspr   SPR_SYSTEM_SAVE_K_1, r0
+	mfspr   r0, SPR_EX_CONTEXT_K_1
 
 	/* The cmpxchg code clears sp to force us to reset it here on fault. */
 	{
@@ -167,18 +163,18 @@ intvec_\vecname:
 	 * The page_fault handler may be downcalled directly by the
 	 * hypervisor even when Linux is running and has ICS set.
 	 *
-	 * In this case the contents of EX_CONTEXT_1_1 reflect the
+	 * In this case the contents of EX_CONTEXT_K_1 reflect the
 	 * previous fault and can't be relied on to choose whether or
 	 * not to reinitialize the stack pointer.  So we add a test
-	 * to see whether SYSTEM_SAVE_1_2 has the high bit set,
+	 * to see whether SYSTEM_SAVE_K_2 has the high bit set,
 	 * and if so we don't reinitialize sp, since we must be coming
 	 * from Linux.  (In fact the precise case is !(val & ~1),
 	 * but any Linux PC has to have the high bit set.)
 	 *
-	 * Note that the hypervisor *always* sets SYSTEM_SAVE_1_2 for
+	 * Note that the hypervisor *always* sets SYSTEM_SAVE_K_2 for
 	 * any path that turns into a downcall to one of our TLB handlers.
 	 */
-	mfspr   r0, SYSTEM_SAVE_1_2
+	mfspr   r0, SPR_SYSTEM_SAVE_K_2
 	{
 	 blz r0, 0f    /* high bit in S_S_1_2 is for a PC to use */
 	 move r0, sp
@@ -187,12 +183,12 @@ intvec_\vecname:
 
 2:
 	/*
-	 * SYSTEM_SAVE_1_0 holds the cpu number in the low bits, and
+	 * SYSTEM_SAVE_K_0 holds the cpu number in the low bits, and
 	 * the current stack top in the higher bits.  So we recover
 	 * our stack top by just masking off the low bits, then
 	 * point sp at the top aligned address on the actual stack page.
 	 */
-	mfspr   r0, SYSTEM_SAVE_1_0
+	mfspr   r0, SPR_SYSTEM_SAVE_K_0
 	mm      r0, r0, zero, LOG2_THREAD_SIZE, 31
 
 0:
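For reference, the recovery described in the comment above amounts to clearing the low bits of the saved value; a minimal C sketch (illustrative only; LOG2_THREAD_SIZE and the packed cpu/stack-top layout are taken from the comment, not from a header in this diff):

    /* Illustrative sketch of the masking done by the "mm" above. */
    static inline unsigned long stack_top_from_save_reg(unsigned long ssk0,
                                                        unsigned int log2_thread_size)
    {
            /* The cpu number lives in the low bits; clearing them yields
             * the top-aligned address of the current stack page. */
            return ssk0 & ~((1UL << log2_thread_size) - 1);
    }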
@@ -254,7 +250,7 @@ intvec_\vecname:
 	 sw     sp, r3
 	 addli  sp, sp, PTREGS_OFFSET_PC - PTREGS_OFFSET_REG(3)
 	}
-	mfspr   r0, EX_CONTEXT_1_0
+	mfspr   r0, SPR_EX_CONTEXT_K_0
 	.ifc \processing,handle_syscall
 	/*
 	 * Bump the saved PC by one bundle so that when we return, we won't
@@ -267,7 +263,7 @@ intvec_\vecname:
 	 sw     sp, r0
 	 addli  sp, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
 	}
-	mfspr   r0, EX_CONTEXT_1_1
+	mfspr   r0, SPR_EX_CONTEXT_K_1
 	{
 	 sw     sp, r0
 	 addi   sp, sp, PTREGS_OFFSET_FAULTNUM - PTREGS_OFFSET_EX1
@@ -289,7 +285,7 @@ intvec_\vecname:
 	.endif
 	 addli  sp, sp, PTREGS_OFFSET_REG(0) - PTREGS_OFFSET_FAULTNUM
 	}
-	mfspr   r0, SYSTEM_SAVE_1_1  /* Original r0 */
+	mfspr   r0, SPR_SYSTEM_SAVE_K_1  /* Original r0 */
 	{
 	 sw     sp, r0
 	 addi   sp, sp, -PTREGS_OFFSET_REG(0) - 4
@@ -309,12 +305,12 @@ intvec_\vecname:
 	 * See discussion below at "finish_interrupt_save".
 	 */
 	.ifc \c_routine, do_page_fault
-	mfspr   r2, SYSTEM_SAVE_1_3  /* address of page fault */
-	mfspr   r3, SYSTEM_SAVE_1_2  /* info about page fault */
+	mfspr   r2, SPR_SYSTEM_SAVE_K_3  /* address of page fault */
+	mfspr   r3, SPR_SYSTEM_SAVE_K_2  /* info about page fault */
 	.else
 	.ifc \vecnum, INT_DOUBLE_FAULT
 	{
-	mfspr   r2, SYSTEM_SAVE_1_2  /* double fault info from HV */
+	mfspr   r2, SPR_SYSTEM_SAVE_K_2  /* double fault info from HV */
 	 movei  r3, 0
 	}
 	.else
@@ -467,7 +463,7 @@ intvec_\vecname:
 	/* Load tp with our per-cpu offset. */
 #ifdef CONFIG_SMP
 	{
-	 mfspr  r20, SYSTEM_SAVE_1_0
+	 mfspr  r20, SPR_SYSTEM_SAVE_K_0
 	 moveli r21, lo16(__per_cpu_offset)
 	}
 	{
@@ -487,7 +483,7 @@ intvec_\vecname:
 	 * We load flags in r32 here so we can jump to .Lrestore_regs
 	 * directly after do_page_fault_ics() if necessary.
 	 */
-	mfspr   r32, EX_CONTEXT_1_1
+	mfspr   r32, SPR_EX_CONTEXT_K_1
 	{
 	 andi   r32, r32, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
 	 PTREGS_PTR(r21, PTREGS_OFFSET_FLAGS)
@@ -855,14 +851,27 @@ STD_ENTRY(interrupt_return)
 	/* Check to see if there is any work to do before returning to user. */
 	{
 	 addi   r29, r32, THREAD_INFO_FLAGS_OFFSET
-	 moveli r28, lo16(_TIF_ALLWORK_MASK)
+	 moveli r1, lo16(_TIF_ALLWORK_MASK)
 	}
 	{
 	 lw     r29, r29
-	 auli   r28, r28, ha16(_TIF_ALLWORK_MASK)
+	 auli   r1, r1, ha16(_TIF_ALLWORK_MASK)
 	}
-	and     r28, r29, r28
-	bnz     r28, .Lwork_pending
+	and     r1, r29, r1
+	bzt     r1, .Lrestore_all
+
+	/*
+	 * Make sure we have all the registers saved for signal
+	 * handling or single-step.  Call out to C code to figure out
+	 * exactly what we need to do for each flag bit, then if
+	 * necessary, reload the flags and recheck.
+	 */
+	push_extra_callee_saves r0
+	{
+	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
+	 jal do_work_pending
+	}
+	bnz r0, .Lresume_userspace
 
 	/*
 	 * In the NMI case we
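The block added above defers the per-flag work to C: it saves the extra callee-saves, passes a pt_regs pointer, and loops back to .Lresume_userspace while do_work_pending() returns nonzero. A rough sketch of the shape such a helper takes, using only the flags and callees visible elsewhere in this file (illustrative; not the actual arch/tile implementation, whose signature and flag handling may differ):

    #include <linux/sched.h>          /* schedule() */
    #include <linux/thread_info.h>    /* _TIF_* flag bits */
    #include <linux/ptrace.h>         /* struct pt_regs */

    extern void do_signal(struct pt_regs *regs);  /* assumption: arch-local helper, as in the old asm */

    /* Illustrative only: handle one pending-work flag, then ask the
     * caller to reload the flags and recheck by returning nonzero. */
    int do_work_pending_sketch(struct pt_regs *regs, unsigned long flags)
    {
            if (flags & _TIF_NEED_RESCHED) {
                    schedule();
                    return 1;
            }
            if (flags & _TIF_SIGPENDING) {
                    do_signal(regs);
                    return 1;
            }
            return 0;                 /* nothing left: fall through to restore */
    }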
@@ -957,11 +966,11 @@ STD_ENTRY(interrupt_return)
 	pop_reg_zero r21, r3, sp, PTREGS_OFFSET_EX1 - PTREGS_OFFSET_PC
 	pop_reg_zero lr, r4, sp, PTREGS_OFFSET_REG(52) - PTREGS_OFFSET_EX1
 	{
-	 mtspr  EX_CONTEXT_1_0, r21
+	 mtspr  SPR_EX_CONTEXT_K_0, r21
 	 move   r5, zero
 	}
 	{
-	 mtspr  EX_CONTEXT_1_1, lr
+	 mtspr  SPR_EX_CONTEXT_K_1, lr
 	 andi   lr, lr, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
 	}
 
@@ -1020,7 +1029,7 @@ STD_ENTRY(interrupt_return)
 
 	/* Set r1 to errno if we are returning an error, otherwise zero. */
 	{
-	 moveli r29, 1024
+	 moveli r29, 4096
 	 sub    r1, zero, r0
 	}
 	slt_u   r29, r1, r29
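The constant here encodes the kernel's error-return convention: a syscall result in the range [-4095, -1] is an errno, so the code negates r0 and does an unsigned compare against 4096 (the previous value of 1024 covered only part of that range). The same test in C, for reference:

    /* Illustrative: return the errno encoded in a syscall result, or 0. */
    static inline int syscall_errno(long ret)
    {
            /* values in [-4095, -1], i.e. -MAX_ERRNO..-1, are error codes */
            return ((unsigned long)-ret < 4096) ? (int)-ret : 0;
    }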
@@ -1103,142 +1112,9 @@ STD_ENTRY(interrupt_return)
 	pop_reg r50
 	pop_reg r51, sp, PTREGS_OFFSET_REG(29) - PTREGS_OFFSET_REG(51)
 	j .Lcontinue_restore_regs
-
-.Lwork_pending:
-	/* Mask the reschedule flag */
-	andi    r28, r29, _TIF_NEED_RESCHED
-
-	{
-	 /*
-	  * If the NEED_RESCHED flag is called, we call schedule(), which
-	  * may drop this context right here and go do something else.
-	  * On return, jump back to .Lresume_userspace and recheck.
-	  */
-	 bz     r28, .Lasync_tlb
-
-	 /* Mask the async-tlb flag */
-	 andi   r28, r29, _TIF_ASYNC_TLB
-	}
-
-	jal     schedule
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Reload the flags and check again */
-	j       .Lresume_userspace
-
-.Lasync_tlb:
-	{
-	 bz     r28, .Lneed_sigpending
-
-	 /* Mask the sigpending flag */
-	 andi   r28, r29, _TIF_SIGPENDING
-	}
-
-	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	jal     do_async_page_fault
-	FEEDBACK_REENTER(interrupt_return)
-
-	/*
-	 * Go restart the "resume userspace" process.  We may have
-	 * fired a signal, and we need to disable interrupts again.
-	 */
-	j       .Lresume_userspace
-
-.Lneed_sigpending:
-	/*
-	 * At this point we are either doing signal handling or single-step,
-	 * so either way make sure we have all the registers saved.
-	 */
-	push_extra_callee_saves r0
-
-	{
-	 /* If no signal pending, skip to singlestep check */
-	 bz     r28, .Lneed_singlestep
-
-	 /* Mask the singlestep flag */
-	 andi   r28, r29, _TIF_SINGLESTEP
-	}
-
-	jal     do_signal
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Reload the flags and check again */
-	j       .Lresume_userspace
-
-.Lneed_singlestep:
-	{
-	 /* Get a pointer to the EX1 field */
-	 PTREGS_PTR(r29, PTREGS_OFFSET_EX1)
-
-	 /* If we get here, our bit must be set. */
-	 bz     r28, .Lwork_confusion
-	}
-	/* If we are in priv mode, don't single step */
-	lw      r28, r29
-	andi    r28, r28, SPR_EX_CONTEXT_1_1__PL_MASK  /* mask off ICS */
-	bnz     r28, .Lrestore_all
-
-	/* Allow interrupts within the single step code */
-	TRACE_IRQS_ON  /* Note: clobbers registers r0-r29 */
-	IRQ_ENABLE(r20, r21)
-
-	/* try to single-step the current instruction */
-	PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	jal     single_step_once
-	FEEDBACK_REENTER(interrupt_return)
-
-	/* Re-disable interrupts.  TRACE_IRQS_OFF in .Lrestore_all. */
-	IRQ_DISABLE(r20,r21)
-
-	j       .Lrestore_all
-
-.Lwork_confusion:
-	move    r0, r28
-	panic   "thread_info allwork flags unhandled on userspace resume: %#x"
-
 	STD_ENDPROC(interrupt_return)
 
 	/*
-	 * This interrupt variant clears the INT_INTCTRL_1 interrupt mask bit
-	 * before returning, so we can properly get more downcalls.
-	 */
-	.pushsection .text.handle_interrupt_downcall,"ax"
-handle_interrupt_downcall:
-	finish_interrupt_save handle_interrupt_downcall
-	check_single_stepping normal, .Ldispatch_downcall
-.Ldispatch_downcall:
-
-	/* Clear INTCTRL_1 from the set of interrupts we ever enable. */
-	GET_INTERRUPTS_ENABLED_MASK_PTR(r30)
-	{
-	 addi   r30, r30, 4
-	 movei  r31, INT_MASK(INT_INTCTRL_1)
-	}
-	{
-	 lw     r20, r30
-	 nor    r21, r31, zero
-	}
-	and     r20, r20, r21
-	sw      r30, r20
-
-	{
-	 jalr   r0
-	 PTREGS_PTR(r0, PTREGS_OFFSET_BASE)
-	}
-	FEEDBACK_REENTER(handle_interrupt_downcall)
-
-	/* Allow INTCTRL_1 to be enabled next time we enable interrupts. */
-	lw      r20, r30
-	or      r20, r20, r31
-	sw      r30, r20
-
-	{
-	 movei  r30, 0   /* not an NMI */
-	 j      interrupt_return
-	}
-	STD_ENDPROC(handle_interrupt_downcall)
-
-	/*
 	 * Some interrupts don't check for single stepping
 	 */
 	.pushsection .text.handle_interrupt_no_single_step,"ax"
@@ -1342,8 +1218,8 @@ handle_syscall:
 	lw      r20, r20
 
 	/* Jump to syscall handler. */
-	jalr    r20; .Lhandle_syscall_link:
-	FEEDBACK_REENTER(handle_syscall)
+	jalr    r20
+.Lhandle_syscall_link: /* value of "lr" after "jalr r20" above */
 
 	/*
 	 * Write our r0 onto the stack so it gets restored instead
@@ -1352,6 +1228,9 @@ handle_syscall:
 	PTREGS_PTR(r29, PTREGS_OFFSET_REG(0))
 	sw      r29, r0
 
+.Lsyscall_sigreturn_skip:
+	FEEDBACK_REENTER(handle_syscall)
+
 	/* Do syscall trace again, if requested. */
 	lw      r30, r31
 	andi    r30, r30, _TIF_SYSCALL_TRACE
@@ -1472,7 +1351,12 @@ handle_ill:
 	lw      r26, r24
 	sw      r28, r26
 
-	/* Clear TIF_SINGLESTEP */
+	/*
+	 * Clear TIF_SINGLESTEP to prevent recursion if we execute an ill.
+	 * The normal non-arch flow redundantly clears TIF_SINGLESTEP, but we
+	 * need to clear it here and can't really impose on all other arches.
+	 * So what's another write between friends?
+	 */
 	GET_THREAD_INFO(r0)
 
 	addi    r1, r0, THREAD_INFO_FLAGS_OFFSET
@@ -1509,7 +1393,7 @@ handle_ill:
 /* Various stub interrupt handlers and syscall handlers */
 
 STD_ENTRY_LOCAL(_kernel_double_fault)
-	mfspr   r1, EX_CONTEXT_1_0
+	mfspr   r1, SPR_EX_CONTEXT_K_0
 	move    r2, lr
 	move    r3, sp
 	move    r4, r52
@@ -1518,34 +1402,44 @@ STD_ENTRY_LOCAL(_kernel_double_fault)
 	STD_ENDPROC(_kernel_double_fault)
 
 STD_ENTRY_LOCAL(bad_intr)
-	mfspr   r2, EX_CONTEXT_1_0
+	mfspr   r2, SPR_EX_CONTEXT_K_0
 	panic   "Unhandled interrupt %#x: PC %#lx"
 	STD_ENDPROC(bad_intr)
 
 /* Put address of pt_regs in reg and jump. */
 #define PTREGS_SYSCALL(x, reg)                \
-	STD_ENTRY(x);                         \
+	STD_ENTRY(_##x);                      \
 	{                                     \
 	 PTREGS_PTR(reg, PTREGS_OFFSET_BASE); \
-	 j      _##x                          \
+	 j      x                             \
 	};                                    \
-	STD_ENDPROC(x)
+	STD_ENDPROC(_##x)
+
+/*
+ * Special-case sigreturn to not write r0 to the stack on return.
+ * This is technically more efficient, but it also avoids difficulties
+ * in the 64-bit OS when handling 32-bit compat code, since we must not
+ * sign-extend r0 for the sigreturn return-value case.
+ */
+#define PTREGS_SYSCALL_SIGRETURN(x, reg)                \
+	STD_ENTRY(_##x);                                \
+	addli   lr, lr, .Lsyscall_sigreturn_skip - .Lhandle_syscall_link;  \
+	{                                               \
+	 PTREGS_PTR(reg, PTREGS_OFFSET_BASE);           \
+	 j      x                                       \
+	};                                              \
+	STD_ENDPROC(_##x)
 
 PTREGS_SYSCALL(sys_execve, r3)
 PTREGS_SYSCALL(sys_sigaltstack, r2)
-PTREGS_SYSCALL(sys_rt_sigreturn, r0)
-
-/* Save additional callee-saves to pt_regs, put address in reg and jump. */
-#define PTREGS_SYSCALL_ALL_REGS(x, reg)         \
-	STD_ENTRY(x);                           \
-	push_extra_callee_saves reg;            \
-	j       _##x;                           \
-	STD_ENDPROC(x)
+PTREGS_SYSCALL_SIGRETURN(sys_rt_sigreturn, r0)
+PTREGS_SYSCALL(sys_cmpxchg_badaddr, r1)
 
-PTREGS_SYSCALL_ALL_REGS(sys_fork, r0)
-PTREGS_SYSCALL_ALL_REGS(sys_vfork, r0)
-PTREGS_SYSCALL_ALL_REGS(sys_clone, r4)
-PTREGS_SYSCALL_ALL_REGS(sys_cmpxchg_badaddr, r1)
+/* Save additional callee-saves to pt_regs, put address in r4 and jump. */
+STD_ENTRY(_sys_clone)
+	push_extra_callee_saves r4
+	j       sys_clone
+	STD_ENDPROC(_sys_clone)
 
 /*
  * This entrypoint is taken for the cmpxchg and atomic_update fast
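For orientation, PTREGS_SYSCALL(x, reg) now emits the stub under the name _x: it loads a struct pt_regs pointer into the named register and jumps to the plain C routine x. Assuming the usual convention that arguments arrive in r0 upward, the C handler then sees the regs pointer as one extra argument; a hypothetical prototype shape (sys_foo and its arguments are placeholders, not symbols from this file):

    /* Illustrative only: the regs pointer lands in whichever argument slot
     * corresponds to the register named in PTREGS_SYSCALL(sys_foo, rN). */
    long sys_foo(unsigned long arg0, unsigned long arg1, struct pt_regs *regs);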
@@ -1558,12 +1452,14 @@ PTREGS_SYSCALL_ALL_REGS(sys_cmpxchg_badaddr, r1)
  * to be available to it on entry.  It does not modify any callee-save
  * registers (including "lr").  It does not check what PL it is being
  * called at, so you'd better not call it other than at PL0.
+ * The <atomic.h> wrapper assumes it only clobbers r20-r29, so if
+ * it ever is necessary to use more registers, be aware.
  *
  * It does not use the stack, but since it might be re-interrupted by
  * a page fault which would assume the stack was valid, it does
  * save/restore the stack pointer and zero it out to make sure it gets reset.
  * Since we always keep interrupts disabled, the hypervisor won't
- * clobber our EX_CONTEXT_1_x registers, so we don't save/restore them
+ * clobber our EX_CONTEXT_K_x registers, so we don't save/restore them
  * (other than to advance the PC on return).
  *
  * We have to manually validate the user vs kernel address range
@@ -1574,13 +1470,19 @@ PTREGS_SYSCALL_ALL_REGS(sys_cmpxchg_badaddr, r1)
  * We place it in the __HEAD section to ensure it is relatively
  * near to the intvec_SWINT_1 code (reachable by a conditional branch).
  *
- * Must match register usage in do_page_fault().
+ * Our use of ATOMIC_LOCK_REG here must match do_page_fault_ics().
+ *
+ * As we do in lib/atomic_asm_32.S, we bypass a store if the value we
+ * would store is the same as the value we just loaded.
  */
 	__HEAD
 	.align 64
 	/* Align much later jump on the start of a cache line. */
 #if !ATOMIC_LOCKS_FOUND_VIA_TABLE()
-	nop; nop
+	nop
+#if PAGE_SIZE >= 0x10000
+	nop
+#endif
 #endif
 ENTRY(sys_cmpxchg)
 
@@ -1608,9 +1510,13 @@ ENTRY(sys_cmpxchg)
  * about aliasing among multiple mappings of the same physical page,
  * and we ignore the low 3 bits so we have one lock that covers
  * both a cmpxchg64() and a cmpxchg() on either its low or high word.
- * NOTE: this code must match __atomic_hashed_lock() in lib/atomic.c.
+ * NOTE: this must match __atomic_hashed_lock() in lib/atomic_32.c.
  */
 
+#if (PAGE_OFFSET & 0xffff) != 0
+# error Code here assumes PAGE_OFFSET can be loaded with just hi16()
+#endif
+
 #if ATOMIC_LOCKS_FOUND_VIA_TABLE()
 	{
 	 /* Check for unaligned input. */
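The lock-hashing scheme the comment describes can be pictured as: drop the low three bits of the user address (so a cmpxchg64() and a cmpxchg() on either of its words map to the same lock), then hash the rest into a fixed table of locks. A loose C sketch (illustrative; the real routine is __atomic_hashed_lock() in lib/atomic_32.c, and its table size and hash differ):

    /* Illustrative: map a user address to a slot in a lock table. */
    static inline int *hashed_lock_sketch(int *locks, unsigned long nlocks,
                                          unsigned long uaddr)
    {
            unsigned long dword = uaddr >> 3;   /* ignore the low 3 bits */
            return &locks[dword % nlocks];      /* placeholder hash */
    }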
@@ -1628,17 +1534,7 @@ ENTRY(sys_cmpxchg)
 	{
 	 shri   r20, r25, 32 - ATOMIC_HASH_L1_SHIFT
 	 slt_u  r23, r0, r23
-
-	 /*
-	  * Ensure that the TLB is loaded before we take out the lock.
-	  * On TILEPro, this will start fetching the value all the way
-	  * into our L1 as well (and if it gets modified before we
-	  * grab the lock, it will be invalidated from our cache
-	  * before we reload it).  On tile64, we'll start fetching it
-	  * into our L1 if we're the home, and if we're not, we'll
-	  * still at least start fetching it into the home's L2.
-	  */
-	 lw     r26, r0
+	 lw     r26, r0  /* see comment in the "#else" for the "lw r26". */
 	}
 	{
 	 s2a    r21, r20, r21
@@ -1654,18 +1550,9 @@ ENTRY(sys_cmpxchg)
 	 bbs    r23, .Lcmpxchg64
 	 andi   r23, r0, 7       /* Precompute alignment for cmpxchg64. */
 	}
-
 	{
-	 /*
-	  * We very carefully align the code that actually runs with
-	  * the lock held (nine bundles) so that we know it is all in
-	  * the icache when we start.  This instruction (the jump) is
-	  * at the start of the first cache line, address zero mod 64;
-	  * we jump to somewhere in the second cache line to issue the
-	  * tns, then jump back to finish up.
-	  */
 	 s2a    ATOMIC_LOCK_REG_NAME, r25, r21
-	 j      .Lcmpxchg32_tns
+	 j      .Lcmpxchg32_tns        /* see comment in the #else for the jump. */
 	}
 
 #else /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
@@ -1703,11 +1590,14 @@ ENTRY(sys_cmpxchg)
 	 lw     r26, r0
 	}
 	{
-	 /* atomic_locks is page aligned so this suffices to get its addr. */
-	 auli   r21, zero, hi16(atomic_locks)
+	 auli   r21, zero, ha16(atomic_locks)
 
 	 bbns   r23, .Lcmpxchg_badaddr
 	}
+#if PAGE_SIZE < 0x10000
+	/* atomic_locks is page-aligned so for big pages we don't need this. */
+	addli   r21, r21, lo16(atomic_locks)
+#endif
 	{
 	 /*
 	  * Insert the hash bits into the page-aligned pointer.
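The ha16()/lo16() pair used above builds a 32-bit address in two instructions: an "add upper immediate" followed by an add of a sign-extended 16-bit low half. Because the low half is sign-extended, the upper half has to be pre-adjusted, which is what distinguishes it from plain hi16(). A C sketch of the general idiom (the tile macros themselves are not defined in this diff, so this is an assumption about their intent):

    /* Illustrative: split an address so that (ha16 << 16) + (short)lo16 == addr. */
    static inline unsigned int lo16_sketch(unsigned int addr)
    {
            return addr & 0xffff;
    }
    static inline unsigned int ha16_sketch(unsigned int addr)
    {
            return (addr + 0x8000) >> 16;   /* round up when lo16 is "negative" */
    }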
@@ -1727,24 +1617,25 @@ ENTRY(sys_cmpxchg)
 	{
 	 /*
 	  * We very carefully align the code that actually runs with
-	  * the lock held (nine bundles) so that we know it is all in
+	  * the lock held (twelve bundles) so that we know it is all in
 	  * the icache when we start.  This instruction (the jump) is
 	  * at the start of the first cache line, address zero mod 64;
-	  * we jump to somewhere in the second cache line to issue the
-	  * tns, then jump back to finish up.
+	  * we jump to the very end of the second cache line to get that
+	  * line loaded in the icache, then fall through to issue the tns
+	  * in the third cache line, at which point it's all cached.
+	  * Note that is for performance, not correctness.
 	  */
 	 j      .Lcmpxchg32_tns
 	}
 
 #endif /* ATOMIC_LOCKS_FOUND_VIA_TABLE() */
 
-	ENTRY(__sys_cmpxchg_grab_lock)
+/* Symbol for do_page_fault_ics() to use to compare against the PC. */
+.global __sys_cmpxchg_grab_lock
+__sys_cmpxchg_grab_lock:
 
 	/*
 	 * Perform the actual cmpxchg or atomic_update.
-	 * Note that __futex_mark_unlocked() in uClibc relies on
-	 * atomic_update() to always perform an "mf", so don't make
-	 * it optional or conditional without modifying that code.
 	 */
 .Ldo_cmpxchg32:
 	{
@@ -1762,18 +1653,20 @@ ENTRY(sys_cmpxchg)
 	}
 	{
 	 mvnz   r24, r23, r25 /* Use atomic_update value if appropriate. */
-	 bbns   r22, .Lcmpxchg32_mismatch
+	 bbns   r22, .Lcmpxchg32_nostore
 	}
+	seq     r22, r24, r21    /* Are we storing the value we loaded? */
+	bbs     r22, .Lcmpxchg32_nostore
 	sw      r0, r24
 
+	/* The following instruction is the start of the second cache line. */
 	/* Do slow mtspr here so the following "mf" waits less. */
 	{
 	 move   sp, r27
-	 mtspr  EX_CONTEXT_1_0, r28
+	 mtspr  SPR_EX_CONTEXT_K_0, r28
 	}
 	mf
 
-	/* The following instruction is the start of the second cache line. */
 	{
 	 move   r0, r21
 	 sw     ATOMIC_LOCK_REG_NAME, zero
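As the comment added earlier in this change notes, the store is bypassed when the value to be written equals the value just loaded; the seq/bbs pair above implements that check. In C terms, roughly (illustrative sketch only, ignoring the hashed lock and the atomic_update variant):

    /* Illustrative: cmpxchg with a "don't dirty the line needlessly" bypass. */
    static inline int cmpxchg32_sketch(int *p, int expected, int newval)
    {
            int oldval = *p;                       /* value loaded under the lock */
            int store  = (oldval == expected) ? newval : oldval;
            if (store != oldval)                   /* skip the store if unchanged */
                    *p = store;
            return oldval;
    }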
@@ -1781,14 +1674,14 @@ ENTRY(sys_cmpxchg)
 	iret
 
 	/* Duplicated code here in the case where we don't overlap "mf" */
-.Lcmpxchg32_mismatch:
+.Lcmpxchg32_nostore:
 	{
 	 move   r0, r21
 	 sw     ATOMIC_LOCK_REG_NAME, zero
 	}
 	{
 	 move   sp, r27
-	 mtspr  EX_CONTEXT_1_0, r28
+	 mtspr  SPR_EX_CONTEXT_K_0, r28
 	}
 	iret
 
@@ -1797,8 +1690,6 @@ ENTRY(sys_cmpxchg)
  * and for 64-bit cmpxchg.  We provide it as a macro and put
  * it into both versions.  We can't share the code literally
  * since it depends on having the right branch-back address.
- * Note that the first few instructions should share the cache
- * line with the second half of the actual locked code.
  */
 	.macro  cmpxchg_lock, bitwidth
 
@@ -1816,7 +1707,7 @@ ENTRY(sys_cmpxchg)
 #endif
 
 	/* Issue the slow SPR here while the tns result is in flight. */
-	mfspr   r28, EX_CONTEXT_1_0
+	mfspr   r28, SPR_EX_CONTEXT_K_0
 
 	{
 	 addi   r28, r28, 8 /* return to the instruction after the swint1 */
@@ -1824,7 +1715,7 @@ ENTRY(sys_cmpxchg)
 	}
 	/*
 	 * The preceding instruction is the last thing that must be
-	 * on the second cache line.
+	 * hot in the icache before we do the "tns" above.
 	 */
 
 #ifdef CONFIG_SMP
@@ -1855,6 +1746,12 @@ ENTRY(sys_cmpxchg)
 	.endm
 
 .Lcmpxchg32_tns:
+	/*
+	 * This is the last instruction on the second cache line.
+	 * The nop here loads the second line, then we fall through
+	 * to the tns to load the third line before we take the lock.
+	 */
+	nop
 	cmpxchg_lock 32
 
 	/*
@@ -1904,7 +1801,7 @@ ENTRY(sys_cmpxchg)
 .Lcmpxchg64_mismatch:
 	{
 	 move   sp, r27
-	 mtspr  EX_CONTEXT_1_0, r28
+	 mtspr  SPR_EX_CONTEXT_K_0, r28
 	}
 	mf
 	{
@@ -1985,21 +1882,26 @@ int_unalign:
 	int_hand     INT_PERF_COUNT, PERF_COUNT, \
 		     op_handle_perf_interrupt, handle_nmi
 	int_hand     INT_INTCTRL_3, INTCTRL_3, bad_intr
+#if CONFIG_KERNEL_PL == 2
+	dc_dispatch  INT_INTCTRL_2, INTCTRL_2
+	int_hand     INT_INTCTRL_1, INTCTRL_1, bad_intr
+#else
 	int_hand     INT_INTCTRL_2, INTCTRL_2, bad_intr
 	dc_dispatch  INT_INTCTRL_1, INTCTRL_1
+#endif
 	int_hand     INT_INTCTRL_0, INTCTRL_0, bad_intr
 	int_hand     INT_MESSAGE_RCV_DWNCL, MESSAGE_RCV_DWNCL, \
-		     hv_message_intr, handle_interrupt_downcall
+		     hv_message_intr
 	int_hand     INT_DEV_INTR_DWNCL, DEV_INTR_DWNCL, \
-		     tile_dev_intr, handle_interrupt_downcall
+		     tile_dev_intr
 	int_hand     INT_I_ASID, I_ASID, bad_intr
 	int_hand     INT_D_ASID, D_ASID, bad_intr
 	int_hand     INT_DMATLB_MISS_DWNCL, DMATLB_MISS_DWNCL, \
-		     do_page_fault, handle_interrupt_downcall
+		     do_page_fault
 	int_hand     INT_SNITLB_MISS_DWNCL, SNITLB_MISS_DWNCL, \
-		     do_page_fault, handle_interrupt_downcall
+		     do_page_fault
 	int_hand     INT_DMATLB_ACCESS_DWNCL, DMATLB_ACCESS_DWNCL, \
-		     do_page_fault, handle_interrupt_downcall
+		     do_page_fault
 	int_hand     INT_SN_CPL, SN_CPL, bad_intr
 	int_hand     INT_DOUBLE_FAULT, DOUBLE_FAULT, do_trap
 #if CHIP_HAS_AUX_PERF_COUNTERS()