aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Chris Zankel <chris@zankel.net> 2014-01-29 01:09:51 -0500
committer Chris Zankel <chris@zankel.net> 2014-01-29 01:09:51 -0500
commit 6b5a1f74e50170e64104135490dc32b657483594 (patch)
tree 6bb08372aa016f77f27ec12d8ce4bbcc16291af0
parent 3251f1e27a5a17f0efd436cfd1e7b9896cfab0a0 (diff)
xtensa: fix fast_syscall_spill_registers
The original implementation could clobber registers under certain conditions. The Xtensa processor architecture uses windowed registers and the original implementation was using a4 as a temporary register, which under certain conditions could be register a0 of the oldest window frame, and didn't always restore the content correctly. By moving the _spill_registers routine inside the fast system call, it frees up one more register (the return address is not required anymore) for the spill routine. Signed-off-by: Chris Zankel <chris@zankel.net>
-rw-r--r-- arch/xtensa/kernel/entry.S 383
1 files changed, 174 insertions, 209 deletions
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S
index b61e25146a2f..ef7f4990722b 100644
--- a/arch/xtensa/kernel/entry.S
+++ b/arch/xtensa/kernel/entry.S
@@ -1081,34 +1081,202 @@ ENTRY(fast_syscall_spill_registers)
1081 1081
1082 rsr a0, sar 1082 rsr a0, sar
1083 s32i a3, a2, PT_AREG3 1083 s32i a3, a2, PT_AREG3
1084 s32i a4, a2, PT_AREG4 1084 s32i a0, a2, PT_SAR
1085 s32i a0, a2, PT_AREG5 # store SAR to PT_AREG5
1086 1085
1087 /* The spill routine might clobber a7, a11, and a15. */ 1086 /* The spill routine might clobber a4, a7, a8, a11, a12, and a15. */
1088 1087
1088 s32i a4, a2, PT_AREG4
1089 s32i a7, a2, PT_AREG7 1089 s32i a7, a2, PT_AREG7
1090 s32i a8, a2, PT_AREG8
1090 s32i a11, a2, PT_AREG11 1091 s32i a11, a2, PT_AREG11
1092 s32i a12, a2, PT_AREG12
1091 s32i a15, a2, PT_AREG15 1093 s32i a15, a2, PT_AREG15
1092 1094
1093 call0 _spill_registers # destroys a3, a4, and SAR 1095 /*
1096 * Rotate ws so that the current windowbase is at bit 0.
1097 * Assume ws = xxxwww1yy (www1 current window frame).
1098 * Rotate ws right so that a3 = yyxxxwww1.
1099 */
1100
1101 rsr a0, windowbase
1102 rsr a3, windowstart # a3 = xxxwww1yy
1103 ssr a0 # holds WB
1104 slli a0, a3, WSBITS
1105 or a3, a3, a0 # a3 = xxxwww1yyxxxwww1yy
1106 srl a3, a3 # a3 = 00xxxwww1yyxxxwww1
1107
1108 /* We are done if there is no frame other than the current register frame. */
1109
1110 extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww
1111 movi a0, (1 << (WSBITS-1))
1112 _beqz a3, .Lnospill # only one active frame? jump
1113
1114 /* We want 1 at the top, so that we return to the current windowbase */
1115
1116 or a3, a3, a0 # 1yyxxxwww
1117
1118 /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */
1119
1120 wsr a3, windowstart # save shifted windowstart
1121 neg a0, a3
1122 and a3, a0, a3 # first bit set from right: 000010000
1123
1124 ffs_ws a0, a3 # a0: shifts to skip empty frames
1125 movi a3, WSBITS
1126 sub a0, a3, a0 # WSBITS-a0:number of 0-bits from right
1127 ssr a0 # save in SAR for later.
1128
1129 rsr a3, windowbase
1130 add a3, a3, a0
1131 wsr a3, windowbase
1132 rsync
1133
1134 rsr a3, windowstart
1135 srl a3, a3 # shift windowstart
1136
1137 /* WB is now just one frame below the oldest frame in the register
1138 window. WS is shifted so the oldest frame is in bit 0, thus, WB
1139 and WS differ by one 4-register frame. */
1140
1141 /* Save frames. Depending what call was used (call4, call8, call12),
1142 * we have to save 4, 8, or 12 registers.
1143 */
1144
1145
1146.Lloop: _bbsi.l a3, 1, .Lc4
1147 _bbci.l a3, 2, .Lc12
1148
1149.Lc8: s32e a4, a13, -16
1150 l32e a4, a5, -12
1151 s32e a8, a4, -32
1152 s32e a5, a13, -12
1153 s32e a6, a13, -8
1154 s32e a7, a13, -4
1155 s32e a9, a4, -28
1156 s32e a10, a4, -24
1157 s32e a11, a4, -20
1158 srli a11, a3, 2 # shift windowbase by 2
1159 rotw 2
1160 _bnei a3, 1, .Lloop
1161 j .Lexit
1162
1163.Lc4: s32e a4, a9, -16
1164 s32e a5, a9, -12
1165 s32e a6, a9, -8
1166 s32e a7, a9, -4
1167
1168 srli a7, a3, 1
1169 rotw 1
1170 _bnei a3, 1, .Lloop
1171 j .Lexit
1172
1173.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 3 shouldn't be zero!
1174
1175 /* 12-register frame (call12) */
1176
1177 l32e a0, a5, -12
1178 s32e a8, a0, -48
1179 mov a8, a0
1180
1181 s32e a9, a8, -44
1182 s32e a10, a8, -40
1183 s32e a11, a8, -36
1184 s32e a12, a8, -32
1185 s32e a13, a8, -28
1186 s32e a14, a8, -24
1187 s32e a15, a8, -20
1188 srli a15, a3, 3
1189
1190 /* The stack pointer for a4..a7 is out of reach, so we rotate the
1191 * window, grab the stackpointer, and rotate back.
1192 * Alternatively, we could also use the following approach, but that
1193 * makes the fixup routine much more complicated:
1194 * rotw 1
1195 * s32e a0, a13, -16
1196 * ...
1197 * rotw 2
1198 */
1199
1200 rotw 1
1201 mov a4, a13
1202 rotw -1
1203
1204 s32e a4, a8, -16
1205 s32e a5, a8, -12
1206 s32e a6, a8, -8
1207 s32e a7, a8, -4
1208
1209 rotw 3
1210
1211 _beqi a3, 1, .Lexit
1212 j .Lloop
1213
1214.Lexit:
1215
1216 /* Done. Do the final rotation and set WS */
1217
1218 rotw 1
1219 rsr a3, windowbase
1220 ssl a3
1221 movi a3, 1
1222 sll a3, a3
1223 wsr a3, windowstart
1224.Lnospill:
1094 1225
1095 /* Advance PC, restore registers and SAR, and return from exception. */ 1226 /* Advance PC, restore registers and SAR, and return from exception. */
1096 1227
1097 l32i a3, a2, PT_AREG5 1228 l32i a3, a2, PT_SAR
1098 l32i a4, a2, PT_AREG4
1099 l32i a0, a2, PT_AREG0 1229 l32i a0, a2, PT_AREG0
1100 wsr a3, sar 1230 wsr a3, sar
1101 l32i a3, a2, PT_AREG3 1231 l32i a3, a2, PT_AREG3
1102 1232
1103 /* Restore clobbered registers. */ 1233 /* Restore clobbered registers. */
1104 1234
1235 l32i a4, a2, PT_AREG4
1105 l32i a7, a2, PT_AREG7 1236 l32i a7, a2, PT_AREG7
1237 l32i a8, a2, PT_AREG8
1106 l32i a11, a2, PT_AREG11 1238 l32i a11, a2, PT_AREG11
1239 l32i a12, a2, PT_AREG12
1107 l32i a15, a2, PT_AREG15 1240 l32i a15, a2, PT_AREG15
1108 1241
1109 movi a2, 0 1242 movi a2, 0
1110 rfe 1243 rfe
1111 1244
1245.Linvalid_mask:
1246
1247 /* We get here because of an unrecoverable error in the window
1248 * registers, so set up a dummy frame and kill the user application.
1249 * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
1250 */
1251
1252 movi a0, 1
1253 movi a1, 0
1254
1255 wsr a0, windowstart
1256 wsr a1, windowbase
1257 rsync
1258
1259 movi a0, 0
1260
1261 rsr a3, excsave1
1262 l32i a1, a3, EXC_TABLE_KSTK
1263
1264 movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL
1265 wsr a4, ps
1266 rsync
1267
1268 movi a6, SIGSEGV
1269 movi a4, do_exit
1270 callx4 a4
1271
1272 /* shouldn't return, so panic */
1273
1274 wsr a0, excsave1
1275 movi a0, unrecoverable_exception
1276 callx0 a0 # should not return
12771: j 1b
1278
1279
1112ENDPROC(fast_syscall_spill_registers) 1280ENDPROC(fast_syscall_spill_registers)
1113 1281
1114/* Fixup handler. 1282/* Fixup handler.
@@ -1232,209 +1400,6 @@ ENTRY(fast_syscall_spill_registers_fixup_return)
1232 1400
1233ENDPROC(fast_syscall_spill_registers_fixup_return) 1401ENDPROC(fast_syscall_spill_registers_fixup_return)
1234 1402
1235/*
1236 * spill all registers.
1237 *
1238 * This is not a real function. The following conditions must be met:
1239 *
1240 * - must be called with call0.
1241 * - uses a3, a4 and SAR.
1242 * - the last 'valid' register of each frame are clobbered.
1243 * - the caller must have registered a fixup handler
1244 * (or be inside a critical section)
1245 * - PS_EXCM must be set (PS_WOE cleared?)
1246 */
1247
1248ENTRY(_spill_registers)
1249
1250 /*
1251 * Rotate ws so that the current windowbase is at bit 0.
1252 * Assume ws = xxxwww1yy (www1 current window frame).
1253 * Rotate ws right so that a4 = yyxxxwww1.
1254 */
1255
1256 rsr a4, windowbase
1257 rsr a3, windowstart # a3 = xxxwww1yy
1258 ssr a4 # holds WB
1259 slli a4, a3, WSBITS
1260 or a3, a3, a4 # a3 = xxxwww1yyxxxwww1yy
1261 srl a3, a3 # a3 = 00xxxwww1yyxxxwww1
1262
1263 /* We are done if there are no more than the current register frame. */
1264
1265 extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww
1266 movi a4, (1 << (WSBITS-1))
1267 _beqz a3, .Lnospill # only one active frame? jump
1268
1269 /* We want 1 at the top, so that we return to the current windowbase */
1270
1271 or a3, a3, a4 # 1yyxxxwww
1272
1273 /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */
1274
1275 wsr a3, windowstart # save shifted windowstart
1276 neg a4, a3
1277 and a3, a4, a3 # first bit set from right: 000010000
1278
1279 ffs_ws a4, a3 # a4: shifts to skip empty frames
1280 movi a3, WSBITS
1281 sub a4, a3, a4 # WSBITS-a4:number of 0-bits from right
1282 ssr a4 # save in SAR for later.
1283
1284 rsr a3, windowbase
1285 add a3, a3, a4
1286 wsr a3, windowbase
1287 rsync
1288
1289 rsr a3, windowstart
1290 srl a3, a3 # shift windowstart
1291
1292 /* WB is now just one frame below the oldest frame in the register
1293 window. WS is shifted so the oldest frame is in bit 0, thus, WB
1294 and WS differ by one 4-register frame. */
1295
1296 /* Save frames. Depending what call was used (call4, call8, call12),
1297 * we have to save 4,8. or 12 registers.
1298 */
1299
1300 _bbsi.l a3, 1, .Lc4
1301 _bbsi.l a3, 2, .Lc8
1302
1303 /* Special case: we have a call12-frame starting at a4. */
1304
1305 _bbci.l a3, 3, .Lc12 # bit 3 shouldn't be zero! (Jump to Lc12 first)
1306
1307 s32e a4, a1, -16 # a1 is valid with an empty spill area
1308 l32e a4, a5, -12
1309 s32e a8, a4, -48
1310 mov a8, a4
1311 l32e a4, a1, -16
1312 j .Lc12c
1313
1314.Lnospill:
1315 ret
1316
1317.Lloop: _bbsi.l a3, 1, .Lc4
1318 _bbci.l a3, 2, .Lc12
1319
1320.Lc8: s32e a4, a13, -16
1321 l32e a4, a5, -12
1322 s32e a8, a4, -32
1323 s32e a5, a13, -12
1324 s32e a6, a13, -8
1325 s32e a7, a13, -4
1326 s32e a9, a4, -28
1327 s32e a10, a4, -24
1328 s32e a11, a4, -20
1329
1330 srli a11, a3, 2 # shift windowbase by 2
1331 rotw 2
1332 _bnei a3, 1, .Lloop
1333
1334.Lexit: /* Done. Do the final rotation, set WS, and return. */
1335
1336 rotw 1
1337 rsr a3, windowbase
1338 ssl a3
1339 movi a3, 1
1340 sll a3, a3
1341 wsr a3, windowstart
1342 ret
1343
1344.Lc4: s32e a4, a9, -16
1345 s32e a5, a9, -12
1346 s32e a6, a9, -8
1347 s32e a7, a9, -4
1348
1349 srli a7, a3, 1
1350 rotw 1
1351 _bnei a3, 1, .Lloop
1352 j .Lexit
1353
1354.Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 2 shouldn't be zero!
1355
1356 /* 12-register frame (call12) */
1357
1358 l32e a2, a5, -12
1359 s32e a8, a2, -48
1360 mov a8, a2
1361
1362.Lc12c: s32e a9, a8, -44
1363 s32e a10, a8, -40
1364 s32e a11, a8, -36
1365 s32e a12, a8, -32
1366 s32e a13, a8, -28
1367 s32e a14, a8, -24
1368 s32e a15, a8, -20
1369 srli a15, a3, 3
1370
1371 /* The stack pointer for a4..a7 is out of reach, so we rotate the
1372 * window, grab the stackpointer, and rotate back.
1373 * Alternatively, we could also use the following approach, but that
1374 * makes the fixup routine much more complicated:
1375 * rotw 1
1376 * s32e a0, a13, -16
1377 * ...
1378 * rotw 2
1379 */
1380
1381 rotw 1
1382 mov a5, a13
1383 rotw -1
1384
1385 s32e a4, a9, -16
1386 s32e a5, a9, -12
1387 s32e a6, a9, -8
1388 s32e a7, a9, -4
1389
1390 rotw 3
1391
1392 _beqi a3, 1, .Lexit
1393 j .Lloop
1394
1395.Linvalid_mask:
1396
1397 /* We get here because of an unrecoverable error in the window
1398 * registers. If we are in user space, we kill the application,
1399 * however, this condition is unrecoverable in kernel space.
1400 */
1401
1402 rsr a0, ps
1403 _bbci.l a0, PS_UM_BIT, 1f
1404
1405 /* User space: Setup a dummy frame and kill application.
1406 * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer.
1407 */
1408
1409 movi a0, 1
1410 movi a1, 0
1411
1412 wsr a0, windowstart
1413 wsr a1, windowbase
1414 rsync
1415
1416 movi a0, 0
1417
1418 rsr a3, excsave1
1419 l32i a1, a3, EXC_TABLE_KSTK
1420
1421 movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL
1422 wsr a4, ps
1423 rsync
1424
1425 movi a6, SIGSEGV
1426 movi a4, do_exit
1427 callx4 a4
1428
14291: /* Kernel space: PANIC! */
1430
1431 wsr a0, excsave1
1432 movi a0, unrecoverable_exception
1433 callx0 a0 # should not return
14341: j 1b
1435
1436ENDPROC(_spill_registers)
1437
1438#ifdef CONFIG_MMU 1403#ifdef CONFIG_MMU
1439/* 1404/*
1440 * We should never get here. Bail out! 1405 * We should never get here. Bail out!