diff options
author | Chris Zankel <chris@zankel.net> | 2014-01-29 01:09:51 -0500 |
---|---|---|
committer | Chris Zankel <chris@zankel.net> | 2014-01-29 01:09:51 -0500 |
commit | 6b5a1f74e50170e64104135490dc32b657483594 (patch) | |
tree | 6bb08372aa016f77f27ec12d8ce4bbcc16291af0 | |
parent | 3251f1e27a5a17f0efd436cfd1e7b9896cfab0a0 (diff) |
xtensa: fix fast_syscall_spill_registers
The original implementation could clobber registers under certain conditions.
The Xtensa processor architecture uses windowed registers, and the original
implementation used a4 as a temporary register. Under certain conditions, a4
could be register a0 of the oldest window frame, and its content was not
always restored correctly.
Moving the _spill_registers routine inside the fast system call frees up one
more register (the return address is no longer required) for the spill
routine.
Signed-off-by: Chris Zankel <chris@zankel.net>
-rw-r--r-- | arch/xtensa/kernel/entry.S | 383 |
1 files changed, 174 insertions, 209 deletions
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index b61e25146a2f..ef7f4990722b 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S | |||
@@ -1081,34 +1081,202 @@ ENTRY(fast_syscall_spill_registers) | |||
1081 | 1081 | ||
1082 | rsr a0, sar | 1082 | rsr a0, sar |
1083 | s32i a3, a2, PT_AREG3 | 1083 | s32i a3, a2, PT_AREG3 |
1084 | s32i a4, a2, PT_AREG4 | 1084 | s32i a0, a2, PT_SAR |
1085 | s32i a0, a2, PT_AREG5 # store SAR to PT_AREG5 | ||
1086 | 1085 | ||
1087 | /* The spill routine might clobber a7, a11, and a15. */ | 1086 | /* The spill routine might clobber a4, a7, a8, a11, a12, and a15. */ |
1088 | 1087 | ||
1088 | s32i a4, a2, PT_AREG4 | ||
1089 | s32i a7, a2, PT_AREG7 | 1089 | s32i a7, a2, PT_AREG7 |
1090 | s32i a8, a2, PT_AREG8 | ||
1090 | s32i a11, a2, PT_AREG11 | 1091 | s32i a11, a2, PT_AREG11 |
1092 | s32i a12, a2, PT_AREG12 | ||
1091 | s32i a15, a2, PT_AREG15 | 1093 | s32i a15, a2, PT_AREG15 |
1092 | 1094 | ||
1093 | call0 _spill_registers # destroys a3, a4, and SAR | 1095 | /* |
1096 | * Rotate ws so that the current windowbase is at bit 0. | ||
1097 | * Assume ws = xxxwww1yy (www1 current window frame). | ||
1098 | * Rotate ws right so that a3 = yyxxxwww1. | ||
1099 | */ | ||
1100 | |||
1101 | rsr a0, windowbase | ||
1102 | rsr a3, windowstart # a3 = xxxwww1yy | ||
1103 | ssr a0 # holds WB | ||
1104 | slli a0, a3, WSBITS | ||
1105 | or a3, a3, a0 # a3 = xxxwww1yyxxxwww1yy | ||
1106 | srl a3, a3 # a3 = 00xxxwww1yyxxxwww1 | ||
1107 | |||
1108 | /* We are done if there is no frame other than the current register frame. */ | ||
1109 | |||
1110 | extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww | ||
1111 | movi a0, (1 << (WSBITS-1)) | ||
1112 | _beqz a3, .Lnospill # only one active frame? jump | ||
1113 | |||
1114 | /* We want 1 at the top, so that we return to the current windowbase */ | ||
1115 | |||
1116 | or a3, a3, a0 # 1yyxxxwww | ||
1117 | |||
1118 | /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */ | ||
1119 | |||
1120 | wsr a3, windowstart # save shifted windowstart | ||
1121 | neg a0, a3 | ||
1122 | and a3, a0, a3 # first bit set from right: 000010000 | ||
1123 | |||
1124 | ffs_ws a0, a3 # a0: shifts to skip empty frames | ||
1125 | movi a3, WSBITS | ||
1126 | sub a0, a3, a0 # WSBITS-a0:number of 0-bits from right | ||
1127 | ssr a0 # save in SAR for later. | ||
1128 | |||
1129 | rsr a3, windowbase | ||
1130 | add a3, a3, a0 | ||
1131 | wsr a3, windowbase | ||
1132 | rsync | ||
1133 | |||
1134 | rsr a3, windowstart | ||
1135 | srl a3, a3 # shift windowstart | ||
1136 | |||
1137 | /* WB is now just one frame below the oldest frame in the register | ||
1138 | window. WS is shifted so the oldest frame is in bit 0, thus, WB | ||
1139 | and WS differ by one 4-register frame. */ | ||
1140 | |||
1141 | /* Save frames. Depending on which call was used (call4, call8, call12), | ||
1142 | * we have to save 4, 8, or 12 registers. | ||
1143 | */ | ||
1144 | |||
1145 | |||
1146 | .Lloop: _bbsi.l a3, 1, .Lc4 | ||
1147 | _bbci.l a3, 2, .Lc12 | ||
1148 | |||
1149 | .Lc8: s32e a4, a13, -16 | ||
1150 | l32e a4, a5, -12 | ||
1151 | s32e a8, a4, -32 | ||
1152 | s32e a5, a13, -12 | ||
1153 | s32e a6, a13, -8 | ||
1154 | s32e a7, a13, -4 | ||
1155 | s32e a9, a4, -28 | ||
1156 | s32e a10, a4, -24 | ||
1157 | s32e a11, a4, -20 | ||
1158 | srli a11, a3, 2 # shift windowstart by 2 | ||
1159 | rotw 2 | ||
1160 | _bnei a3, 1, .Lloop | ||
1161 | j .Lexit | ||
1162 | |||
1163 | .Lc4: s32e a4, a9, -16 | ||
1164 | s32e a5, a9, -12 | ||
1165 | s32e a6, a9, -8 | ||
1166 | s32e a7, a9, -4 | ||
1167 | |||
1168 | srli a7, a3, 1 | ||
1169 | rotw 1 | ||
1170 | _bnei a3, 1, .Lloop | ||
1171 | j .Lexit | ||
1172 | |||
1173 | .Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 3 shouldn't be zero! | ||
1174 | |||
1175 | /* 12-register frame (call12) */ | ||
1176 | |||
1177 | l32e a0, a5, -12 | ||
1178 | s32e a8, a0, -48 | ||
1179 | mov a8, a0 | ||
1180 | |||
1181 | s32e a9, a8, -44 | ||
1182 | s32e a10, a8, -40 | ||
1183 | s32e a11, a8, -36 | ||
1184 | s32e a12, a8, -32 | ||
1185 | s32e a13, a8, -28 | ||
1186 | s32e a14, a8, -24 | ||
1187 | s32e a15, a8, -20 | ||
1188 | srli a15, a3, 3 | ||
1189 | |||
1190 | /* The stack pointer for a4..a7 is out of reach, so we rotate the | ||
1191 | * window, grab the stackpointer, and rotate back. | ||
1192 | * Alternatively, we could also use the following approach, but that | ||
1193 | * makes the fixup routine much more complicated: | ||
1194 | * rotw 1 | ||
1195 | * s32e a0, a13, -16 | ||
1196 | * ... | ||
1197 | * rotw 2 | ||
1198 | */ | ||
1199 | |||
1200 | rotw 1 | ||
1201 | mov a4, a13 | ||
1202 | rotw -1 | ||
1203 | |||
1204 | s32e a4, a8, -16 | ||
1205 | s32e a5, a8, -12 | ||
1206 | s32e a6, a8, -8 | ||
1207 | s32e a7, a8, -4 | ||
1208 | |||
1209 | rotw 3 | ||
1210 | |||
1211 | _beqi a3, 1, .Lexit | ||
1212 | j .Lloop | ||
1213 | |||
1214 | .Lexit: | ||
1215 | |||
1216 | /* Done. Do the final rotation and set WS */ | ||
1217 | |||
1218 | rotw 1 | ||
1219 | rsr a3, windowbase | ||
1220 | ssl a3 | ||
1221 | movi a3, 1 | ||
1222 | sll a3, a3 | ||
1223 | wsr a3, windowstart | ||
1224 | .Lnospill: | ||
1094 | 1225 | ||
1095 | /* Advance PC, restore registers and SAR, and return from exception. */ | 1226 | /* Advance PC, restore registers and SAR, and return from exception. */ |
1096 | 1227 | ||
1097 | l32i a3, a2, PT_AREG5 | 1228 | l32i a3, a2, PT_SAR |
1098 | l32i a4, a2, PT_AREG4 | ||
1099 | l32i a0, a2, PT_AREG0 | 1229 | l32i a0, a2, PT_AREG0 |
1100 | wsr a3, sar | 1230 | wsr a3, sar |
1101 | l32i a3, a2, PT_AREG3 | 1231 | l32i a3, a2, PT_AREG3 |
1102 | 1232 | ||
1103 | /* Restore clobbered registers. */ | 1233 | /* Restore clobbered registers. */ |
1104 | 1234 | ||
1235 | l32i a4, a2, PT_AREG4 | ||
1105 | l32i a7, a2, PT_AREG7 | 1236 | l32i a7, a2, PT_AREG7 |
1237 | l32i a8, a2, PT_AREG8 | ||
1106 | l32i a11, a2, PT_AREG11 | 1238 | l32i a11, a2, PT_AREG11 |
1239 | l32i a12, a2, PT_AREG12 | ||
1107 | l32i a15, a2, PT_AREG15 | 1240 | l32i a15, a2, PT_AREG15 |
1108 | 1241 | ||
1109 | movi a2, 0 | 1242 | movi a2, 0 |
1110 | rfe | 1243 | rfe |
1111 | 1244 | ||
1245 | .Linvalid_mask: | ||
1246 | |||
1247 | /* We get here because of an unrecoverable error in the window | ||
1248 | * registers, so set up a dummy frame and kill the user application. | ||
1249 | * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer. | ||
1250 | */ | ||
1251 | |||
1252 | movi a0, 1 | ||
1253 | movi a1, 0 | ||
1254 | |||
1255 | wsr a0, windowstart | ||
1256 | wsr a1, windowbase | ||
1257 | rsync | ||
1258 | |||
1259 | movi a0, 0 | ||
1260 | |||
1261 | rsr a3, excsave1 | ||
1262 | l32i a1, a3, EXC_TABLE_KSTK | ||
1263 | |||
1264 | movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL | ||
1265 | wsr a4, ps | ||
1266 | rsync | ||
1267 | |||
1268 | movi a6, SIGSEGV | ||
1269 | movi a4, do_exit | ||
1270 | callx4 a4 | ||
1271 | |||
1272 | /* shouldn't return, so panic */ | ||
1273 | |||
1274 | wsr a0, excsave1 | ||
1275 | movi a0, unrecoverable_exception | ||
1276 | callx0 a0 # should not return | ||
1277 | 1: j 1b | ||
1278 | |||
1279 | |||
1112 | ENDPROC(fast_syscall_spill_registers) | 1280 | ENDPROC(fast_syscall_spill_registers) |
1113 | 1281 | ||
1114 | /* Fixup handler. | 1282 | /* Fixup handler. |
@@ -1232,209 +1400,6 @@ ENTRY(fast_syscall_spill_registers_fixup_return) | |||
1232 | 1400 | ||
1233 | ENDPROC(fast_syscall_spill_registers_fixup_return) | 1401 | ENDPROC(fast_syscall_spill_registers_fixup_return) |
1234 | 1402 | ||
1235 | /* | ||
1236 | * spill all registers. | ||
1237 | * | ||
1238 | * This is not a real function. The following conditions must be met: | ||
1239 | * | ||
1240 | * - must be called with call0. | ||
1241 | * - uses a3, a4 and SAR. | ||
1242 | * - the last 'valid' register of each frame are clobbered. | ||
1243 | * - the caller must have registered a fixup handler | ||
1244 | * (or be inside a critical section) | ||
1245 | * - PS_EXCM must be set (PS_WOE cleared?) | ||
1246 | */ | ||
1247 | |||
1248 | ENTRY(_spill_registers) | ||
1249 | |||
1250 | /* | ||
1251 | * Rotate ws so that the current windowbase is at bit 0. | ||
1252 | * Assume ws = xxxwww1yy (www1 current window frame). | ||
1253 | * Rotate ws right so that a4 = yyxxxwww1. | ||
1254 | */ | ||
1255 | |||
1256 | rsr a4, windowbase | ||
1257 | rsr a3, windowstart # a3 = xxxwww1yy | ||
1258 | ssr a4 # holds WB | ||
1259 | slli a4, a3, WSBITS | ||
1260 | or a3, a3, a4 # a3 = xxxwww1yyxxxwww1yy | ||
1261 | srl a3, a3 # a3 = 00xxxwww1yyxxxwww1 | ||
1262 | |||
1263 | /* We are done if there are no more than the current register frame. */ | ||
1264 | |||
1265 | extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww | ||
1266 | movi a4, (1 << (WSBITS-1)) | ||
1267 | _beqz a3, .Lnospill # only one active frame? jump | ||
1268 | |||
1269 | /* We want 1 at the top, so that we return to the current windowbase */ | ||
1270 | |||
1271 | or a3, a3, a4 # 1yyxxxwww | ||
1272 | |||
1273 | /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */ | ||
1274 | |||
1275 | wsr a3, windowstart # save shifted windowstart | ||
1276 | neg a4, a3 | ||
1277 | and a3, a4, a3 # first bit set from right: 000010000 | ||
1278 | |||
1279 | ffs_ws a4, a3 # a4: shifts to skip empty frames | ||
1280 | movi a3, WSBITS | ||
1281 | sub a4, a3, a4 # WSBITS-a4:number of 0-bits from right | ||
1282 | ssr a4 # save in SAR for later. | ||
1283 | |||
1284 | rsr a3, windowbase | ||
1285 | add a3, a3, a4 | ||
1286 | wsr a3, windowbase | ||
1287 | rsync | ||
1288 | |||
1289 | rsr a3, windowstart | ||
1290 | srl a3, a3 # shift windowstart | ||
1291 | |||
1292 | /* WB is now just one frame below the oldest frame in the register | ||
1293 | window. WS is shifted so the oldest frame is in bit 0, thus, WB | ||
1294 | and WS differ by one 4-register frame. */ | ||
1295 | |||
1296 | /* Save frames. Depending what call was used (call4, call8, call12), | ||
1297 | * we have to save 4,8. or 12 registers. | ||
1298 | */ | ||
1299 | |||
1300 | _bbsi.l a3, 1, .Lc4 | ||
1301 | _bbsi.l a3, 2, .Lc8 | ||
1302 | |||
1303 | /* Special case: we have a call12-frame starting at a4. */ | ||
1304 | |||
1305 | _bbci.l a3, 3, .Lc12 # bit 3 shouldn't be zero! (Jump to Lc12 first) | ||
1306 | |||
1307 | s32e a4, a1, -16 # a1 is valid with an empty spill area | ||
1308 | l32e a4, a5, -12 | ||
1309 | s32e a8, a4, -48 | ||
1310 | mov a8, a4 | ||
1311 | l32e a4, a1, -16 | ||
1312 | j .Lc12c | ||
1313 | |||
1314 | .Lnospill: | ||
1315 | ret | ||
1316 | |||
1317 | .Lloop: _bbsi.l a3, 1, .Lc4 | ||
1318 | _bbci.l a3, 2, .Lc12 | ||
1319 | |||
1320 | .Lc8: s32e a4, a13, -16 | ||
1321 | l32e a4, a5, -12 | ||
1322 | s32e a8, a4, -32 | ||
1323 | s32e a5, a13, -12 | ||
1324 | s32e a6, a13, -8 | ||
1325 | s32e a7, a13, -4 | ||
1326 | s32e a9, a4, -28 | ||
1327 | s32e a10, a4, -24 | ||
1328 | s32e a11, a4, -20 | ||
1329 | |||
1330 | srli a11, a3, 2 # shift windowbase by 2 | ||
1331 | rotw 2 | ||
1332 | _bnei a3, 1, .Lloop | ||
1333 | |||
1334 | .Lexit: /* Done. Do the final rotation, set WS, and return. */ | ||
1335 | |||
1336 | rotw 1 | ||
1337 | rsr a3, windowbase | ||
1338 | ssl a3 | ||
1339 | movi a3, 1 | ||
1340 | sll a3, a3 | ||
1341 | wsr a3, windowstart | ||
1342 | ret | ||
1343 | |||
1344 | .Lc4: s32e a4, a9, -16 | ||
1345 | s32e a5, a9, -12 | ||
1346 | s32e a6, a9, -8 | ||
1347 | s32e a7, a9, -4 | ||
1348 | |||
1349 | srli a7, a3, 1 | ||
1350 | rotw 1 | ||
1351 | _bnei a3, 1, .Lloop | ||
1352 | j .Lexit | ||
1353 | |||
1354 | .Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 2 shouldn't be zero! | ||
1355 | |||
1356 | /* 12-register frame (call12) */ | ||
1357 | |||
1358 | l32e a2, a5, -12 | ||
1359 | s32e a8, a2, -48 | ||
1360 | mov a8, a2 | ||
1361 | |||
1362 | .Lc12c: s32e a9, a8, -44 | ||
1363 | s32e a10, a8, -40 | ||
1364 | s32e a11, a8, -36 | ||
1365 | s32e a12, a8, -32 | ||
1366 | s32e a13, a8, -28 | ||
1367 | s32e a14, a8, -24 | ||
1368 | s32e a15, a8, -20 | ||
1369 | srli a15, a3, 3 | ||
1370 | |||
1371 | /* The stack pointer for a4..a7 is out of reach, so we rotate the | ||
1372 | * window, grab the stackpointer, and rotate back. | ||
1373 | * Alternatively, we could also use the following approach, but that | ||
1374 | * makes the fixup routine much more complicated: | ||
1375 | * rotw 1 | ||
1376 | * s32e a0, a13, -16 | ||
1377 | * ... | ||
1378 | * rotw 2 | ||
1379 | */ | ||
1380 | |||
1381 | rotw 1 | ||
1382 | mov a5, a13 | ||
1383 | rotw -1 | ||
1384 | |||
1385 | s32e a4, a9, -16 | ||
1386 | s32e a5, a9, -12 | ||
1387 | s32e a6, a9, -8 | ||
1388 | s32e a7, a9, -4 | ||
1389 | |||
1390 | rotw 3 | ||
1391 | |||
1392 | _beqi a3, 1, .Lexit | ||
1393 | j .Lloop | ||
1394 | |||
1395 | .Linvalid_mask: | ||
1396 | |||
1397 | /* We get here because of an unrecoverable error in the window | ||
1398 | * registers. If we are in user space, we kill the application, | ||
1399 | * however, this condition is unrecoverable in kernel space. | ||
1400 | */ | ||
1401 | |||
1402 | rsr a0, ps | ||
1403 | _bbci.l a0, PS_UM_BIT, 1f | ||
1404 | |||
1405 | /* User space: Setup a dummy frame and kill application. | ||
1406 | * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer. | ||
1407 | */ | ||
1408 | |||
1409 | movi a0, 1 | ||
1410 | movi a1, 0 | ||
1411 | |||
1412 | wsr a0, windowstart | ||
1413 | wsr a1, windowbase | ||
1414 | rsync | ||
1415 | |||
1416 | movi a0, 0 | ||
1417 | |||
1418 | rsr a3, excsave1 | ||
1419 | l32i a1, a3, EXC_TABLE_KSTK | ||
1420 | |||
1421 | movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL | ||
1422 | wsr a4, ps | ||
1423 | rsync | ||
1424 | |||
1425 | movi a6, SIGSEGV | ||
1426 | movi a4, do_exit | ||
1427 | callx4 a4 | ||
1428 | |||
1429 | 1: /* Kernel space: PANIC! */ | ||
1430 | |||
1431 | wsr a0, excsave1 | ||
1432 | movi a0, unrecoverable_exception | ||
1433 | callx0 a0 # should not return | ||
1434 | 1: j 1b | ||
1435 | |||
1436 | ENDPROC(_spill_registers) | ||
1437 | |||
1438 | #ifdef CONFIG_MMU | 1403 | #ifdef CONFIG_MMU |
1439 | /* | 1404 | /* |
1440 | * We should never get here. Bail out! | 1405 | * We should never get here. Bail out! |