diff options
| author | Chris Zankel <chris@zankel.net> | 2014-01-29 01:09:51 -0500 |
|---|---|---|
| committer | Chris Zankel <chris@zankel.net> | 2014-01-29 01:09:51 -0500 |
| commit | 6b5a1f74e50170e64104135490dc32b657483594 (patch) | |
| tree | 6bb08372aa016f77f27ec12d8ce4bbcc16291af0 | |
| parent | 3251f1e27a5a17f0efd436cfd1e7b9896cfab0a0 (diff) | |
xtensa: fix fast_syscall_spill_registers
The original implementation could clobber registers under certain conditions.
The Xtensa processor architecture uses windowed registers, and the original
implementation used a4 as a temporary register. Under certain conditions,
a4 could be register a0 of the oldest window frame, and its content was not
always restored correctly.
Moving the _spill_registers routine inside the fast system call frees up
one more register for the spill routine, because the return address is no
longer required.
Signed-off-by: Chris Zankel <chris@zankel.net>
| -rw-r--r-- | arch/xtensa/kernel/entry.S | 383 |
1 files changed, 174 insertions, 209 deletions
diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index b61e25146a2f..ef7f4990722b 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S | |||
| @@ -1081,34 +1081,202 @@ ENTRY(fast_syscall_spill_registers) | |||
| 1081 | 1081 | ||
| 1082 | rsr a0, sar | 1082 | rsr a0, sar |
| 1083 | s32i a3, a2, PT_AREG3 | 1083 | s32i a3, a2, PT_AREG3 |
| 1084 | s32i a4, a2, PT_AREG4 | 1084 | s32i a0, a2, PT_SAR |
| 1085 | s32i a0, a2, PT_AREG5 # store SAR to PT_AREG5 | ||
| 1086 | 1085 | ||
| 1087 | /* The spill routine might clobber a7, a11, and a15. */ | 1086 | /* The spill routine might clobber a4, a7, a8, a11, a12, and a15. */ |
| 1088 | 1087 | ||
| 1088 | s32i a4, a2, PT_AREG4 | ||
| 1089 | s32i a7, a2, PT_AREG7 | 1089 | s32i a7, a2, PT_AREG7 |
| 1090 | s32i a8, a2, PT_AREG8 | ||
| 1090 | s32i a11, a2, PT_AREG11 | 1091 | s32i a11, a2, PT_AREG11 |
| 1092 | s32i a12, a2, PT_AREG12 | ||
| 1091 | s32i a15, a2, PT_AREG15 | 1093 | s32i a15, a2, PT_AREG15 |
| 1092 | 1094 | ||
| 1093 | call0 _spill_registers # destroys a3, a4, and SAR | 1095 | /* |
| 1096 | * Rotate ws so that the current windowbase is at bit 0. | ||
| 1097 | * Assume ws = xxxwww1yy (www1 current window frame). | ||
| 1098 | * Rotate ws right so that a3 = yyxxxwww1. | ||
| 1099 | */ | ||
| 1100 | |||
| 1101 | rsr a0, windowbase | ||
| 1102 | rsr a3, windowstart # a3 = xxxwww1yy | ||
| 1103 | ssr a0 # holds WB | ||
| 1104 | slli a0, a3, WSBITS | ||
| 1105 | or a3, a3, a0 # a3 = xxxwww1yyxxxwww1yy | ||
| 1106 | srl a3, a3 # a3 = 00xxxwww1yyxxxwww1 | ||
| 1107 | |||
| 1108 | /* We are done if the current register frame is the only active frame. */ | ||
| 1109 | |||
| 1110 | extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww | ||
| 1111 | movi a0, (1 << (WSBITS-1)) | ||
| 1112 | _beqz a3, .Lnospill # only one active frame? jump | ||
| 1113 | |||
| 1114 | /* We want 1 at the top, so that we return to the current windowbase */ | ||
| 1115 | |||
| 1116 | or a3, a3, a0 # 1yyxxxwww | ||
| 1117 | |||
| 1118 | /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */ | ||
| 1119 | |||
| 1120 | wsr a3, windowstart # save shifted windowstart | ||
| 1121 | neg a0, a3 | ||
| 1122 | and a3, a0, a3 # first bit set from right: 000010000 | ||
| 1123 | |||
| 1124 | ffs_ws a0, a3 # a0: shifts to skip empty frames | ||
| 1125 | movi a3, WSBITS | ||
| 1126 | sub a0, a3, a0 # WSBITS-a0:number of 0-bits from right | ||
| 1127 | ssr a0 # save in SAR for later. | ||
| 1128 | |||
| 1129 | rsr a3, windowbase | ||
| 1130 | add a3, a3, a0 | ||
| 1131 | wsr a3, windowbase | ||
| 1132 | rsync | ||
| 1133 | |||
| 1134 | rsr a3, windowstart | ||
| 1135 | srl a3, a3 # shift windowstart | ||
| 1136 | |||
| 1137 | /* WB is now just one frame below the oldest frame in the register | ||
| 1138 | window. WS is shifted so the oldest frame is in bit 0, thus, WB | ||
| 1139 | and WS differ by one 4-register frame. */ | ||
| 1140 | |||
| 1141 | /* Save frames. Depending what call was used (call4, call8, call12), | ||
| 1142 | * we have to save 4, 8, or 12 registers. | ||
| 1143 | */ | ||
| 1144 | |||
| 1145 | |||
| 1146 | .Lloop: _bbsi.l a3, 1, .Lc4 | ||
| 1147 | _bbci.l a3, 2, .Lc12 | ||
| 1148 | |||
| 1149 | .Lc8: s32e a4, a13, -16 | ||
| 1150 | l32e a4, a5, -12 | ||
| 1151 | s32e a8, a4, -32 | ||
| 1152 | s32e a5, a13, -12 | ||
| 1153 | s32e a6, a13, -8 | ||
| 1154 | s32e a7, a13, -4 | ||
| 1155 | s32e a9, a4, -28 | ||
| 1156 | s32e a10, a4, -24 | ||
| 1157 | s32e a11, a4, -20 | ||
| 1158 | srli a11, a3, 2 # shift windowstart by 2 | ||
| 1159 | rotw 2 | ||
| 1160 | _bnei a3, 1, .Lloop | ||
| 1161 | j .Lexit | ||
| 1162 | |||
| 1163 | .Lc4: s32e a4, a9, -16 | ||
| 1164 | s32e a5, a9, -12 | ||
| 1165 | s32e a6, a9, -8 | ||
| 1166 | s32e a7, a9, -4 | ||
| 1167 | |||
| 1168 | srli a7, a3, 1 | ||
| 1169 | rotw 1 | ||
| 1170 | _bnei a3, 1, .Lloop | ||
| 1171 | j .Lexit | ||
| 1172 | |||
| 1173 | .Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 3 shouldn't be zero! | ||
| 1174 | |||
| 1175 | /* 12-register frame (call12) */ | ||
| 1176 | |||
| 1177 | l32e a0, a5, -12 | ||
| 1178 | s32e a8, a0, -48 | ||
| 1179 | mov a8, a0 | ||
| 1180 | |||
| 1181 | s32e a9, a8, -44 | ||
| 1182 | s32e a10, a8, -40 | ||
| 1183 | s32e a11, a8, -36 | ||
| 1184 | s32e a12, a8, -32 | ||
| 1185 | s32e a13, a8, -28 | ||
| 1186 | s32e a14, a8, -24 | ||
| 1187 | s32e a15, a8, -20 | ||
| 1188 | srli a15, a3, 3 | ||
| 1189 | |||
| 1190 | /* The stack pointer for a4..a7 is out of reach, so we rotate the | ||
| 1191 | * window, grab the stackpointer, and rotate back. | ||
| 1192 | * Alternatively, we could also use the following approach, but that | ||
| 1193 | * makes the fixup routine much more complicated: | ||
| 1194 | * rotw 1 | ||
| 1195 | * s32e a0, a13, -16 | ||
| 1196 | * ... | ||
| 1197 | * rotw 2 | ||
| 1198 | */ | ||
| 1199 | |||
| 1200 | rotw 1 | ||
| 1201 | mov a4, a13 | ||
| 1202 | rotw -1 | ||
| 1203 | |||
| 1204 | s32e a4, a8, -16 | ||
| 1205 | s32e a5, a8, -12 | ||
| 1206 | s32e a6, a8, -8 | ||
| 1207 | s32e a7, a8, -4 | ||
| 1208 | |||
| 1209 | rotw 3 | ||
| 1210 | |||
| 1211 | _beqi a3, 1, .Lexit | ||
| 1212 | j .Lloop | ||
| 1213 | |||
| 1214 | .Lexit: | ||
| 1215 | |||
| 1216 | /* Done. Do the final rotation and set WS */ | ||
| 1217 | |||
| 1218 | rotw 1 | ||
| 1219 | rsr a3, windowbase | ||
| 1220 | ssl a3 | ||
| 1221 | movi a3, 1 | ||
| 1222 | sll a3, a3 | ||
| 1223 | wsr a3, windowstart | ||
| 1224 | .Lnospill: | ||
| 1094 | 1225 | ||
| 1095 | /* Advance PC, restore registers and SAR, and return from exception. */ | 1226 | /* Advance PC, restore registers and SAR, and return from exception. */ |
| 1096 | 1227 | ||
| 1097 | l32i a3, a2, PT_AREG5 | 1228 | l32i a3, a2, PT_SAR |
| 1098 | l32i a4, a2, PT_AREG4 | ||
| 1099 | l32i a0, a2, PT_AREG0 | 1229 | l32i a0, a2, PT_AREG0 |
| 1100 | wsr a3, sar | 1230 | wsr a3, sar |
| 1101 | l32i a3, a2, PT_AREG3 | 1231 | l32i a3, a2, PT_AREG3 |
| 1102 | 1232 | ||
| 1103 | /* Restore clobbered registers. */ | 1233 | /* Restore clobbered registers. */ |
| 1104 | 1234 | ||
| 1235 | l32i a4, a2, PT_AREG4 | ||
| 1105 | l32i a7, a2, PT_AREG7 | 1236 | l32i a7, a2, PT_AREG7 |
| 1237 | l32i a8, a2, PT_AREG8 | ||
| 1106 | l32i a11, a2, PT_AREG11 | 1238 | l32i a11, a2, PT_AREG11 |
| 1239 | l32i a12, a2, PT_AREG12 | ||
| 1107 | l32i a15, a2, PT_AREG15 | 1240 | l32i a15, a2, PT_AREG15 |
| 1108 | 1241 | ||
| 1109 | movi a2, 0 | 1242 | movi a2, 0 |
| 1110 | rfe | 1243 | rfe |
| 1111 | 1244 | ||
| 1245 | .Linvalid_mask: | ||
| 1246 | |||
| 1247 | /* We get here because of an unrecoverable error in the window | ||
| 1248 | * registers, so set up a dummy frame and kill the user application. | ||
| 1249 | * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer. | ||
| 1250 | */ | ||
| 1251 | |||
| 1252 | movi a0, 1 | ||
| 1253 | movi a1, 0 | ||
| 1254 | |||
| 1255 | wsr a0, windowstart | ||
| 1256 | wsr a1, windowbase | ||
| 1257 | rsync | ||
| 1258 | |||
| 1259 | movi a0, 0 | ||
| 1260 | |||
| 1261 | rsr a3, excsave1 | ||
| 1262 | l32i a1, a3, EXC_TABLE_KSTK | ||
| 1263 | |||
| 1264 | movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL | ||
| 1265 | wsr a4, ps | ||
| 1266 | rsync | ||
| 1267 | |||
| 1268 | movi a6, SIGSEGV | ||
| 1269 | movi a4, do_exit | ||
| 1270 | callx4 a4 | ||
| 1271 | |||
| 1272 | /* shouldn't return, so panic */ | ||
| 1273 | |||
| 1274 | wsr a0, excsave1 | ||
| 1275 | movi a0, unrecoverable_exception | ||
| 1276 | callx0 a0 # should not return | ||
| 1277 | 1: j 1b | ||
| 1278 | |||
| 1279 | |||
| 1112 | ENDPROC(fast_syscall_spill_registers) | 1280 | ENDPROC(fast_syscall_spill_registers) |
| 1113 | 1281 | ||
| 1114 | /* Fixup handler. | 1282 | /* Fixup handler. |
| @@ -1232,209 +1400,6 @@ ENTRY(fast_syscall_spill_registers_fixup_return) | |||
| 1232 | 1400 | ||
| 1233 | ENDPROC(fast_syscall_spill_registers_fixup_return) | 1401 | ENDPROC(fast_syscall_spill_registers_fixup_return) |
| 1234 | 1402 | ||
| 1235 | /* | ||
| 1236 | * spill all registers. | ||
| 1237 | * | ||
| 1238 | * This is not a real function. The following conditions must be met: | ||
| 1239 | * | ||
| 1240 | * - must be called with call0. | ||
| 1241 | * - uses a3, a4 and SAR. | ||
| 1242 | * - the last 'valid' register of each frame are clobbered. | ||
| 1243 | * - the caller must have registered a fixup handler | ||
| 1244 | * (or be inside a critical section) | ||
| 1245 | * - PS_EXCM must be set (PS_WOE cleared?) | ||
| 1246 | */ | ||
| 1247 | |||
| 1248 | ENTRY(_spill_registers) | ||
| 1249 | |||
| 1250 | /* | ||
| 1251 | * Rotate ws so that the current windowbase is at bit 0. | ||
| 1252 | * Assume ws = xxxwww1yy (www1 current window frame). | ||
| 1253 | * Rotate ws right so that a4 = yyxxxwww1. | ||
| 1254 | */ | ||
| 1255 | |||
| 1256 | rsr a4, windowbase | ||
| 1257 | rsr a3, windowstart # a3 = xxxwww1yy | ||
| 1258 | ssr a4 # holds WB | ||
| 1259 | slli a4, a3, WSBITS | ||
| 1260 | or a3, a3, a4 # a3 = xxxwww1yyxxxwww1yy | ||
| 1261 | srl a3, a3 # a3 = 00xxxwww1yyxxxwww1 | ||
| 1262 | |||
| 1263 | /* We are done if there are no more than the current register frame. */ | ||
| 1264 | |||
| 1265 | extui a3, a3, 1, WSBITS-1 # a3 = 0yyxxxwww | ||
| 1266 | movi a4, (1 << (WSBITS-1)) | ||
| 1267 | _beqz a3, .Lnospill # only one active frame? jump | ||
| 1268 | |||
| 1269 | /* We want 1 at the top, so that we return to the current windowbase */ | ||
| 1270 | |||
| 1271 | or a3, a3, a4 # 1yyxxxwww | ||
| 1272 | |||
| 1273 | /* Skip empty frames - get 'oldest' WINDOWSTART-bit. */ | ||
| 1274 | |||
| 1275 | wsr a3, windowstart # save shifted windowstart | ||
| 1276 | neg a4, a3 | ||
| 1277 | and a3, a4, a3 # first bit set from right: 000010000 | ||
| 1278 | |||
| 1279 | ffs_ws a4, a3 # a4: shifts to skip empty frames | ||
| 1280 | movi a3, WSBITS | ||
| 1281 | sub a4, a3, a4 # WSBITS-a4:number of 0-bits from right | ||
| 1282 | ssr a4 # save in SAR for later. | ||
| 1283 | |||
| 1284 | rsr a3, windowbase | ||
| 1285 | add a3, a3, a4 | ||
| 1286 | wsr a3, windowbase | ||
| 1287 | rsync | ||
| 1288 | |||
| 1289 | rsr a3, windowstart | ||
| 1290 | srl a3, a3 # shift windowstart | ||
| 1291 | |||
| 1292 | /* WB is now just one frame below the oldest frame in the register | ||
| 1293 | window. WS is shifted so the oldest frame is in bit 0, thus, WB | ||
| 1294 | and WS differ by one 4-register frame. */ | ||
| 1295 | |||
| 1296 | /* Save frames. Depending what call was used (call4, call8, call12), | ||
| 1297 | * we have to save 4,8. or 12 registers. | ||
| 1298 | */ | ||
| 1299 | |||
| 1300 | _bbsi.l a3, 1, .Lc4 | ||
| 1301 | _bbsi.l a3, 2, .Lc8 | ||
| 1302 | |||
| 1303 | /* Special case: we have a call12-frame starting at a4. */ | ||
| 1304 | |||
| 1305 | _bbci.l a3, 3, .Lc12 # bit 3 shouldn't be zero! (Jump to Lc12 first) | ||
| 1306 | |||
| 1307 | s32e a4, a1, -16 # a1 is valid with an empty spill area | ||
| 1308 | l32e a4, a5, -12 | ||
| 1309 | s32e a8, a4, -48 | ||
| 1310 | mov a8, a4 | ||
| 1311 | l32e a4, a1, -16 | ||
| 1312 | j .Lc12c | ||
| 1313 | |||
| 1314 | .Lnospill: | ||
| 1315 | ret | ||
| 1316 | |||
| 1317 | .Lloop: _bbsi.l a3, 1, .Lc4 | ||
| 1318 | _bbci.l a3, 2, .Lc12 | ||
| 1319 | |||
| 1320 | .Lc8: s32e a4, a13, -16 | ||
| 1321 | l32e a4, a5, -12 | ||
| 1322 | s32e a8, a4, -32 | ||
| 1323 | s32e a5, a13, -12 | ||
| 1324 | s32e a6, a13, -8 | ||
| 1325 | s32e a7, a13, -4 | ||
| 1326 | s32e a9, a4, -28 | ||
| 1327 | s32e a10, a4, -24 | ||
| 1328 | s32e a11, a4, -20 | ||
| 1329 | |||
| 1330 | srli a11, a3, 2 # shift windowbase by 2 | ||
| 1331 | rotw 2 | ||
| 1332 | _bnei a3, 1, .Lloop | ||
| 1333 | |||
| 1334 | .Lexit: /* Done. Do the final rotation, set WS, and return. */ | ||
| 1335 | |||
| 1336 | rotw 1 | ||
| 1337 | rsr a3, windowbase | ||
| 1338 | ssl a3 | ||
| 1339 | movi a3, 1 | ||
| 1340 | sll a3, a3 | ||
| 1341 | wsr a3, windowstart | ||
| 1342 | ret | ||
| 1343 | |||
| 1344 | .Lc4: s32e a4, a9, -16 | ||
| 1345 | s32e a5, a9, -12 | ||
| 1346 | s32e a6, a9, -8 | ||
| 1347 | s32e a7, a9, -4 | ||
| 1348 | |||
| 1349 | srli a7, a3, 1 | ||
| 1350 | rotw 1 | ||
| 1351 | _bnei a3, 1, .Lloop | ||
| 1352 | j .Lexit | ||
| 1353 | |||
| 1354 | .Lc12: _bbci.l a3, 3, .Linvalid_mask # bit 2 shouldn't be zero! | ||
| 1355 | |||
| 1356 | /* 12-register frame (call12) */ | ||
| 1357 | |||
| 1358 | l32e a2, a5, -12 | ||
| 1359 | s32e a8, a2, -48 | ||
| 1360 | mov a8, a2 | ||
| 1361 | |||
| 1362 | .Lc12c: s32e a9, a8, -44 | ||
| 1363 | s32e a10, a8, -40 | ||
| 1364 | s32e a11, a8, -36 | ||
| 1365 | s32e a12, a8, -32 | ||
| 1366 | s32e a13, a8, -28 | ||
| 1367 | s32e a14, a8, -24 | ||
| 1368 | s32e a15, a8, -20 | ||
| 1369 | srli a15, a3, 3 | ||
| 1370 | |||
| 1371 | /* The stack pointer for a4..a7 is out of reach, so we rotate the | ||
| 1372 | * window, grab the stackpointer, and rotate back. | ||
| 1373 | * Alternatively, we could also use the following approach, but that | ||
| 1374 | * makes the fixup routine much more complicated: | ||
| 1375 | * rotw 1 | ||
| 1376 | * s32e a0, a13, -16 | ||
| 1377 | * ... | ||
| 1378 | * rotw 2 | ||
| 1379 | */ | ||
| 1380 | |||
| 1381 | rotw 1 | ||
| 1382 | mov a5, a13 | ||
| 1383 | rotw -1 | ||
| 1384 | |||
| 1385 | s32e a4, a9, -16 | ||
| 1386 | s32e a5, a9, -12 | ||
| 1387 | s32e a6, a9, -8 | ||
| 1388 | s32e a7, a9, -4 | ||
| 1389 | |||
| 1390 | rotw 3 | ||
| 1391 | |||
| 1392 | _beqi a3, 1, .Lexit | ||
| 1393 | j .Lloop | ||
| 1394 | |||
| 1395 | .Linvalid_mask: | ||
| 1396 | |||
| 1397 | /* We get here because of an unrecoverable error in the window | ||
| 1398 | * registers. If we are in user space, we kill the application, | ||
| 1399 | * however, this condition is unrecoverable in kernel space. | ||
| 1400 | */ | ||
| 1401 | |||
| 1402 | rsr a0, ps | ||
| 1403 | _bbci.l a0, PS_UM_BIT, 1f | ||
| 1404 | |||
| 1405 | /* User space: Setup a dummy frame and kill application. | ||
| 1406 | * Note: We assume EXC_TABLE_KSTK contains a valid stack pointer. | ||
| 1407 | */ | ||
| 1408 | |||
| 1409 | movi a0, 1 | ||
| 1410 | movi a1, 0 | ||
| 1411 | |||
| 1412 | wsr a0, windowstart | ||
| 1413 | wsr a1, windowbase | ||
| 1414 | rsync | ||
| 1415 | |||
| 1416 | movi a0, 0 | ||
| 1417 | |||
| 1418 | rsr a3, excsave1 | ||
| 1419 | l32i a1, a3, EXC_TABLE_KSTK | ||
| 1420 | |||
| 1421 | movi a4, (1 << PS_WOE_BIT) | LOCKLEVEL | ||
| 1422 | wsr a4, ps | ||
| 1423 | rsync | ||
| 1424 | |||
| 1425 | movi a6, SIGSEGV | ||
| 1426 | movi a4, do_exit | ||
| 1427 | callx4 a4 | ||
| 1428 | |||
| 1429 | 1: /* Kernel space: PANIC! */ | ||
| 1430 | |||
| 1431 | wsr a0, excsave1 | ||
| 1432 | movi a0, unrecoverable_exception | ||
| 1433 | callx0 a0 # should not return | ||
| 1434 | 1: j 1b | ||
| 1435 | |||
| 1436 | ENDPROC(_spill_registers) | ||
| 1437 | |||
| 1438 | #ifdef CONFIG_MMU | 1403 | #ifdef CONFIG_MMU |
| 1439 | /* | 1404 | /* |
| 1440 | * We should never get here. Bail out! | 1405 | * We should never get here. Bail out! |
