Diffstat (limited to 'fs')
-rw-r--r--	fs/xfs/xfs_log_recover.c | 271
1 file changed, 168 insertions(+), 103 deletions(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 594f7e63b432..be5568839442 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@ xlog_verify_head(
 	bool			tmp_wrapped;
 
 	/*
-	 * Search backwards through the log looking for the log record header
-	 * block. This wraps all the way back around to the head so something is
-	 * seriously wrong if we can't find it.
-	 */
-	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-			rhead, wrapped);
-	if (found < 0)
-		return found;
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		return -EIO;
-	}
-
-	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-
-	/*
-	 * Now that we have a tail block, check the head of the log for torn
-	 * writes. Search again until we hit the tail or the maximum number of
-	 * log record I/Os that could have been in flight at one time. Use a
-	 * temporary buffer so we don't trash the rhead/bp pointer from the
-	 * call above.
+	 * Check the head of the log for torn writes. Search backwards from the
+	 * head until we hit the tail or the maximum number of log record I/Os
+	 * that could have been in flight at one time. Use a temporary buffer so
+	 * we don't trash the rhead/bp pointers from the caller.
 	 */
 	tmp_bp = xlog_get_bp(log, 1);
 	if (!tmp_bp)
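
Editor's note: the rewritten comment above bounds the torn-write scan by the number of log record I/Os that can be in flight at once. As a standalone back-of-the-envelope sketch (assumed values: XLOG_MAX_ICLOGS is 8 in the kernel and 32k is a common iclog size; neither number comes from this patch), the window behind the head works out as:

/*
 * Rough size of the torn-write window: up to max_iclogs concurrent log
 * writes of iclog_size bytes each may be incomplete at the head.
 * BTOBB() converts bytes to 512-byte basic blocks, rounding up, like
 * the kernel macro of the same name.
 */
#include <stdio.h>

#define BBSHIFT		9
#define BTOBB(bytes)	(((bytes) + (1 << BBSHIFT) - 1) >> BBSHIFT)

int main(void)
{
	int max_iclogs = 8;		/* assumed: XLOG_MAX_ICLOGS */
	int iclog_size = 32 * 1024;	/* assumed iclog size in bytes */

	/* 8 * 64 = 512 basic blocks that may contain torn writes */
	printf("window = %d basic blocks\n", max_iclogs * BTOBB(iclog_size));
	return 0;
}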
@@ -1216,6 +1199,115 @@ xlog_verify_head(
 }
 
 /*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	struct xfs_buf		*bp,
+	bool			*clean)
+{
+	struct xlog_op_header	*op_head;
+	xfs_daddr_t		umount_data_blk;
+	xfs_daddr_t		after_umount_blk;
+	int			hblks;
+	int			error;
+	char			*offset;
+
+	*clean = false;
+
+	/*
+	 * Look for unmount record. If we find it, then we know there was a
+	 * clean unmount. Since 'i' could be the last block in the physical
+	 * log, we convert to a log block before comparing to the head_blk.
+	 *
+	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+	 * below. We won't want to clear the unmount record if there is one, so
+	 * we pass the lsn of the unmount record rather than the block after it.
+	 */
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
+
+		if ((h_version & XLOG_VERSION_2) &&
+		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+			if (h_size % XLOG_HEADER_CYCLE_SIZE)
+				hblks++;
+		} else {
+			hblks = 1;
+		}
+	} else {
+		hblks = 1;
+	}
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+	if (*head_blk == after_umount_blk &&
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
+			return error;
+
+		op_head = (struct xlog_op_header *)offset;
+		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+			/*
+			 * Set tail and last sync so that newly written log
+			 * records will point recovery to after the current
+			 * unmount record.
+			 */
+			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			*tail_blk = after_umount_blk;
+
+			*clean = true;
+		}
+	}
+
+	return 0;
+}
+
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Reset log values according to the state of the log when we
+	 * crashed. In the case where head_blk == 0, we bump curr_cycle
+	 * one because the next write starts a new cycle rather than
+	 * continuing the cycle of the last good log record. At this
+	 * point we have guaranteed that all partial log records have been
+	 * accounted for. Therefore, we know that the last good log record
+	 * written was complete and ended exactly on the end boundary
+	 * of the physical log.
+	 */
+	log->l_prev_block = rhead_blk;
+	log->l_curr_block = (int)head_blk;
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+	if (bump_cycle)
+		log->l_curr_cycle++;
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+}
+
+/*
  * Find the sync block number or the tail of the log.
  *
  * This will be the block number of the last record to have its
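
Editor's note: the hblks logic in the new xlog_check_unmount_rec() rounds the v2 record header size up to whole XLOG_HEADER_CYCLE_SIZE chunks. A minimal standalone sketch of just that rounding, assuming the kernel's 32k value for XLOG_HEADER_CYCLE_SIZE:

#include <stdio.h>

#define XLOG_HEADER_CYCLE_SIZE	(32 * 1024)	/* assumed, matches the kernel */

/* mirror of the hblks computation for a v2 log record header */
static int log_header_blocks(int h_size)
{
	int hblks;

	if (h_size <= XLOG_HEADER_CYCLE_SIZE)
		return 1;
	hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
	if (h_size % XLOG_HEADER_CYCLE_SIZE)
		hblks++;	/* a partial chunk still needs a header block */
	return hblks;
}

int main(void)
{
	printf("%d\n", log_header_blocks(32768));	/* 1 */
	printf("%d\n", log_header_blocks(65536));	/* 2 */
	printf("%d\n", log_header_blocks(262144));	/* 8 */
	return 0;
}

The block after the unmount record then falls at rhead_blk + hblks + BTOBB(h_len), reduced modulo l_logBBsize so a record near the physical end of the log wraps correctly.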
@@ -1238,22 +1330,20 @@ xlog_find_tail(
 	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
-	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
 	int			error;
-	xfs_daddr_t		umount_data_blk;
-	xfs_daddr_t		after_umount_blk;
 	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
-	int			hblks;
 	bool			wrapped = false;
+	bool			clean = false;
 
 	/*
 	 * Find previous log record
 	 */
 	if ((error = xlog_find_head(log, head_blk)))
 		return error;
+	ASSERT(*head_blk < INT_MAX);
 
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
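
Editor's note: the new ASSERT(*head_blk < INT_MAX) is worth a word, since xlog_set_state() stores the 64-bit head block in the int-sized l_curr_block; the assert documents that the narrowing cast cannot truncate. A standalone illustration with assumed types (the printed value is the usual two's-complement result, which is implementation-defined in C):

#include <limits.h>
#include <stdint.h>
#include <stdio.h>

typedef int64_t xfs_daddr_t;	/* assumed to match the kernel typedef */

int main(void)
{
	xfs_daddr_t head_blk = (xfs_daddr_t)INT_MAX + 1;
	int l_curr_block = (int)head_blk;	/* truncates without the ASSERT */

	printf("%d\n", l_curr_block);		/* typically -2147483648 */
	return 0;
}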
@@ -1271,100 +1361,75 @@ xlog_find_tail(
 	}
 
 	/*
-	 * Trim the head block back to skip over torn records. We can have
-	 * multiple log I/Os in flight at any time, so we assume CRC failures
-	 * back through the previous several records are torn writes and skip
-	 * them.
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
 	 */
-	ASSERT(*head_blk < INT_MAX);
-	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-				 &rhead, &wrapped);
-	if (error)
-		goto done;
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+				      &rhead_blk, &rhead, &wrapped);
+	if (error < 0)
+		return error;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
 	/*
-	 * Reset log values according to the state of the log when we
-	 * crashed. In the case where head_blk == 0, we bump curr_cycle
-	 * one because the next write starts a new cycle rather than
-	 * continuing the cycle of the last good log record. At this
-	 * point we have guaranteed that all partial log records have been
-	 * accounted for. Therefore, we know that the last good log record
-	 * written was complete and ended exactly on the end boundary
-	 * of the physical log.
+	 * Set the log state based on the current head record.
 	 */
-	log->l_prev_block = rhead_blk;
-	log->l_curr_block = (int)*head_blk;
-	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (wrapped)
-		log->l_curr_cycle++;
-	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
 
 	/*
-	 * Look for unmount record. If we find it, then we know there
-	 * was a clean unmount. Since 'i' could be the last block in
-	 * the physical log, we convert to a log block before comparing
-	 * to the head_blk.
+	 * Look for an unmount record at the head of the log. This sets the log
+	 * state to determine whether recovery is necessary.
+	 */
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
+	if (error)
+		goto done;
+
+	/*
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
 	 *
-	 * Save the current tail lsn to use to pass to
-	 * xlog_clear_stale_blocks() below. We won't want to clear the
-	 * unmount record if there is one, so we pass the lsn of the
-	 * unmount record rather than the block after it.
+	 * Note that we can only run CRC verification when the log is dirty
+	 * because there's no guarantee that the log data behind an unmount
+	 * record is compatible with the current architecture.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-		int	h_size = be32_to_cpu(rhead->h_size);
-		int	h_version = be32_to_cpu(rhead->h_version);
+	if (!clean) {
+		xfs_daddr_t	orig_head = *head_blk;
 
-		if ((h_version & XLOG_VERSION_2) &&
-		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-			if (h_size % XLOG_HEADER_CYCLE_SIZE)
-				hblks++;
-		} else {
-			hblks = 1;
-		}
-	} else {
-		hblks = 1;
-	}
-	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	if (*head_blk == after_umount_blk &&
-	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = rhead_blk + hblks;
-		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
+					 &rhead_blk, &rhead, &wrapped);
 		if (error)
 			goto done;
 
-		op_head = (xlog_op_header_t *)offset;
-		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-			/*
-			 * Set tail and last sync so that newly written
-			 * log records will point recovery to after the
-			 * current unmount record.
-			 */
-			xlog_assign_atomic_lsn(&log->l_tail_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			*tail_blk = after_umount_blk;
-
-			/*
-			 * Note that the unmount was clean. If the unmount
-			 * was not clean, we need to know this to rebuild the
-			 * superblock counters from the perag headers if we
-			 * have a filesystem using non-persistent counters.
-			 */
-			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+		/* update in-core state again if the head changed */
+		if (*head_blk != orig_head) {
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
+				       wrapped);
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+						       rhead, rhead_blk, bp,
+						       &clean);
+			if (error)
+				goto done;
 		}
 	}
 
 	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
+	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
+	/*
 	 * Make sure that there are no blocks in front of the head
 	 * with the same cycle number as the head. This can happen
 	 * because we allow multiple outstanding log writes concurrently,
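
Editor's note: several of the moved lines above manipulate LSNs through BLOCK_LSN(), xlog_assign_atomic_lsn() and atomic64_set(). An XFS LSN packs the cycle number into the high 32 bits and the basic block number into the low 32 bits; a minimal standalone sketch of that packing (helper names here are illustrative, not the kernel's):

#include <stdint.h>
#include <stdio.h>

static uint64_t lsn_pack(uint32_t cycle, uint32_t block)
{
	return ((uint64_t)cycle << 32) | block;
}

static uint32_t lsn_cycle(uint64_t lsn) { return (uint32_t)(lsn >> 32); }
static uint32_t lsn_block(uint64_t lsn) { return (uint32_t)lsn; }

int main(void)
{
	/* e.g. the tail LSN set after finding an unmount record */
	uint64_t lsn = lsn_pack(7, 4096);

	printf("cycle=%u block=%u\n", lsn_cycle(lsn), lsn_block(lsn));
	return 0;
}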