Diffstat (limited to 'fs')
-rw-r--r--	fs/xfs/xfs_log_recover.c	271
1 file changed, 168 insertions(+), 103 deletions(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 594f7e63b432..be5568839442 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@ xlog_verify_head(
 	bool			tmp_wrapped;
 
 	/*
-	 * Search backwards through the log looking for the log record header
-	 * block. This wraps all the way back around to the head so something is
-	 * seriously wrong if we can't find it.
-	 */
-	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-				      rhead, wrapped);
-	if (found < 0)
-		return found;
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		return -EIO;
-	}
-
-	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-
-	/*
-	 * Now that we have a tail block, check the head of the log for torn
-	 * writes. Search again until we hit the tail or the maximum number of
-	 * log record I/Os that could have been in flight at one time. Use a
-	 * temporary buffer so we don't trash the rhead/bp pointer from the
-	 * call above.
+	 * Check the head of the log for torn writes. Search backwards from the
+	 * head until we hit the tail or the maximum number of log record I/Os
+	 * that could have been in flight at one time. Use a temporary buffer so
+	 * we don't trash the rhead/bp pointers from the caller.
 	 */
 	tmp_bp = xlog_get_bp(log, 1);
 	if (!tmp_bp)
@@ -1216,6 +1199,115 @@ xlog_verify_head(
 }
 
 /*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	struct xfs_buf		*bp,
+	bool			*clean)
+{
+	struct xlog_op_header	*op_head;
+	xfs_daddr_t		umount_data_blk;
+	xfs_daddr_t		after_umount_blk;
+	int			hblks;
+	int			error;
+	char			*offset;
+
+	*clean = false;
+
+	/*
+	 * Look for unmount record. If we find it, then we know there was a
+	 * clean unmount. Since 'i' could be the last block in the physical
+	 * log, we convert to a log block before comparing to the head_blk.
+	 *
+	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+	 * below. We won't want to clear the unmount record if there is one, so
+	 * we pass the lsn of the unmount record rather than the block after it.
+	 */
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
+
+		if ((h_version & XLOG_VERSION_2) &&
+		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+			if (h_size % XLOG_HEADER_CYCLE_SIZE)
+				hblks++;
+		} else {
+			hblks = 1;
+		}
+	} else {
+		hblks = 1;
+	}
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+	if (*head_blk == after_umount_blk &&
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
+			return error;
+
+		op_head = (struct xlog_op_header *)offset;
+		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+			/*
+			 * Set tail and last sync so that newly written log
+			 * records will point recovery to after the current
+			 * unmount record.
+			 */
+			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			*tail_blk = after_umount_blk;
+
+			*clean = true;
+		}
+	}
+
+	return 0;
+}
+
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Reset log values according to the state of the log when we
+	 * crashed. In the case where head_blk == 0, we bump curr_cycle
+	 * one because the next write starts a new cycle rather than
+	 * continuing the cycle of the last good log record. At this
+	 * point we have guaranteed that all partial log records have been
+	 * accounted for. Therefore, we know that the last good log record
+	 * written was complete and ended exactly on the end boundary
+	 * of the physical log.
+	 */
+	log->l_prev_block = rhead_blk;
+	log->l_curr_block = (int)head_blk;
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+	if (bump_cycle)
+		log->l_curr_cycle++;
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+}
+
+/*
  * Find the sync block number or the tail of the log.
  *
  * This will be the block number of the last record to have its
@@ -1238,22 +1330,20 @@ xlog_find_tail(
 	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
-	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
 	int			error;
-	xfs_daddr_t		umount_data_blk;
-	xfs_daddr_t		after_umount_blk;
 	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
-	int			hblks;
 	bool			wrapped = false;
+	bool			clean = false;
 
 	/*
 	 * Find previous log record
 	 */
 	if ((error = xlog_find_head(log, head_blk)))
 		return error;
+	ASSERT(*head_blk < INT_MAX);
 
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
@@ -1271,100 +1361,75 @@ xlog_find_tail(
 	}
 
 	/*
-	 * Trim the head block back to skip over torn records. We can have
-	 * multiple log I/Os in flight at any time, so we assume CRC failures
-	 * back through the previous several records are torn writes and skip
-	 * them.
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
 	 */
-	ASSERT(*head_blk < INT_MAX);
-	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-				 &rhead, &wrapped);
-	if (error)
-		goto done;
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+				      &rhead_blk, &rhead, &wrapped);
+	if (error < 0)
+		return error;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
 	/*
-	 * Reset log values according to the state of the log when we
-	 * crashed. In the case where head_blk == 0, we bump curr_cycle
-	 * one because the next write starts a new cycle rather than
-	 * continuing the cycle of the last good log record. At this
-	 * point we have guaranteed that all partial log records have been
-	 * accounted for. Therefore, we know that the last good log record
-	 * written was complete and ended exactly on the end boundary
-	 * of the physical log.
+	 * Set the log state based on the current head record.
 	 */
-	log->l_prev_block = rhead_blk;
-	log->l_curr_block = (int)*head_blk;
-	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (wrapped)
-		log->l_curr_cycle++;
-	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
 
 	/*
-	 * Look for unmount record. If we find it, then we know there
-	 * was a clean unmount. Since 'i' could be the last block in
-	 * the physical log, we convert to a log block before comparing
-	 * to the head_blk.
+	 * Look for an unmount record at the head of the log. This sets the log
+	 * state to determine whether recovery is necessary.
+	 */
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
+	if (error)
+		goto done;
+
+	/*
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
 	 *
-	 * Save the current tail lsn to use to pass to
-	 * xlog_clear_stale_blocks() below. We won't want to clear the
-	 * unmount record if there is one, so we pass the lsn of the
-	 * unmount record rather than the block after it.
+	 * Note that we can only run CRC verification when the log is dirty
+	 * because there's no guarantee that the log data behind an unmount
+	 * record is compatible with the current architecture.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-		int h_size = be32_to_cpu(rhead->h_size);
-		int h_version = be32_to_cpu(rhead->h_version);
+	if (!clean) {
+		xfs_daddr_t	orig_head = *head_blk;
 
-		if ((h_version & XLOG_VERSION_2) &&
-		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-			if (h_size % XLOG_HEADER_CYCLE_SIZE)
-				hblks++;
-		} else {
-			hblks = 1;
-		}
-	} else {
-		hblks = 1;
-	}
-	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	if (*head_blk == after_umount_blk &&
-	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = rhead_blk + hblks;
-		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
+					 &rhead_blk, &rhead, &wrapped);
 		if (error)
 			goto done;
 
-		op_head = (xlog_op_header_t *)offset;
-		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-			/*
-			 * Set tail and last sync so that newly written
-			 * log records will point recovery to after the
-			 * current unmount record.
-			 */
-			xlog_assign_atomic_lsn(&log->l_tail_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			*tail_blk = after_umount_blk;
-
-			/*
-			 * Note that the unmount was clean. If the unmount
-			 * was not clean, we need to know this to rebuild the
-			 * superblock counters from the perag headers if we
-			 * have a filesystem using non-persistent counters.
-			 */
-			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+		/* update in-core state again if the head changed */
+		if (*head_blk != orig_head) {
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
+				       wrapped);
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+						       rhead, rhead_blk, bp,
+						       &clean);
+			if (error)
+				goto done;
 		}
 	}
 
 	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
+	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
+	/*
 	 * Make sure that there are no blocks in front of the head
 	 * with the same cycle number as the head. This can happen
 	 * because we allow multiple outstanding log writes concurrently,
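
For readers following the block arithmetic in the new xlog_check_unmount_rec() helper above, the stand-alone sketch below reproduces the hblks / after_umount_blk computation with made-up numbers. The simplified macro definitions (XLOG_HEADER_CYCLE_SIZE, BBSHIFT, BTOBB, do_mod) and the sample record sizes are illustrative assumptions, not the kernel's own definitions.

/*
 * Stand-alone sketch of the block arithmetic used by
 * xlog_check_unmount_rec() in the patch above. The macros below are
 * simplified stand-ins for the XFS header definitions, and the sample
 * values are hypothetical.
 */
#include <stdio.h>
#include <stdint.h>

#define XLOG_HEADER_CYCLE_SIZE	(32 * 1024)	/* 32k of cycle data per header block */
#define BBSHIFT			9		/* 512-byte basic blocks */
#define BTOBB(bytes)		(((bytes) + (1 << BBSHIFT) - 1) >> BBSHIFT)
#define do_mod(a, b)		((a) % (b))

int main(void)
{
	/* hypothetical v2 log record header fields */
	int	h_size      = 64 * 1024;	/* iclog size > 32k => extra header blocks */
	int	h_len       = 17 * 1024;	/* record payload length in bytes */
	int64_t	rhead_blk   = 4090;		/* block of the record header */
	int64_t	log_bbsize  = 4096;		/* physical log size in basic blocks */

	/* one header block per 32k of cycle data, rounded up */
	int hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
	if (h_size % XLOG_HEADER_CYCLE_SIZE)
		hblks++;

	/* block immediately after the record, wrapped around the log */
	int64_t after_umount_blk =
		do_mod(rhead_blk + hblks + BTOBB(h_len), log_bbsize);

	printf("hblks = %d, after_umount_blk = %lld\n",
	       hblks, (long long)after_umount_blk);

	/*
	 * The log is considered clean when head_blk equals after_umount_blk
	 * and the record holds a single unmount operation.
	 */
	return 0;
}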