author	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-11 13:21:32 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2016-03-11 13:21:32 -0500
commit	2a62ec0af2ed3758dd4535eb58527f63ba6549ad (patch)
tree	b33a15c9d8e14e369688b169958292ba42200d0e /fs
parent	63cf207e931894b93fe5471131e41fda567611e2 (diff)
parent	7f6aff3a29b08fc4234c8136eb1ac31b4897522c (diff)
Merge tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs fixes from Dave Chinner:
 "This is a fix for a regression introduced in 4.5-rc1 by the new torn
  log write detection code. The regression only affects people moving a
  clean filesystem between machines/kernels of different architecture
  (such as changing between 32 bit and 64 bit kernels), but this is the
  recommended (and only!) safe way to migrate a filesystem between
  architectures so we really need to ensure it works.

  The changes are larger than I'd prefer right at the end of the
  release cycle, but the majority of the change is just factoring code
  to enable the detection of a clean log at the correct time to avoid
  this issue.

  Changes:

   - Only perform torn log write detection on dirty logs. This prevents
     failures being detected due to a clean filesystem being moved
     between machines or kernels of different architectures (e.g. 32 ->
     64 bit, BE -> LE, etc). This fixes a regression introduced by the
     torn log write detection in 4.5-rc1"

* tag 'xfs-for-linus-4.5-rc7' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs:
  xfs: only run torn log write detection on dirty logs
  xfs: refactor in-core log state update to helper
  xfs: refactor unmount record detection into helper
  xfs: separate log head record discovery from verification
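In short, tail discovery now runs in a fixed order: find the last record
header, set the in-core log state, check for an unmount (clean) record,
and only then run CRC-based torn write verification, and only on a dirty
log. A standalone C sketch of that ordering (the helper names below stand
in for the real xfs_log_recover.c helpers and these stub bodies are
invented for illustration only):

	#include <stdbool.h>
	#include <stdio.h>

	/* stubs standing in for the real helpers in xfs_log_recover.c */
	static bool check_unmount_rec(void)	{ return true; }	/* log clean? */
	static int  verify_head(void)		{ return 0; }		/* CRC checks */

	int main(void)
	{
		bool	clean = check_unmount_rec();

		/*
		 * The 4.5-rc1 regression: CRC verification ran before the
		 * clean-log check, so a clean log moved between architectures
		 * could spuriously fail. Now a clean log skips it entirely.
		 */
		if (!clean) {
			if (verify_head())
				fprintf(stderr, "torn write detected\n");
		}
		return 0;
	}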
Diffstat (limited to 'fs')
-rw-r--r--	fs/xfs/xfs_log_recover.c	271
 1 file changed, 168 insertions(+), 103 deletions(-)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 594f7e63b432..be5568839442 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1109,27 +1109,10 @@ xlog_verify_head(
 	bool		tmp_wrapped;
 
 	/*
-	 * Search backwards through the log looking for the log record header
-	 * block. This wraps all the way back around to the head so something is
-	 * seriously wrong if we can't find it.
-	 */
-	found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
-				      rhead, wrapped);
-	if (found < 0)
-		return found;
-	if (!found) {
-		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
-		return -EIO;
-	}
-
-	*tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
-
-	/*
-	 * Now that we have a tail block, check the head of the log for torn
-	 * writes. Search again until we hit the tail or the maximum number of
-	 * log record I/Os that could have been in flight at one time. Use a
-	 * temporary buffer so we don't trash the rhead/bp pointer from the
-	 * call above.
+	 * Check the head of the log for torn writes. Search backwards from the
+	 * head until we hit the tail or the maximum number of log record I/Os
+	 * that could have been in flight at one time. Use a temporary buffer so
+	 * we don't trash the rhead/bp pointers from the caller.
 	 */
 	tmp_bp = xlog_get_bp(log, 1);
 	if (!tmp_bp)
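The rewritten comment above describes a bounded backwards scan: from the
head, walk back at most as many records as could have been in flight at
once, stopping early at the tail. A minimal standalone illustration of
that idea (C with invented names and a fake CRC callback; the real search
is done by xlog_rseek_logrec_hdr() and xlog_verify_head() over on-disk
record headers):

	#include <stdbool.h>
	#include <stdio.h>

	/*
	 * Walk back from head_blk toward tail_blk, at most max_inflight
	 * records, and return the newest block whose record passes CRC.
	 */
	static long trim_torn_head(long head_blk, long tail_blk, int max_inflight,
				   bool (*crc_ok)(long blk))
	{
		long	blk = head_blk;

		for (int i = 0; i < max_inflight && blk > tail_blk; i++, blk--) {
			if (crc_ok(blk))
				break;		/* first intact record: new head */
		}
		return blk;
	}

	static bool fake_crc(long blk)
	{
		return blk <= 97;	/* pretend blocks 98..100 are torn */
	}

	int main(void)
	{
		/* head at 100, tail at 90: the head is trimmed back to 97 */
		printf("%ld\n", trim_torn_head(100, 90, 8, fake_crc));
		return 0;
	}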
@@ -1216,6 +1199,115 @@ xlog_verify_head(
 }
 
 /*
+ * Check whether the head of the log points to an unmount record. In other
+ * words, determine whether the log is clean. If so, update the in-core state
+ * appropriately.
+ */
+static int
+xlog_check_unmount_rec(
+	struct xlog		*log,
+	xfs_daddr_t		*head_blk,
+	xfs_daddr_t		*tail_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	struct xfs_buf		*bp,
+	bool			*clean)
+{
+	struct xlog_op_header	*op_head;
+	xfs_daddr_t		umount_data_blk;
+	xfs_daddr_t		after_umount_blk;
+	int			hblks;
+	int			error;
+	char			*offset;
+
+	*clean = false;
+
+	/*
+	 * Look for unmount record. If we find it, then we know there was a
+	 * clean unmount. Since 'i' could be the last block in the physical
+	 * log, we convert to a log block before comparing to the head_blk.
+	 *
+	 * Save the current tail lsn to use to pass to xlog_clear_stale_blocks()
+	 * below. We won't want to clear the unmount record if there is one, so
+	 * we pass the lsn of the unmount record rather than the block after it.
+	 */
+	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+		int	h_size = be32_to_cpu(rhead->h_size);
+		int	h_version = be32_to_cpu(rhead->h_version);
+
+		if ((h_version & XLOG_VERSION_2) &&
+		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
+			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
+			if (h_size % XLOG_HEADER_CYCLE_SIZE)
+				hblks++;
+		} else {
+			hblks = 1;
+		}
+	} else {
+		hblks = 1;
+	}
+	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
+	if (*head_blk == after_umount_blk &&
+	    be32_to_cpu(rhead->h_num_logops) == 1) {
+		umount_data_blk = rhead_blk + hblks;
+		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
+		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		if (error)
+			return error;
+
+		op_head = (struct xlog_op_header *)offset;
+		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
+			/*
+			 * Set tail and last sync so that newly written log
+			 * records will point recovery to after the current
+			 * unmount record.
+			 */
+			xlog_assign_atomic_lsn(&log->l_tail_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
+					log->l_curr_cycle, after_umount_blk);
+			*tail_blk = after_umount_blk;
+
+			*clean = true;
+		}
+	}
+
+	return 0;
+}
+
+static void
+xlog_set_state(
+	struct xlog		*log,
+	xfs_daddr_t		head_blk,
+	struct xlog_rec_header	*rhead,
+	xfs_daddr_t		rhead_blk,
+	bool			bump_cycle)
+{
+	/*
+	 * Reset log values according to the state of the log when we
+	 * crashed. In the case where head_blk == 0, we bump curr_cycle
+	 * one because the next write starts a new cycle rather than
+	 * continuing the cycle of the last good log record. At this
+	 * point we have guaranteed that all partial log records have been
+	 * accounted for. Therefore, we know that the last good log record
+	 * written was complete and ended exactly on the end boundary
+	 * of the physical log.
+	 */
+	log->l_prev_block = rhead_blk;
+	log->l_curr_block = (int)head_blk;
+	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
+	if (bump_cycle)
+		log->l_curr_cycle++;
+	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
+	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
+	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
+					BBTOB(log->l_curr_block));
+}
+
+/*
  * Find the sync block number or the tail of the log.
  *
  * This will be the block number of the last record to have its
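The hblks computation in the new xlog_check_unmount_rec() accounts for v2
logs whose record header can span more than one basic block: one header
block per XLOG_HEADER_CYCLE_SIZE of h_size, rounded up. A standalone
arithmetic sketch (userspace C; the value 32768 matches the kernel's 32k
definition of XLOG_HEADER_CYCLE_SIZE, but treat it and the sample sizes
as illustrative):

	#include <stdio.h>

	#define XLOG_HEADER_CYCLE_SIZE	32768	/* assumed: 32k, as in the kernel */

	static int hblks_for(int h_size, int log_v2)
	{
		/* mirror the round-up division in xlog_check_unmount_rec() */
		if (log_v2 && h_size > XLOG_HEADER_CYCLE_SIZE)
			return (h_size + XLOG_HEADER_CYCLE_SIZE - 1) /
				XLOG_HEADER_CYCLE_SIZE;
		return 1;
	}

	int main(void)
	{
		/* v1 log, or small v2 header: a single header block */
		printf("%d\n", hblks_for(4096, 0));	/* -> 1 */
		/* 64k v2 header: two header blocks */
		printf("%d\n", hblks_for(65536, 1));	/* -> 2 */
		return 0;
	}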
@@ -1238,22 +1330,20 @@ xlog_find_tail(
 	xfs_daddr_t		*tail_blk)
 {
 	xlog_rec_header_t	*rhead;
-	xlog_op_header_t	*op_head;
 	char			*offset = NULL;
 	xfs_buf_t		*bp;
 	int			error;
-	xfs_daddr_t		umount_data_blk;
-	xfs_daddr_t		after_umount_blk;
 	xfs_daddr_t		rhead_blk;
 	xfs_lsn_t		tail_lsn;
-	int			hblks;
 	bool			wrapped = false;
+	bool			clean = false;
 
 	/*
 	 * Find previous log record
 	 */
 	if ((error = xlog_find_head(log, head_blk)))
 		return error;
+	ASSERT(*head_blk < INT_MAX);
 
 	bp = xlog_get_bp(log, 1);
 	if (!bp)
@@ -1271,100 +1361,75 @@ xlog_find_tail(
 	}
 
 	/*
-	 * Trim the head block back to skip over torn records. We can have
-	 * multiple log I/Os in flight at any time, so we assume CRC failures
-	 * back through the previous several records are torn writes and skip
-	 * them.
+	 * Search backwards through the log looking for the log record header
+	 * block. This wraps all the way back around to the head so something is
+	 * seriously wrong if we can't find it.
 	 */
-	ASSERT(*head_blk < INT_MAX);
-	error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
-				 &rhead, &wrapped);
-	if (error)
-		goto done;
+	error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp,
+				      &rhead_blk, &rhead, &wrapped);
+	if (error < 0)
+		return error;
+	if (!error) {
+		xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+		return -EIO;
+	}
+	*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
 
 	/*
-	 * Reset log values according to the state of the log when we
-	 * crashed. In the case where head_blk == 0, we bump curr_cycle
-	 * one because the next write starts a new cycle rather than
-	 * continuing the cycle of the last good log record. At this
-	 * point we have guaranteed that all partial log records have been
-	 * accounted for. Therefore, we know that the last good log record
-	 * written was complete and ended exactly on the end boundary
-	 * of the physical log.
+	 * Set the log state based on the current head record.
 	 */
-	log->l_prev_block = rhead_blk;
-	log->l_curr_block = (int)*head_blk;
-	log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
-	if (wrapped)
-		log->l_curr_cycle++;
-	atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
-	atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
-	xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
-	xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle,
-					BBTOB(log->l_curr_block));
+	xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped);
+	tail_lsn = atomic64_read(&log->l_tail_lsn);
 
 	/*
-	 * Look for unmount record. If we find it, then we know there
-	 * was a clean unmount. Since 'i' could be the last block in
-	 * the physical log, we convert to a log block before comparing
-	 * to the head_blk.
+	 * Look for an unmount record at the head of the log. This sets the log
+	 * state to determine whether recovery is necessary.
+	 */
+	error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
+				       rhead_blk, bp, &clean);
+	if (error)
+		goto done;
+
+	/*
+	 * Verify the log head if the log is not clean (e.g., we have anything
+	 * but an unmount record at the head). This uses CRC verification to
+	 * detect and trim torn writes. If discovered, CRC failures are
+	 * considered torn writes and the log head is trimmed accordingly.
 	 *
-	 * Save the current tail lsn to use to pass to
-	 * xlog_clear_stale_blocks() below. We won't want to clear the
-	 * unmount record if there is one, so we pass the lsn of the
-	 * unmount record rather than the block after it.
+	 * Note that we can only run CRC verification when the log is dirty
+	 * because there's no guarantee that the log data behind an unmount
+	 * record is compatible with the current architecture.
 	 */
-	if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
-		int	h_size = be32_to_cpu(rhead->h_size);
-		int	h_version = be32_to_cpu(rhead->h_version);
+	if (!clean) {
+		xfs_daddr_t	orig_head = *head_blk;
 
-		if ((h_version & XLOG_VERSION_2) &&
-		    (h_size > XLOG_HEADER_CYCLE_SIZE)) {
-			hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
-			if (h_size % XLOG_HEADER_CYCLE_SIZE)
-				hblks++;
-		} else {
-			hblks = 1;
-		}
-	} else {
-		hblks = 1;
-	}
-	after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
-	after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
-	tail_lsn = atomic64_read(&log->l_tail_lsn);
-	if (*head_blk == after_umount_blk &&
-	    be32_to_cpu(rhead->h_num_logops) == 1) {
-		umount_data_blk = rhead_blk + hblks;
-		umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
-		error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
+		error = xlog_verify_head(log, head_blk, tail_blk, bp,
+					 &rhead_blk, &rhead, &wrapped);
 		if (error)
 			goto done;
 
-		op_head = (xlog_op_header_t *)offset;
-		if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) {
-			/*
-			 * Set tail and last sync so that newly written
-			 * log records will point recovery to after the
-			 * current unmount record.
-			 */
-			xlog_assign_atomic_lsn(&log->l_tail_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			xlog_assign_atomic_lsn(&log->l_last_sync_lsn,
-					log->l_curr_cycle, after_umount_blk);
-			*tail_blk = after_umount_blk;
-
-			/*
-			 * Note that the unmount was clean. If the unmount
-			 * was not clean, we need to know this to rebuild the
-			 * superblock counters from the perag headers if we
-			 * have a filesystem using non-persistent counters.
-			 */
-			log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+		/* update in-core state again if the head changed */
+		if (*head_blk != orig_head) {
+			xlog_set_state(log, *head_blk, rhead, rhead_blk,
+				       wrapped);
+			tail_lsn = atomic64_read(&log->l_tail_lsn);
+			error = xlog_check_unmount_rec(log, head_blk, tail_blk,
+						       rhead, rhead_blk, bp,
+						       &clean);
+			if (error)
+				goto done;
 		}
 	}
 
 	/*
+	 * Note that the unmount was clean. If the unmount was not clean, we
+	 * need to know this to rebuild the superblock counters from the perag
+	 * headers if we have a filesystem using non-persistent counters.
+	 */
+	if (clean)
+		log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN;
+
+	/*
 	 * Make sure that there are no blocks in front of the head
 	 * with the same cycle number as the head. This can happen
 	 * because we allow multiple outstanding log writes concurrently,
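The unmount record check factored out above hinges on block arithmetic
modulo the physical log size: the log is clean iff the head sits
immediately after a single-op record whose payload is the unmount op. A
standalone sketch of that positional test (userspace C; BTOBB() converts
bytes to 512-byte basic blocks as in the kernel, while the function name
and sample field values are invented for illustration):

	#include <stdbool.h>
	#include <stdio.h>

	#define BBSHIFT		9	/* 512-byte basic blocks */
	#define BTOBB(b)	(((b) + (1 << BBSHIFT) - 1) >> BBSHIFT)

	/* Is head_blk exactly the block after the candidate unmount record? */
	static bool head_is_clean(long rhead_blk, int hblks, int h_len,
				  int num_logops, long head_blk, long logbbsize)
	{
		long	after = (rhead_blk + hblks + BTOBB(h_len)) % logbbsize;

		return head_blk == after && num_logops == 1;
	}

	int main(void)
	{
		/* record header at blk 100, 1 header block, 512-byte record */
		printf("%d\n", head_is_clean(100, 1, 512, 1, 102, 4096)); /* 1 */
		/* same position but more than one log op: not an unmount rec */
		printf("%d\n", head_is_clean(100, 1, 512, 2, 102, 4096)); /* 0 */
		return 0;
	}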