aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2012-11-12 06:54:24 -0500
committerBen Myers <bpm@sgi.com>2012-11-19 21:18:41 -0500
commit0e446be44806240c779666591bb9e8cb0e86a50d (patch)
treebc88570369c1707e3adb5e32a655bfa28503ef57
parentbc02e8693d875c2a9b0037cfd37fe0b726d26403 (diff)
xfs: add CRC checks to the log
Implement CRCs for the log buffers. We re-use a field in struct xlog_rec_header that was used for a weak checksum of the log buffer payload in debug builds before. The new checksumming uses the crc32c checksum we will use elsewhere in XFS, and also protects the record header and additional cycle data. Due to this there are some interesting changes in xlog_sync, as we need to do the cycle wrapping for the split buffer case much earlier, as we would touch the buffer after generating the checksum otherwise. The CRC calculation is always enabled, even for non-CRC filesystems, as adding this CRC does not change the log format. On non-CRC filesystems, only issue an alert if a CRC mismatch is found and allow recovery to continue - this will act as an indicator that log recovery problems are a result of log corruption. On CRC enabled filesystems, however, log recovery will fail. Note that existing debug kernels will write a simple checksum value to the log, so the first time this is run on a filesystem that was last used on a debug kernel it will throw CRC mismatch warnings. These can be ignored. Initially based on a patch from Dave Chinner, then modified significantly by Christoph Hellwig. Modified again by Dave Chinner to get to this version. Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
-rw-r--r--fs/xfs/xfs_log.c132
-rw-r--r--fs/xfs/xfs_log_priv.h11
-rw-r--r--fs/xfs/xfs_log_recover.c132
3 files changed, 176 insertions, 99 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1d6d2ee0849..c6d6e136ba7 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -35,6 +35,7 @@
35#include "xfs_inode.h" 35#include "xfs_inode.h"
36#include "xfs_trace.h" 36#include "xfs_trace.h"
37#include "xfs_fsops.h" 37#include "xfs_fsops.h"
38#include "xfs_cksum.h"
38 39
39kmem_zone_t *xfs_log_ticket_zone; 40kmem_zone_t *xfs_log_ticket_zone;
40 41
@@ -1490,6 +1491,84 @@ xlog_grant_push_ail(
1490} 1491}
1491 1492
1492/* 1493/*
1494 * Stamp cycle number in every block
1495 */
1496STATIC void
1497xlog_pack_data(
1498 struct xlog *log,
1499 struct xlog_in_core *iclog,
1500 int roundoff)
1501{
1502 int i, j, k;
1503 int size = iclog->ic_offset + roundoff;
1504 __be32 cycle_lsn;
1505 xfs_caddr_t dp;
1506
1507 cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
1508
1509 dp = iclog->ic_datap;
1510 for (i = 0; i < BTOBB(size); i++) {
1511 if (i >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE))
1512 break;
1513 iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
1514 *(__be32 *)dp = cycle_lsn;
1515 dp += BBSIZE;
1516 }
1517
1518 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
1519 xlog_in_core_2_t *xhdr = iclog->ic_data;
1520
1521 for ( ; i < BTOBB(size); i++) {
1522 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
1523 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
1524 xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
1525 *(__be32 *)dp = cycle_lsn;
1526 dp += BBSIZE;
1527 }
1528
1529 for (i = 1; i < log->l_iclog_heads; i++)
1530 xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
1531 }
1532}
1533
1534/*
1535 * Calculate the checksum for a log buffer.
1536 *
1537 * This is a little more complicated than it should be because the various
1538 * headers and the actual data are non-contiguous.
1539 */
1540__be32
1541xlog_cksum(
1542 struct xlog *log,
1543 struct xlog_rec_header *rhead,
1544 char *dp,
1545 int size)
1546{
1547 __uint32_t crc;
1548
1549 /* first generate the crc for the record header ... */
1550 crc = xfs_start_cksum((char *)rhead,
1551 sizeof(struct xlog_rec_header),
1552 offsetof(struct xlog_rec_header, h_crc));
1553
1554 /* ... then for additional cycle data for v2 logs ... */
1555 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
1556 union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead;
1557 int i;
1558
1559 for (i = 1; i < log->l_iclog_heads; i++) {
1560 crc = crc32c(crc, &xhdr[i].hic_xheader,
1561 sizeof(struct xlog_rec_ext_header));
1562 }
1563 }
1564
1565 /* ... and finally for the payload */
1566 crc = crc32c(crc, dp, size);
1567
1568 return xfs_end_cksum(crc);
1569}
1570
1571/*
1493 * The bdstrat callback function for log bufs. This gives us a central 1572 * The bdstrat callback function for log bufs. This gives us a central
1494 * place to trap bufs in case we get hit by a log I/O error and need to 1573 * place to trap bufs in case we get hit by a log I/O error and need to
1495 * shutdown. Actually, in practice, even when we didn't get a log error, 1574 * shutdown. Actually, in practice, even when we didn't get a log error,
@@ -1549,7 +1628,6 @@ xlog_sync(
1549 struct xlog *log, 1628 struct xlog *log,
1550 struct xlog_in_core *iclog) 1629 struct xlog_in_core *iclog)
1551{ 1630{
1552 xfs_caddr_t dptr; /* pointer to byte sized element */
1553 xfs_buf_t *bp; 1631 xfs_buf_t *bp;
1554 int i; 1632 int i;
1555 uint count; /* byte count of bwrite */ 1633 uint count; /* byte count of bwrite */
@@ -1558,6 +1636,7 @@ xlog_sync(
1558 int split = 0; /* split write into two regions */ 1636 int split = 0; /* split write into two regions */
1559 int error; 1637 int error;
1560 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); 1638 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
1639 int size;
1561 1640
1562 XFS_STATS_INC(xs_log_writes); 1641 XFS_STATS_INC(xs_log_writes);
1563 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 1642 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
@@ -1588,13 +1667,10 @@ xlog_sync(
1588 xlog_pack_data(log, iclog, roundoff); 1667 xlog_pack_data(log, iclog, roundoff);
1589 1668
1590 /* real byte length */ 1669 /* real byte length */
1591 if (v2) { 1670 size = iclog->ic_offset;
1592 iclog->ic_header.h_len = 1671 if (v2)
1593 cpu_to_be32(iclog->ic_offset + roundoff); 1672 size += roundoff;
1594 } else { 1673 iclog->ic_header.h_len = cpu_to_be32(size);
1595 iclog->ic_header.h_len =
1596 cpu_to_be32(iclog->ic_offset);
1597 }
1598 1674
1599 bp = iclog->ic_bp; 1675 bp = iclog->ic_bp;
1600 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); 1676 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
@@ -1603,12 +1679,36 @@ xlog_sync(
1603 1679
1604 /* Do we need to split this write into 2 parts? */ 1680 /* Do we need to split this write into 2 parts? */
1605 if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { 1681 if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
1682 char *dptr;
1683
1606 split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); 1684 split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
1607 count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); 1685 count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
1608 iclog->ic_bwritecnt = 2; /* split into 2 writes */ 1686 iclog->ic_bwritecnt = 2;
1687
1688 /*
1689 * Bump the cycle numbers at the start of each block in the
1690 * part of the iclog that ends up in the buffer that gets
1691 * written to the start of the log.
1692 *
1693 * Watch out for the header magic number case, though.
1694 */
1695 dptr = (char *)&iclog->ic_header + count;
1696 for (i = 0; i < split; i += BBSIZE) {
1697 __uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
1698 if (++cycle == XLOG_HEADER_MAGIC_NUM)
1699 cycle++;
1700 *(__be32 *)dptr = cpu_to_be32(cycle);
1701
1702 dptr += BBSIZE;
1703 }
1609 } else { 1704 } else {
1610 iclog->ic_bwritecnt = 1; 1705 iclog->ic_bwritecnt = 1;
1611 } 1706 }
1707
1708 /* calculate the checksum */
1709 iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
1710 iclog->ic_datap, size);
1711
1612 bp->b_io_length = BTOBB(count); 1712 bp->b_io_length = BTOBB(count);
1613 bp->b_fspriv = iclog; 1713 bp->b_fspriv = iclog;
1614 XFS_BUF_ZEROFLAGS(bp); 1714 XFS_BUF_ZEROFLAGS(bp);
@@ -1662,19 +1762,6 @@ xlog_sync(
1662 bp->b_flags |= XBF_SYNCIO; 1762 bp->b_flags |= XBF_SYNCIO;
1663 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1763 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1664 bp->b_flags |= XBF_FUA; 1764 bp->b_flags |= XBF_FUA;
1665 dptr = bp->b_addr;
1666 /*
1667 * Bump the cycle numbers at the start of each block
1668 * since this part of the buffer is at the start of
1669 * a new cycle. Watch out for the header magic number
1670 * case, though.
1671 */
1672 for (i = 0; i < split; i += BBSIZE) {
1673 be32_add_cpu((__be32 *)dptr, 1);
1674 if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM)
1675 be32_add_cpu((__be32 *)dptr, 1);
1676 dptr += BBSIZE;
1677 }
1678 1765
1679 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1766 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1680 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); 1767 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
@@ -1691,7 +1778,6 @@ xlog_sync(
1691 return 0; 1778 return 0;
1692} /* xlog_sync */ 1779} /* xlog_sync */
1693 1780
1694
1695/* 1781/*
1696 * Deallocate a log structure 1782 * Deallocate a log structure
1697 */ 1783 */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 9a4e0e5ec32..dc3498bf17c 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -139,7 +139,6 @@ static inline uint xlog_get_client_id(__be32 i)
139/* 139/*
140 * Flags for log structure 140 * Flags for log structure
141 */ 141 */
142#define XLOG_CHKSUM_MISMATCH 0x1 /* used only during recovery */
143#define XLOG_ACTIVE_RECOVERY 0x2 /* in the middle of recovery */ 142#define XLOG_ACTIVE_RECOVERY 0x2 /* in the middle of recovery */
144#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */ 143#define XLOG_RECOVERY_NEEDED 0x4 /* log was recovered */
145#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being 144#define XLOG_IO_ERROR 0x8 /* log hit an I/O error, and being
@@ -291,7 +290,7 @@ typedef struct xlog_rec_header {
291 __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */ 290 __be32 h_len; /* len in bytes; should be 64-bit aligned: 4 */
292 __be64 h_lsn; /* lsn of this LR : 8 */ 291 __be64 h_lsn; /* lsn of this LR : 8 */
293 __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */ 292 __be64 h_tail_lsn; /* lsn of 1st LR w/ buffers not committed: 8 */
294 __be32 h_chksum; /* may not be used; non-zero if used : 4 */ 293 __le32 h_crc; /* crc of log record : 4 */
295 __be32 h_prev_block; /* block number to previous LR : 4 */ 294 __be32 h_prev_block; /* block number to previous LR : 4 */
296 __be32 h_num_logops; /* number of log operations in this LR : 4 */ 295 __be32 h_num_logops; /* number of log operations in this LR : 4 */
297 __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE]; 296 __be32 h_cycle_data[XLOG_HEADER_CYCLE_SIZE / BBSIZE];
@@ -555,11 +554,9 @@ xlog_recover(
555extern int 554extern int
556xlog_recover_finish( 555xlog_recover_finish(
557 struct xlog *log); 556 struct xlog *log);
558extern void 557
559xlog_pack_data( 558extern __be32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
560 struct xlog *log, 559 char *dp, int size);
561 struct xlog_in_core *iclog,
562 int);
563 560
564extern kmem_zone_t *xfs_log_ticket_zone; 561extern kmem_zone_t *xfs_log_ticket_zone;
565struct xlog_ticket * 562struct xlog_ticket *
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 931e8e23f19..9c3651c9e75 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -41,6 +41,7 @@
41#include "xfs_trans_priv.h" 41#include "xfs_trans_priv.h"
42#include "xfs_quota.h" 42#include "xfs_quota.h"
43#include "xfs_utils.h" 43#include "xfs_utils.h"
44#include "xfs_cksum.h"
44#include "xfs_trace.h" 45#include "xfs_trace.h"
45#include "xfs_icache.h" 46#include "xfs_icache.h"
46 47
@@ -3216,80 +3217,58 @@ xlog_recover_process_iunlinks(
3216 mp->m_dmevmask = mp_dmevmask; 3217 mp->m_dmevmask = mp_dmevmask;
3217} 3218}
3218 3219
3219
3220#ifdef DEBUG
3221STATIC void
3222xlog_pack_data_checksum(
3223 struct xlog *log,
3224 struct xlog_in_core *iclog,
3225 int size)
3226{
3227 int i;
3228 __be32 *up;
3229 uint chksum = 0;
3230
3231 up = (__be32 *)iclog->ic_datap;
3232 /* divide length by 4 to get # words */
3233 for (i = 0; i < (size >> 2); i++) {
3234 chksum ^= be32_to_cpu(*up);
3235 up++;
3236 }
3237 iclog->ic_header.h_chksum = cpu_to_be32(chksum);
3238}
3239#else
3240#define xlog_pack_data_checksum(log, iclog, size)
3241#endif
3242
3243/* 3220/*
3244 * Stamp cycle number in every block 3221 * Unpack the log buffer data and crc check it. If the check fails, issue a
3222 * warning if and only if the CRC in the header is non-zero. This makes the
3223 * check an advisory warning, and the zero CRC check will prevent failure
3224 * warnings from being emitted when upgrading the kernel from one that does not
3225 * add CRCs by default.
3226 *
3227 * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log
3228 * corruption failure
3245 */ 3229 */
3246void 3230STATIC int
3247xlog_pack_data( 3231xlog_unpack_data_crc(
3248 struct xlog *log, 3232 struct xlog_rec_header *rhead,
3249 struct xlog_in_core *iclog, 3233 xfs_caddr_t dp,
3250 int roundoff) 3234 struct xlog *log)
3251{ 3235{
3252 int i, j, k; 3236 __be32 crc;
3253 int size = iclog->ic_offset + roundoff; 3237
3254 __be32 cycle_lsn; 3238 crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
3255 xfs_caddr_t dp; 3239 if (crc != rhead->h_crc) {
3256 3240 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
3257 xlog_pack_data_checksum(log, iclog, size); 3241 xfs_alert(log->l_mp,
3258 3242 "log record CRC mismatch: found 0x%x, expected 0x%x.\n",
3259 cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); 3243 be32_to_cpu(rhead->h_crc),
3260 3244 be32_to_cpu(crc));
3261 dp = iclog->ic_datap; 3245 xfs_hex_dump(dp, 32);
3262 for (i = 0; i < BTOBB(size) &&
3263 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
3264 iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
3265 *(__be32 *)dp = cycle_lsn;
3266 dp += BBSIZE;
3267 }
3268
3269 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
3270 xlog_in_core_2_t *xhdr = iclog->ic_data;
3271
3272 for ( ; i < BTOBB(size); i++) {
3273 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3274 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
3275 xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
3276 *(__be32 *)dp = cycle_lsn;
3277 dp += BBSIZE;
3278 } 3246 }
3279 3247
3280 for (i = 1; i < log->l_iclog_heads; i++) { 3248 /*
3281 xhdr[i].hic_xheader.xh_cycle = cycle_lsn; 3249 * If we've detected a log record corruption, then we can't
3282 } 3250 * recover past this point. Abort recovery if we are enforcing
3251 * CRC protection by punting an error back up the stack.
3252 */
3253 if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
3254 return EFSCORRUPTED;
3283 } 3255 }
3256
3257 return 0;
3284} 3258}
3285 3259
3286STATIC void 3260STATIC int
3287xlog_unpack_data( 3261xlog_unpack_data(
3288 struct xlog_rec_header *rhead, 3262 struct xlog_rec_header *rhead,
3289 xfs_caddr_t dp, 3263 xfs_caddr_t dp,
3290 struct xlog *log) 3264 struct xlog *log)
3291{ 3265{
3292 int i, j, k; 3266 int i, j, k;
3267 int error;
3268
3269 error = xlog_unpack_data_crc(rhead, dp, log);
3270 if (error)
3271 return error;
3293 3272
3294 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && 3273 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
3295 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { 3274 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
@@ -3306,6 +3285,8 @@ xlog_unpack_data(
3306 dp += BBSIZE; 3285 dp += BBSIZE;
3307 } 3286 }
3308 } 3287 }
3288
3289 return 0;
3309} 3290}
3310 3291
3311STATIC int 3292STATIC int
@@ -3437,9 +3418,13 @@ xlog_do_recovery_pass(
3437 if (error) 3418 if (error)
3438 goto bread_err2; 3419 goto bread_err2;
3439 3420
3440 xlog_unpack_data(rhead, offset, log); 3421 error = xlog_unpack_data(rhead, offset, log);
3441 if ((error = xlog_recover_process_data(log, 3422 if (error)
3442 rhash, rhead, offset, pass))) 3423 goto bread_err2;
3424
3425 error = xlog_recover_process_data(log,
3426 rhash, rhead, offset, pass);
3427 if (error)
3443 goto bread_err2; 3428 goto bread_err2;
3444 blk_no += bblks + hblks; 3429 blk_no += bblks + hblks;
3445 } 3430 }
@@ -3549,9 +3534,14 @@ xlog_do_recovery_pass(
3549 if (error) 3534 if (error)
3550 goto bread_err2; 3535 goto bread_err2;
3551 } 3536 }
3552 xlog_unpack_data(rhead, offset, log); 3537
3553 if ((error = xlog_recover_process_data(log, rhash, 3538 error = xlog_unpack_data(rhead, offset, log);
3554 rhead, offset, pass))) 3539 if (error)
3540 goto bread_err2;
3541
3542 error = xlog_recover_process_data(log, rhash,
3543 rhead, offset, pass);
3544 if (error)
3555 goto bread_err2; 3545 goto bread_err2;
3556 blk_no += bblks; 3546 blk_no += bblks;
3557 } 3547 }
@@ -3576,9 +3566,13 @@ xlog_do_recovery_pass(
3576 if (error) 3566 if (error)
3577 goto bread_err2; 3567 goto bread_err2;
3578 3568
3579 xlog_unpack_data(rhead, offset, log); 3569 error = xlog_unpack_data(rhead, offset, log);
3580 if ((error = xlog_recover_process_data(log, rhash, 3570 if (error)
3581 rhead, offset, pass))) 3571 goto bread_err2;
3572
3573 error = xlog_recover_process_data(log, rhash,
3574 rhead, offset, pass);
3575 if (error)
3582 goto bread_err2; 3576 goto bread_err2;
3583 blk_no += bblks + hblks; 3577 blk_no += bblks + hblks;
3584 } 3578 }