aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2016-01-14 00:15:18 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-01-14 00:15:18 -0500
commit 7fdec82af6a9e190e53d07a1463d2a9ac49a8750 (patch)
tree b44b29c421a2eff0aaa11249adfbaa1b310cb271
parent f9a03ae123c92c1f45cd2ca88d0f6edd787be78c (diff)
parent dde7f55bd000696acc38296c21241971e1840142 (diff)
Merge tag 'xfs-for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs updates from Dave Chinner: "There's not a lot in this - the main addition is the CRC validation of the entire region of the log that will be recovered, along with several log recovery fixes. Most of the rest is small bug fixes and cleanups. I have three bug fixes still pending, all of which address recently fixed regressions, that I will send next week after they've had some time in for-next. Summary: - extensive CRC validation during log recovery - several log recovery bug fixes - Various DAX support fixes - AGFL size calculation fix - various cleanups in preparation for new functionality - project quota ENOSPC notification via netlink - tracing and debug improvements" * tag 'xfs-for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (26 commits) xfs: handle dquot buffer readahead in log recovery correctly xfs: inode recovery readahead can race with inode buffer creation xfs: eliminate committed arg from xfs_bmap_finish xfs: bmapbt checking on debug kernels too expensive xfs: add tracepoints to readpage calls xfs: debug mode log record crc error injection xfs: detect and trim torn writes during log recovery xfs: fix recursive splice read locking with DAX xfs: Don't use reserved blocks for data blocks with DAX XFS: Use a signed return type for suffix_kstrtoint() libxfs: refactor short btree block verification libxfs: pack the agfl header structure so XFS_AGFL_SIZE is correct libxfs: use a convenience variable instead of open-coding the fork xfs: fix log ticket type printing libxfs: make xfs_alloc_fix_freelist non-static xfs: make xfs_buf_ioend_async() static xfs: send warning of project quota to userspace via netlink xfs: get mp from bma->ip in xfs_bmap code xfs: print name of verifier if it fails libxfs: Optimize the loop for xfs_bitmap_empty ...
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.c 4
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.h 1
-rw-r--r-- fs/xfs/libxfs/xfs_alloc_btree.c 35
-rw-r--r-- fs/xfs/libxfs/xfs_attr.c 141
-rw-r--r-- fs/xfs/libxfs/xfs_attr_leaf.c 1
-rw-r--r-- fs/xfs/libxfs/xfs_attr_remote.c 32
-rw-r--r-- fs/xfs/libxfs/xfs_bit.c 6
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 43
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.h 2
-rw-r--r-- fs/xfs/libxfs/xfs_bmap_btree.c 1
-rw-r--r-- fs/xfs/libxfs/xfs_btree.c 58
-rw-r--r-- fs/xfs/libxfs/xfs_btree.h 3
-rw-r--r-- fs/xfs/libxfs/xfs_da_btree.c 1
-rw-r--r-- fs/xfs/libxfs/xfs_dir2_block.c 1
-rw-r--r-- fs/xfs/libxfs/xfs_dir2_data.c 2
-rw-r--r-- fs/xfs/libxfs/xfs_dir2_leaf.c 2
-rw-r--r-- fs/xfs/libxfs/xfs_dir2_node.c 1
-rw-r--r-- fs/xfs/libxfs/xfs_dquot_buf.c 37
-rw-r--r-- fs/xfs/libxfs/xfs_format.h 2
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc.c 1
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc_btree.c 27
-rw-r--r-- fs/xfs/libxfs/xfs_inode_buf.c 16
-rw-r--r-- fs/xfs/libxfs/xfs_log_recover.h 1
-rw-r--r-- fs/xfs/libxfs/xfs_quota_defs.h 2
-rw-r--r-- fs/xfs/libxfs/xfs_sb.c 2
-rw-r--r-- fs/xfs/libxfs/xfs_shared.h 1
-rw-r--r-- fs/xfs/libxfs/xfs_symlink_remote.c 1
-rw-r--r-- fs/xfs/xfs_aops.c 2
-rw-r--r-- fs/xfs/xfs_bmap_util.c 43
-rw-r--r-- fs/xfs/xfs_buf.c 9
-rw-r--r-- fs/xfs/xfs_buf.h 1
-rw-r--r-- fs/xfs/xfs_dquot.c 13
-rw-r--r-- fs/xfs/xfs_error.c 4
-rw-r--r-- fs/xfs/xfs_file.c 25
-rw-r--r-- fs/xfs/xfs_inode.c 25
-rw-r--r-- fs/xfs/xfs_iomap.c 21
-rw-r--r-- fs/xfs/xfs_log.c 51
-rw-r--r-- fs/xfs/xfs_log_priv.h 3
-rw-r--r-- fs/xfs/xfs_log_recover.c 581
-rw-r--r-- fs/xfs/xfs_rtalloc.c 3
-rw-r--r-- fs/xfs/xfs_super.c 2
-rw-r--r-- fs/xfs/xfs_symlink.c 12
-rw-r--r-- fs/xfs/xfs_sysfs.c 36
-rw-r--r-- fs/xfs/xfs_trace.h 26
-rw-r--r-- fs/xfs/xfs_trans_dquot.c 14
45 files changed, 852 insertions, 443 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3479294c1d58..a708e38b494c 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -535,6 +535,7 @@ xfs_agfl_write_verify(
535} 535}
536 536
537const struct xfs_buf_ops xfs_agfl_buf_ops = { 537const struct xfs_buf_ops xfs_agfl_buf_ops = {
538 .name = "xfs_agfl",
538 .verify_read = xfs_agfl_read_verify, 539 .verify_read = xfs_agfl_read_verify,
539 .verify_write = xfs_agfl_write_verify, 540 .verify_write = xfs_agfl_write_verify,
540}; 541};
@@ -1926,7 +1927,7 @@ xfs_alloc_space_available(
1926 * Decide whether to use this allocation group for this allocation. 1927 * Decide whether to use this allocation group for this allocation.
1927 * If so, fix up the btree freelist's size. 1928 * If so, fix up the btree freelist's size.
1928 */ 1929 */
1929STATIC int /* error */ 1930int /* error */
1930xfs_alloc_fix_freelist( 1931xfs_alloc_fix_freelist(
1931 struct xfs_alloc_arg *args, /* allocation argument structure */ 1932 struct xfs_alloc_arg *args, /* allocation argument structure */
1932 int flags) /* XFS_ALLOC_FLAG_... */ 1933 int flags) /* XFS_ALLOC_FLAG_... */
@@ -2339,6 +2340,7 @@ xfs_agf_write_verify(
2339} 2340}
2340 2341
2341const struct xfs_buf_ops xfs_agf_buf_ops = { 2342const struct xfs_buf_ops xfs_agf_buf_ops = {
2343 .name = "xfs_agf",
2342 .verify_read = xfs_agf_read_verify, 2344 .verify_read = xfs_agf_read_verify,
2343 .verify_write = xfs_agf_write_verify, 2345 .verify_write = xfs_agf_write_verify,
2344}; 2346};
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 0ecde4d5cac8..135eb3d24db7 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -235,5 +235,6 @@ xfs_alloc_get_rec(
235 235
236int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, 236int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
237 xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); 237 xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
238int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
238 239
239#endif /* __XFS_ALLOC_H__ */ 240#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 90de071dd4c2..444626ddbd1b 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -293,14 +293,7 @@ xfs_allocbt_verify(
293 level = be16_to_cpu(block->bb_level); 293 level = be16_to_cpu(block->bb_level);
294 switch (block->bb_magic) { 294 switch (block->bb_magic) {
295 case cpu_to_be32(XFS_ABTB_CRC_MAGIC): 295 case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
296 if (!xfs_sb_version_hascrc(&mp->m_sb)) 296 if (!xfs_btree_sblock_v5hdr_verify(bp))
297 return false;
298 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
299 return false;
300 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
301 return false;
302 if (pag &&
303 be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
304 return false; 297 return false;
305 /* fall through */ 298 /* fall through */
306 case cpu_to_be32(XFS_ABTB_MAGIC): 299 case cpu_to_be32(XFS_ABTB_MAGIC):
@@ -311,14 +304,7 @@ xfs_allocbt_verify(
311 return false; 304 return false;
312 break; 305 break;
313 case cpu_to_be32(XFS_ABTC_CRC_MAGIC): 306 case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
314 if (!xfs_sb_version_hascrc(&mp->m_sb)) 307 if (!xfs_btree_sblock_v5hdr_verify(bp))
315 return false;
316 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
317 return false;
318 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
319 return false;
320 if (pag &&
321 be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
322 return false; 308 return false;
323 /* fall through */ 309 /* fall through */
324 case cpu_to_be32(XFS_ABTC_MAGIC): 310 case cpu_to_be32(XFS_ABTC_MAGIC):
@@ -332,21 +318,7 @@ xfs_allocbt_verify(
332 return false; 318 return false;
333 } 319 }
334 320
335 /* numrecs verification */ 321 return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
336 if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
337 return false;
338
339 /* sibling pointer verification */
340 if (!block->bb_u.s.bb_leftsib ||
341 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
342 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
343 return false;
344 if (!block->bb_u.s.bb_rightsib ||
345 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
346 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
347 return false;
348
349 return true;
350} 322}
351 323
352static void 324static void
@@ -379,6 +351,7 @@ xfs_allocbt_write_verify(
379} 351}
380 352
381const struct xfs_buf_ops xfs_allocbt_buf_ops = { 353const struct xfs_buf_ops xfs_allocbt_buf_ops = {
354 .name = "xfs_allocbt",
382 .verify_read = xfs_allocbt_read_verify, 355 .verify_read = xfs_allocbt_read_verify,
383 .verify_write = xfs_allocbt_write_verify, 356 .verify_write = xfs_allocbt_write_verify,
384}; 357};
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index f949818fa1c7..fa3b948ef9c2 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -207,7 +207,7 @@ xfs_attr_set(
207 struct xfs_trans_res tres; 207 struct xfs_trans_res tres;
208 xfs_fsblock_t firstblock; 208 xfs_fsblock_t firstblock;
209 int rsvd = (flags & ATTR_ROOT) != 0; 209 int rsvd = (flags & ATTR_ROOT) != 0;
210 int error, err2, committed, local; 210 int error, err2, local;
211 211
212 XFS_STATS_INC(mp, xs_attr_set); 212 XFS_STATS_INC(mp, xs_attr_set);
213 213
@@ -334,25 +334,15 @@ xfs_attr_set(
334 */ 334 */
335 xfs_bmap_init(args.flist, args.firstblock); 335 xfs_bmap_init(args.flist, args.firstblock);
336 error = xfs_attr_shortform_to_leaf(&args); 336 error = xfs_attr_shortform_to_leaf(&args);
337 if (!error) { 337 if (!error)
338 error = xfs_bmap_finish(&args.trans, args.flist, 338 error = xfs_bmap_finish(&args.trans, args.flist, dp);
339 &committed);
340 }
341 if (error) { 339 if (error) {
342 ASSERT(committed);
343 args.trans = NULL; 340 args.trans = NULL;
344 xfs_bmap_cancel(&flist); 341 xfs_bmap_cancel(&flist);
345 goto out; 342 goto out;
346 } 343 }
347 344
348 /* 345 /*
349 * bmap_finish() may have committed the last trans and started
350 * a new one. We need the inode to be in all transactions.
351 */
352 if (committed)
353 xfs_trans_ijoin(args.trans, dp, 0);
354
355 /*
356 * Commit the leaf transformation. We'll need another (linked) 346 * Commit the leaf transformation. We'll need another (linked)
357 * transaction to add the new attribute to the leaf. 347 * transaction to add the new attribute to the leaf.
358 */ 348 */
@@ -568,7 +558,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
568{ 558{
569 xfs_inode_t *dp; 559 xfs_inode_t *dp;
570 struct xfs_buf *bp; 560 struct xfs_buf *bp;
571 int retval, error, committed, forkoff; 561 int retval, error, forkoff;
572 562
573 trace_xfs_attr_leaf_addname(args); 563 trace_xfs_attr_leaf_addname(args);
574 564
@@ -628,25 +618,15 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
628 */ 618 */
629 xfs_bmap_init(args->flist, args->firstblock); 619 xfs_bmap_init(args->flist, args->firstblock);
630 error = xfs_attr3_leaf_to_node(args); 620 error = xfs_attr3_leaf_to_node(args);
631 if (!error) { 621 if (!error)
632 error = xfs_bmap_finish(&args->trans, args->flist, 622 error = xfs_bmap_finish(&args->trans, args->flist, dp);
633 &committed);
634 }
635 if (error) { 623 if (error) {
636 ASSERT(committed);
637 args->trans = NULL; 624 args->trans = NULL;
638 xfs_bmap_cancel(args->flist); 625 xfs_bmap_cancel(args->flist);
639 return error; 626 return error;
640 } 627 }
641 628
642 /* 629 /*
643 * bmap_finish() may have committed the last trans and started
644 * a new one. We need the inode to be in all transactions.
645 */
646 if (committed)
647 xfs_trans_ijoin(args->trans, dp, 0);
648
649 /*
650 * Commit the current trans (including the inode) and start 630 * Commit the current trans (including the inode) and start
651 * a new one. 631 * a new one.
652 */ 632 */
@@ -729,25 +709,14 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
729 xfs_bmap_init(args->flist, args->firstblock); 709 xfs_bmap_init(args->flist, args->firstblock);
730 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 710 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
731 /* bp is gone due to xfs_da_shrink_inode */ 711 /* bp is gone due to xfs_da_shrink_inode */
732 if (!error) { 712 if (!error)
733 error = xfs_bmap_finish(&args->trans, 713 error = xfs_bmap_finish(&args->trans,
734 args->flist, 714 args->flist, dp);
735 &committed);
736 }
737 if (error) { 715 if (error) {
738 ASSERT(committed);
739 args->trans = NULL; 716 args->trans = NULL;
740 xfs_bmap_cancel(args->flist); 717 xfs_bmap_cancel(args->flist);
741 return error; 718 return error;
742 } 719 }
743
744 /*
745 * bmap_finish() may have committed the last trans
746 * and started a new one. We need the inode to be
747 * in all transactions.
748 */
749 if (committed)
750 xfs_trans_ijoin(args->trans, dp, 0);
751 } 720 }
752 721
753 /* 722 /*
@@ -775,7 +744,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
775{ 744{
776 xfs_inode_t *dp; 745 xfs_inode_t *dp;
777 struct xfs_buf *bp; 746 struct xfs_buf *bp;
778 int error, committed, forkoff; 747 int error, forkoff;
779 748
780 trace_xfs_attr_leaf_removename(args); 749 trace_xfs_attr_leaf_removename(args);
781 750
@@ -803,23 +772,13 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
803 xfs_bmap_init(args->flist, args->firstblock); 772 xfs_bmap_init(args->flist, args->firstblock);
804 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 773 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
805 /* bp is gone due to xfs_da_shrink_inode */ 774 /* bp is gone due to xfs_da_shrink_inode */
806 if (!error) { 775 if (!error)
807 error = xfs_bmap_finish(&args->trans, args->flist, 776 error = xfs_bmap_finish(&args->trans, args->flist, dp);
808 &committed);
809 }
810 if (error) { 777 if (error) {
811 ASSERT(committed);
812 args->trans = NULL; 778 args->trans = NULL;
813 xfs_bmap_cancel(args->flist); 779 xfs_bmap_cancel(args->flist);
814 return error; 780 return error;
815 } 781 }
816
817 /*
818 * bmap_finish() may have committed the last trans and started
819 * a new one. We need the inode to be in all transactions.
820 */
821 if (committed)
822 xfs_trans_ijoin(args->trans, dp, 0);
823 } 782 }
824 return 0; 783 return 0;
825} 784}
@@ -877,7 +836,7 @@ xfs_attr_node_addname(xfs_da_args_t *args)
877 xfs_da_state_blk_t *blk; 836 xfs_da_state_blk_t *blk;
878 xfs_inode_t *dp; 837 xfs_inode_t *dp;
879 xfs_mount_t *mp; 838 xfs_mount_t *mp;
880 int committed, retval, error; 839 int retval, error;
881 840
882 trace_xfs_attr_node_addname(args); 841 trace_xfs_attr_node_addname(args);
883 842
@@ -938,27 +897,16 @@ restart:
938 state = NULL; 897 state = NULL;
939 xfs_bmap_init(args->flist, args->firstblock); 898 xfs_bmap_init(args->flist, args->firstblock);
940 error = xfs_attr3_leaf_to_node(args); 899 error = xfs_attr3_leaf_to_node(args);
941 if (!error) { 900 if (!error)
942 error = xfs_bmap_finish(&args->trans, 901 error = xfs_bmap_finish(&args->trans,
943 args->flist, 902 args->flist, dp);
944 &committed);
945 }
946 if (error) { 903 if (error) {
947 ASSERT(committed);
948 args->trans = NULL; 904 args->trans = NULL;
949 xfs_bmap_cancel(args->flist); 905 xfs_bmap_cancel(args->flist);
950 goto out; 906 goto out;
951 } 907 }
952 908
953 /* 909 /*
954 * bmap_finish() may have committed the last trans
955 * and started a new one. We need the inode to be
956 * in all transactions.
957 */
958 if (committed)
959 xfs_trans_ijoin(args->trans, dp, 0);
960
961 /*
962 * Commit the node conversion and start the next 910 * Commit the node conversion and start the next
963 * trans in the chain. 911 * trans in the chain.
964 */ 912 */
@@ -977,23 +925,13 @@ restart:
977 */ 925 */
978 xfs_bmap_init(args->flist, args->firstblock); 926 xfs_bmap_init(args->flist, args->firstblock);
979 error = xfs_da3_split(state); 927 error = xfs_da3_split(state);
980 if (!error) { 928 if (!error)
981 error = xfs_bmap_finish(&args->trans, args->flist, 929 error = xfs_bmap_finish(&args->trans, args->flist, dp);
982 &committed);
983 }
984 if (error) { 930 if (error) {
985 ASSERT(committed);
986 args->trans = NULL; 931 args->trans = NULL;
987 xfs_bmap_cancel(args->flist); 932 xfs_bmap_cancel(args->flist);
988 goto out; 933 goto out;
989 } 934 }
990
991 /*
992 * bmap_finish() may have committed the last trans and started
993 * a new one. We need the inode to be in all transactions.
994 */
995 if (committed)
996 xfs_trans_ijoin(args->trans, dp, 0);
997 } else { 935 } else {
998 /* 936 /*
999 * Addition succeeded, update Btree hashvals. 937 * Addition succeeded, update Btree hashvals.
@@ -1086,25 +1024,14 @@ restart:
1086 if (retval && (state->path.active > 1)) { 1024 if (retval && (state->path.active > 1)) {
1087 xfs_bmap_init(args->flist, args->firstblock); 1025 xfs_bmap_init(args->flist, args->firstblock);
1088 error = xfs_da3_join(state); 1026 error = xfs_da3_join(state);
1089 if (!error) { 1027 if (!error)
1090 error = xfs_bmap_finish(&args->trans, 1028 error = xfs_bmap_finish(&args->trans,
1091 args->flist, 1029 args->flist, dp);
1092 &committed);
1093 }
1094 if (error) { 1030 if (error) {
1095 ASSERT(committed);
1096 args->trans = NULL; 1031 args->trans = NULL;
1097 xfs_bmap_cancel(args->flist); 1032 xfs_bmap_cancel(args->flist);
1098 goto out; 1033 goto out;
1099 } 1034 }
1100
1101 /*
1102 * bmap_finish() may have committed the last trans
1103 * and started a new one. We need the inode to be
1104 * in all transactions.
1105 */
1106 if (committed)
1107 xfs_trans_ijoin(args->trans, dp, 0);
1108 } 1035 }
1109 1036
1110 /* 1037 /*
@@ -1146,7 +1073,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1146 xfs_da_state_blk_t *blk; 1073 xfs_da_state_blk_t *blk;
1147 xfs_inode_t *dp; 1074 xfs_inode_t *dp;
1148 struct xfs_buf *bp; 1075 struct xfs_buf *bp;
1149 int retval, error, committed, forkoff; 1076 int retval, error, forkoff;
1150 1077
1151 trace_xfs_attr_node_removename(args); 1078 trace_xfs_attr_node_removename(args);
1152 1079
@@ -1220,24 +1147,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1220 if (retval && (state->path.active > 1)) { 1147 if (retval && (state->path.active > 1)) {
1221 xfs_bmap_init(args->flist, args->firstblock); 1148 xfs_bmap_init(args->flist, args->firstblock);
1222 error = xfs_da3_join(state); 1149 error = xfs_da3_join(state);
1223 if (!error) { 1150 if (!error)
1224 error = xfs_bmap_finish(&args->trans, args->flist, 1151 error = xfs_bmap_finish(&args->trans, args->flist, dp);
1225 &committed);
1226 }
1227 if (error) { 1152 if (error) {
1228 ASSERT(committed);
1229 args->trans = NULL; 1153 args->trans = NULL;
1230 xfs_bmap_cancel(args->flist); 1154 xfs_bmap_cancel(args->flist);
1231 goto out; 1155 goto out;
1232 } 1156 }
1233
1234 /*
1235 * bmap_finish() may have committed the last trans and started
1236 * a new one. We need the inode to be in all transactions.
1237 */
1238 if (committed)
1239 xfs_trans_ijoin(args->trans, dp, 0);
1240
1241 /* 1157 /*
1242 * Commit the Btree join operation and start a new trans. 1158 * Commit the Btree join operation and start a new trans.
1243 */ 1159 */
@@ -1265,25 +1181,14 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1265 xfs_bmap_init(args->flist, args->firstblock); 1181 xfs_bmap_init(args->flist, args->firstblock);
1266 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff); 1182 error = xfs_attr3_leaf_to_shortform(bp, args, forkoff);
1267 /* bp is gone due to xfs_da_shrink_inode */ 1183 /* bp is gone due to xfs_da_shrink_inode */
1268 if (!error) { 1184 if (!error)
1269 error = xfs_bmap_finish(&args->trans, 1185 error = xfs_bmap_finish(&args->trans,
1270 args->flist, 1186 args->flist, dp);
1271 &committed);
1272 }
1273 if (error) { 1187 if (error) {
1274 ASSERT(committed);
1275 args->trans = NULL; 1188 args->trans = NULL;
1276 xfs_bmap_cancel(args->flist); 1189 xfs_bmap_cancel(args->flist);
1277 goto out; 1190 goto out;
1278 } 1191 }
1279
1280 /*
1281 * bmap_finish() may have committed the last trans
1282 * and started a new one. We need the inode to be
1283 * in all transactions.
1284 */
1285 if (committed)
1286 xfs_trans_ijoin(args->trans, dp, 0);
1287 } else 1192 } else
1288 xfs_trans_brelse(args->trans, bp); 1193 xfs_trans_brelse(args->trans, bp);
1289 } 1194 }
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index aa187f7ba2dd..01a5ecfedfcf 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -328,6 +328,7 @@ xfs_attr3_leaf_read_verify(
328} 328}
329 329
330const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { 330const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
331 .name = "xfs_attr3_leaf",
331 .verify_read = xfs_attr3_leaf_read_verify, 332 .verify_read = xfs_attr3_leaf_read_verify,
332 .verify_write = xfs_attr3_leaf_write_verify, 333 .verify_write = xfs_attr3_leaf_write_verify,
333}; 334};
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 5ab95ffa4ae9..a572532a55cd 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -201,6 +201,7 @@ xfs_attr3_rmt_write_verify(
201} 201}
202 202
203const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = { 203const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
204 .name = "xfs_attr3_rmt",
204 .verify_read = xfs_attr3_rmt_read_verify, 205 .verify_read = xfs_attr3_rmt_read_verify,
205 .verify_write = xfs_attr3_rmt_write_verify, 206 .verify_write = xfs_attr3_rmt_write_verify,
206}; 207};
@@ -447,8 +448,6 @@ xfs_attr_rmtval_set(
447 * Roll through the "value", allocating blocks on disk as required. 448 * Roll through the "value", allocating blocks on disk as required.
448 */ 449 */
449 while (blkcnt > 0) { 450 while (blkcnt > 0) {
450 int committed;
451
452 /* 451 /*
453 * Allocate a single extent, up to the size of the value. 452 * Allocate a single extent, up to the size of the value.
454 * 453 *
@@ -466,24 +465,14 @@ xfs_attr_rmtval_set(
466 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno, 465 error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
467 blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock, 466 blkcnt, XFS_BMAPI_ATTRFORK, args->firstblock,
468 args->total, &map, &nmap, args->flist); 467 args->total, &map, &nmap, args->flist);
469 if (!error) { 468 if (!error)
470 error = xfs_bmap_finish(&args->trans, args->flist, 469 error = xfs_bmap_finish(&args->trans, args->flist, dp);
471 &committed);
472 }
473 if (error) { 470 if (error) {
474 ASSERT(committed);
475 args->trans = NULL; 471 args->trans = NULL;
476 xfs_bmap_cancel(args->flist); 472 xfs_bmap_cancel(args->flist);
477 return error; 473 return error;
478 } 474 }
479 475
480 /*
481 * bmap_finish() may have committed the last trans and started
482 * a new one. We need the inode to be in all transactions.
483 */
484 if (committed)
485 xfs_trans_ijoin(args->trans, dp, 0);
486
487 ASSERT(nmap == 1); 476 ASSERT(nmap == 1);
488 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 477 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
489 (map.br_startblock != HOLESTARTBLOCK)); 478 (map.br_startblock != HOLESTARTBLOCK));
@@ -614,31 +603,20 @@ xfs_attr_rmtval_remove(
614 blkcnt = args->rmtblkcnt; 603 blkcnt = args->rmtblkcnt;
615 done = 0; 604 done = 0;
616 while (!done) { 605 while (!done) {
617 int committed;
618
619 xfs_bmap_init(args->flist, args->firstblock); 606 xfs_bmap_init(args->flist, args->firstblock);
620 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt, 607 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
621 XFS_BMAPI_ATTRFORK, 1, args->firstblock, 608 XFS_BMAPI_ATTRFORK, 1, args->firstblock,
622 args->flist, &done); 609 args->flist, &done);
623 if (!error) { 610 if (!error)
624 error = xfs_bmap_finish(&args->trans, args->flist, 611 error = xfs_bmap_finish(&args->trans, args->flist,
625 &committed); 612 args->dp);
626 }
627 if (error) { 613 if (error) {
628 ASSERT(committed);
629 args->trans = NULL; 614 args->trans = NULL;
630 xfs_bmap_cancel(args->flist); 615 xfs_bmap_cancel(args->flist);
631 return error; 616 return error;
632 } 617 }
633 618
634 /* 619 /*
635 * bmap_finish() may have committed the last trans and started
636 * a new one. We need the inode to be in all transactions.
637 */
638 if (committed)
639 xfs_trans_ijoin(args->trans, args->dp, 0);
640
641 /*
642 * Close out trans and start the next one in the chain. 620 * Close out trans and start the next one in the chain.
643 */ 621 */
644 error = xfs_trans_roll(&args->trans, args->dp); 622 error = xfs_trans_roll(&args->trans, args->dp);
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
index 0e8885a59646..0a94cce5ea35 100644
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -32,13 +32,13 @@ int
32xfs_bitmap_empty(uint *map, uint size) 32xfs_bitmap_empty(uint *map, uint size)
33{ 33{
34 uint i; 34 uint i;
35 uint ret = 0;
36 35
37 for (i = 0; i < size; i++) { 36 for (i = 0; i < size; i++) {
38 ret |= map[i]; 37 if (map[i] != 0)
38 return 0;
39 } 39 }
40 40
41 return (ret == 0); 41 return 1;
42} 42}
43 43
44/* 44/*
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 119c2422aac7..ef00156f4f96 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -325,9 +325,11 @@ xfs_check_block(
325 325
326/* 326/*
327 * Check that the extents for the inode ip are in the right order in all 327 * Check that the extents for the inode ip are in the right order in all
328 * btree leaves. 328 * btree leaves. This becomes prohibitively expensive for large extent count
329 * files, so don't bother with inodes that have more than 10,000 extents in
330 * them. The btree record ordering checks will still be done, so for such large
331 * bmapbt constructs that is going to catch most corruptions.
329 */ 332 */
330
331STATIC void 333STATIC void
332xfs_bmap_check_leaf_extents( 334xfs_bmap_check_leaf_extents(
333 xfs_btree_cur_t *cur, /* btree cursor or null */ 335 xfs_btree_cur_t *cur, /* btree cursor or null */
@@ -352,6 +354,10 @@ xfs_bmap_check_leaf_extents(
352 return; 354 return;
353 } 355 }
354 356
357 /* skip large extent count inodes */
358 if (ip->i_d.di_nextents > 10000)
359 return;
360
355 bno = NULLFSBLOCK; 361 bno = NULLFSBLOCK;
356 mp = ip->i_mount; 362 mp = ip->i_mount;
357 ifp = XFS_IFORK_PTR(ip, whichfork); 363 ifp = XFS_IFORK_PTR(ip, whichfork);
@@ -1111,7 +1117,6 @@ xfs_bmap_add_attrfork(
1111 xfs_trans_t *tp; /* transaction pointer */ 1117 xfs_trans_t *tp; /* transaction pointer */
1112 int blks; /* space reservation */ 1118 int blks; /* space reservation */
1113 int version = 1; /* superblock attr version */ 1119 int version = 1; /* superblock attr version */
1114 int committed; /* xaction was committed */
1115 int logflags; /* logging flags */ 1120 int logflags; /* logging flags */
1116 int error; /* error return value */ 1121 int error; /* error return value */
1117 1122
@@ -1214,7 +1219,7 @@ xfs_bmap_add_attrfork(
1214 xfs_log_sb(tp); 1219 xfs_log_sb(tp);
1215 } 1220 }
1216 1221
1217 error = xfs_bmap_finish(&tp, &flist, &committed); 1222 error = xfs_bmap_finish(&tp, &flist, NULL);
1218 if (error) 1223 if (error)
1219 goto bmap_cancel; 1224 goto bmap_cancel;
1220 error = xfs_trans_commit(tp); 1225 error = xfs_trans_commit(tp);
@@ -1723,10 +1728,11 @@ xfs_bmap_add_extent_delay_real(
1723 xfs_filblks_t temp=0; /* value for da_new calculations */ 1728 xfs_filblks_t temp=0; /* value for da_new calculations */
1724 xfs_filblks_t temp2=0;/* value for da_new calculations */ 1729 xfs_filblks_t temp2=0;/* value for da_new calculations */
1725 int tmp_rval; /* partial logging flags */ 1730 int tmp_rval; /* partial logging flags */
1731 int whichfork = XFS_DATA_FORK;
1726 struct xfs_mount *mp; 1732 struct xfs_mount *mp;
1727 1733
1728 mp = bma->tp ? bma->tp->t_mountp : NULL; 1734 mp = bma->ip->i_mount;
1729 ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK); 1735 ifp = XFS_IFORK_PTR(bma->ip, whichfork);
1730 1736
1731 ASSERT(bma->idx >= 0); 1737 ASSERT(bma->idx >= 0);
1732 ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); 1738 ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
@@ -1785,7 +1791,7 @@ xfs_bmap_add_extent_delay_real(
1785 * Don't set contiguous if the combined extent would be too large. 1791 * Don't set contiguous if the combined extent would be too large.
1786 * Also check for all-three-contiguous being too large. 1792 * Also check for all-three-contiguous being too large.
1787 */ 1793 */
1788 if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) { 1794 if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
1789 state |= BMAP_RIGHT_VALID; 1795 state |= BMAP_RIGHT_VALID;
1790 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT); 1796 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
1791 1797
@@ -2016,10 +2022,10 @@ xfs_bmap_add_extent_delay_real(
2016 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2022 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2017 } 2023 }
2018 2024
2019 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { 2025 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2020 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2026 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2021 bma->firstblock, bma->flist, 2027 bma->firstblock, bma->flist,
2022 &bma->cur, 1, &tmp_rval, XFS_DATA_FORK); 2028 &bma->cur, 1, &tmp_rval, whichfork);
2023 rval |= tmp_rval; 2029 rval |= tmp_rval;
2024 if (error) 2030 if (error)
2025 goto done; 2031 goto done;
@@ -2100,10 +2106,10 @@ xfs_bmap_add_extent_delay_real(
2100 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2106 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2101 } 2107 }
2102 2108
2103 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { 2109 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2104 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2110 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2105 bma->firstblock, bma->flist, &bma->cur, 1, 2111 bma->firstblock, bma->flist, &bma->cur, 1,
2106 &tmp_rval, XFS_DATA_FORK); 2112 &tmp_rval, whichfork);
2107 rval |= tmp_rval; 2113 rval |= tmp_rval;
2108 if (error) 2114 if (error)
2109 goto done; 2115 goto done;
@@ -2169,10 +2175,10 @@ xfs_bmap_add_extent_delay_real(
2169 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done); 2175 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
2170 } 2176 }
2171 2177
2172 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { 2178 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2173 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2179 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2174 bma->firstblock, bma->flist, &bma->cur, 2180 bma->firstblock, bma->flist, &bma->cur,
2175 1, &tmp_rval, XFS_DATA_FORK); 2181 1, &tmp_rval, whichfork);
2176 rval |= tmp_rval; 2182 rval |= tmp_rval;
2177 if (error) 2183 if (error)
2178 goto done; 2184 goto done;
@@ -2215,13 +2221,13 @@ xfs_bmap_add_extent_delay_real(
2215 } 2221 }
2216 2222
2217 /* convert to a btree if necessary */ 2223 /* convert to a btree if necessary */
2218 if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) { 2224 if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
2219 int tmp_logflags; /* partial log flag return val */ 2225 int tmp_logflags; /* partial log flag return val */
2220 2226
2221 ASSERT(bma->cur == NULL); 2227 ASSERT(bma->cur == NULL);
2222 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip, 2228 error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
2223 bma->firstblock, bma->flist, &bma->cur, 2229 bma->firstblock, bma->flist, &bma->cur,
2224 da_old > 0, &tmp_logflags, XFS_DATA_FORK); 2230 da_old > 0, &tmp_logflags, whichfork);
2225 bma->logflags |= tmp_logflags; 2231 bma->logflags |= tmp_logflags;
2226 if (error) 2232 if (error)
2227 goto done; 2233 goto done;
@@ -2242,7 +2248,7 @@ xfs_bmap_add_extent_delay_real(
2242 if (bma->cur) 2248 if (bma->cur)
2243 bma->cur->bc_private.b.allocated = 0; 2249 bma->cur->bc_private.b.allocated = 0;
2244 2250
2245 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK); 2251 xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
2246done: 2252done:
2247 bma->logflags |= rval; 2253 bma->logflags |= rval;
2248 return error; 2254 return error;
@@ -2939,7 +2945,7 @@ xfs_bmap_add_extent_hole_real(
2939 int state; /* state bits, accessed thru macros */ 2945 int state; /* state bits, accessed thru macros */
2940 struct xfs_mount *mp; 2946 struct xfs_mount *mp;
2941 2947
2942 mp = bma->tp ? bma->tp->t_mountp : NULL; 2948 mp = bma->ip->i_mount;
2943 ifp = XFS_IFORK_PTR(bma->ip, whichfork); 2949 ifp = XFS_IFORK_PTR(bma->ip, whichfork);
2944 2950
2945 ASSERT(bma->idx >= 0); 2951 ASSERT(bma->idx >= 0);
@@ -5950,7 +5956,6 @@ xfs_bmap_split_extent(
5950 struct xfs_trans *tp; 5956 struct xfs_trans *tp;
5951 struct xfs_bmap_free free_list; 5957 struct xfs_bmap_free free_list;
5952 xfs_fsblock_t firstfsb; 5958 xfs_fsblock_t firstfsb;
5953 int committed;
5954 int error; 5959 int error;
5955 5960
5956 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 5961 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
@@ -5971,7 +5976,7 @@ xfs_bmap_split_extent(
5971 if (error) 5976 if (error)
5972 goto out; 5977 goto out;
5973 5978
5974 error = xfs_bmap_finish(&tp, &free_list, &committed); 5979 error = xfs_bmap_finish(&tp, &free_list, NULL);
5975 if (error) 5980 if (error)
5976 goto out; 5981 goto out;
5977 5982
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index a160f8a5a3fc..423a34e832bd 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -195,7 +195,7 @@ void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
195 struct xfs_bmap_free *flist, struct xfs_mount *mp); 195 struct xfs_bmap_free *flist, struct xfs_mount *mp);
196void xfs_bmap_cancel(struct xfs_bmap_free *flist); 196void xfs_bmap_cancel(struct xfs_bmap_free *flist);
197int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist, 197int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
198 int *committed); 198 struct xfs_inode *ip);
199void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); 199void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
200int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, 200int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
201 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); 201 xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 6b0cf6546a82..1637c37bfbaa 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -720,6 +720,7 @@ xfs_bmbt_write_verify(
720} 720}
721 721
722const struct xfs_buf_ops xfs_bmbt_buf_ops = { 722const struct xfs_buf_ops xfs_bmbt_buf_ops = {
723 .name = "xfs_bmbt",
723 .verify_read = xfs_bmbt_read_verify, 724 .verify_read = xfs_bmbt_read_verify,
724 .verify_write = xfs_bmbt_write_verify, 725 .verify_write = xfs_bmbt_write_verify,
725}; 726};
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index af1bbee5586e..a0eb18ce3ad3 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4080,3 +4080,61 @@ xfs_btree_change_owner(
4080 4080
4081 return 0; 4081 return 0;
4082} 4082}
4083
4084/**
4085 * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
4086 * btree block
4087 *
4088 * @bp: buffer containing the btree block
4089 * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
4090 * @pag_max_level: pointer to the per-ag max level field
4091 */
4092bool
4093xfs_btree_sblock_v5hdr_verify(
4094 struct xfs_buf *bp)
4095{
4096 struct xfs_mount *mp = bp->b_target->bt_mount;
4097 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4098 struct xfs_perag *pag = bp->b_pag;
4099
4100 if (!xfs_sb_version_hascrc(&mp->m_sb))
4101 return false;
4102 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
4103 return false;
4104 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
4105 return false;
4106 if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
4107 return false;
4108 return true;
4109}
4110
4111/**
4112 * xfs_btree_sblock_verify() -- verify a short-format btree block
4113 *
4114 * @bp: buffer containing the btree block
4115 * @max_recs: maximum records allowed in this btree node
4116 */
4117bool
4118xfs_btree_sblock_verify(
4119 struct xfs_buf *bp,
4120 unsigned int max_recs)
4121{
4122 struct xfs_mount *mp = bp->b_target->bt_mount;
4123 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4124
4125 /* numrecs verification */
4126 if (be16_to_cpu(block->bb_numrecs) > max_recs)
4127 return false;
4128
4129 /* sibling pointer verification */
4130 if (!block->bb_u.s.bb_leftsib ||
4131 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
4132 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
4133 return false;
4134 if (!block->bb_u.s.bb_rightsib ||
4135 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
4136 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
4137 return false;
4138
4139 return true;
4140}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 992dec0638f3..2e874be70209 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -472,4 +472,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
472#define XFS_BTREE_TRACE_ARGR(c, r) 472#define XFS_BTREE_TRACE_ARGR(c, r)
473#define XFS_BTREE_TRACE_CURSOR(c, t) 473#define XFS_BTREE_TRACE_CURSOR(c, t)
474 474
475bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
476bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
477
475#endif /* __XFS_BTREE_H__ */ 478#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e89a0f8f827c..097bf7717d80 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -245,6 +245,7 @@ xfs_da3_node_read_verify(
245} 245}
246 246
247const struct xfs_buf_ops xfs_da3_node_buf_ops = { 247const struct xfs_buf_ops xfs_da3_node_buf_ops = {
248 .name = "xfs_da3_node",
248 .verify_read = xfs_da3_node_read_verify, 249 .verify_read = xfs_da3_node_read_verify,
249 .verify_write = xfs_da3_node_write_verify, 250 .verify_write = xfs_da3_node_write_verify,
250}; 251};
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 9c10e2b8cfcb..aa17cb788946 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -123,6 +123,7 @@ xfs_dir3_block_write_verify(
123} 123}
124 124
125const struct xfs_buf_ops xfs_dir3_block_buf_ops = { 125const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
126 .name = "xfs_dir3_block",
126 .verify_read = xfs_dir3_block_read_verify, 127 .verify_read = xfs_dir3_block_read_verify,
127 .verify_write = xfs_dir3_block_write_verify, 128 .verify_write = xfs_dir3_block_write_verify,
128}; 129};
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index af71a84f343c..725fc7841fde 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -305,11 +305,13 @@ xfs_dir3_data_write_verify(
305} 305}
306 306
307const struct xfs_buf_ops xfs_dir3_data_buf_ops = { 307const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
308 .name = "xfs_dir3_data",
308 .verify_read = xfs_dir3_data_read_verify, 309 .verify_read = xfs_dir3_data_read_verify,
309 .verify_write = xfs_dir3_data_write_verify, 310 .verify_write = xfs_dir3_data_write_verify,
310}; 311};
311 312
312static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = { 313static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
314 .name = "xfs_dir3_data_reada",
313 .verify_read = xfs_dir3_data_reada_verify, 315 .verify_read = xfs_dir3_data_reada_verify,
314 .verify_write = xfs_dir3_data_write_verify, 316 .verify_write = xfs_dir3_data_write_verify,
315}; 317};
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 3923e1f94697..b887fb2a2bcf 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -245,11 +245,13 @@ xfs_dir3_leafn_write_verify(
245} 245}
246 246
247const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = { 247const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
248 .name = "xfs_dir3_leaf1",
248 .verify_read = xfs_dir3_leaf1_read_verify, 249 .verify_read = xfs_dir3_leaf1_read_verify,
249 .verify_write = xfs_dir3_leaf1_write_verify, 250 .verify_write = xfs_dir3_leaf1_write_verify,
250}; 251};
251 252
252const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = { 253const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
254 .name = "xfs_dir3_leafn",
253 .verify_read = xfs_dir3_leafn_read_verify, 255 .verify_read = xfs_dir3_leafn_read_verify,
254 .verify_write = xfs_dir3_leafn_write_verify, 256 .verify_write = xfs_dir3_leafn_write_verify,
255}; 257};
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 70b0cb2fd556..63ee03db796c 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -150,6 +150,7 @@ xfs_dir3_free_write_verify(
150} 150}
151 151
152const struct xfs_buf_ops xfs_dir3_free_buf_ops = { 152const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
153 .name = "xfs_dir3_free",
153 .verify_read = xfs_dir3_free_read_verify, 154 .verify_read = xfs_dir3_free_read_verify,
154 .verify_write = xfs_dir3_free_write_verify, 155 .verify_write = xfs_dir3_free_write_verify,
155}; 156};
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 5331b7f0460c..3cc3cf767474 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -54,7 +54,7 @@ xfs_dqcheck(
54 xfs_dqid_t id, 54 xfs_dqid_t id,
55 uint type, /* used only when IO_dorepair is true */ 55 uint type, /* used only when IO_dorepair is true */
56 uint flags, 56 uint flags,
57 char *str) 57 const char *str)
58{ 58{
59 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq; 59 xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
60 int errs = 0; 60 int errs = 0;
@@ -207,7 +207,8 @@ xfs_dquot_buf_verify_crc(
207STATIC bool 207STATIC bool
208xfs_dquot_buf_verify( 208xfs_dquot_buf_verify(
209 struct xfs_mount *mp, 209 struct xfs_mount *mp,
210 struct xfs_buf *bp) 210 struct xfs_buf *bp,
211 int warn)
211{ 212{
212 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr; 213 struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
213 xfs_dqid_t id = 0; 214 xfs_dqid_t id = 0;
@@ -240,8 +241,7 @@ xfs_dquot_buf_verify(
240 if (i == 0) 241 if (i == 0)
241 id = be32_to_cpu(ddq->d_id); 242 id = be32_to_cpu(ddq->d_id);
242 243
243 error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, 244 error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
244 "xfs_dquot_buf_verify");
245 if (error) 245 if (error)
246 return false; 246 return false;
247 } 247 }
@@ -256,7 +256,7 @@ xfs_dquot_buf_read_verify(
256 256
257 if (!xfs_dquot_buf_verify_crc(mp, bp)) 257 if (!xfs_dquot_buf_verify_crc(mp, bp))
258 xfs_buf_ioerror(bp, -EFSBADCRC); 258 xfs_buf_ioerror(bp, -EFSBADCRC);
259 else if (!xfs_dquot_buf_verify(mp, bp)) 259 else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
260 xfs_buf_ioerror(bp, -EFSCORRUPTED); 260 xfs_buf_ioerror(bp, -EFSCORRUPTED);
261 261
262 if (bp->b_error) 262 if (bp->b_error)
@@ -264,6 +264,25 @@ xfs_dquot_buf_read_verify(
264} 264}
265 265
266/* 266/*
267 * readahead errors are silent and simply leave the buffer as !done so a real
268 * read will then be run with the xfs_dquot_buf_ops verifier. See
269 * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than
270 * reporting the failure.
271 */
272static void
273xfs_dquot_buf_readahead_verify(
274 struct xfs_buf *bp)
275{
276 struct xfs_mount *mp = bp->b_target->bt_mount;
277
278 if (!xfs_dquot_buf_verify_crc(mp, bp) ||
279 !xfs_dquot_buf_verify(mp, bp, 0)) {
280 xfs_buf_ioerror(bp, -EIO);
281 bp->b_flags &= ~XBF_DONE;
282 }
283}
284
285/*
267 * we don't calculate the CRC here as that is done when the dquot is flushed to 286 * we don't calculate the CRC here as that is done when the dquot is flushed to
268 * the buffer after the update is done. This ensures that the dquot in the 287 * the buffer after the update is done. This ensures that the dquot in the
269 * buffer always has an up-to-date CRC value. 288 * buffer always has an up-to-date CRC value.
@@ -274,7 +293,7 @@ xfs_dquot_buf_write_verify(
274{ 293{
275 struct xfs_mount *mp = bp->b_target->bt_mount; 294 struct xfs_mount *mp = bp->b_target->bt_mount;
276 295
277 if (!xfs_dquot_buf_verify(mp, bp)) { 296 if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
278 xfs_buf_ioerror(bp, -EFSCORRUPTED); 297 xfs_buf_ioerror(bp, -EFSCORRUPTED);
279 xfs_verifier_error(bp); 298 xfs_verifier_error(bp);
280 return; 299 return;
@@ -282,7 +301,13 @@ xfs_dquot_buf_write_verify(
282} 301}
283 302
284const struct xfs_buf_ops xfs_dquot_buf_ops = { 303const struct xfs_buf_ops xfs_dquot_buf_ops = {
304 .name = "xfs_dquot",
285 .verify_read = xfs_dquot_buf_read_verify, 305 .verify_read = xfs_dquot_buf_read_verify,
286 .verify_write = xfs_dquot_buf_write_verify, 306 .verify_write = xfs_dquot_buf_write_verify,
287}; 307};
288 308
309const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
310 .name = "xfs_dquot_ra",
311 .verify_read = xfs_dquot_buf_readahead_verify,
312 .verify_write = xfs_dquot_buf_write_verify,
313};
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 8774498ce0ff..e2536bb1c760 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -786,7 +786,7 @@ typedef struct xfs_agfl {
786 __be64 agfl_lsn; 786 __be64 agfl_lsn;
787 __be32 agfl_crc; 787 __be32 agfl_crc;
788 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ 788 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
789} xfs_agfl_t; 789} __attribute__((packed)) xfs_agfl_t;
790 790
791#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) 791#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
792 792
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 70c1db99f6a7..66d702e6b9ff 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2572,6 +2572,7 @@ xfs_agi_write_verify(
2572} 2572}
2573 2573
2574const struct xfs_buf_ops xfs_agi_buf_ops = { 2574const struct xfs_buf_ops xfs_agi_buf_ops = {
2575 .name = "xfs_agi",
2575 .verify_read = xfs_agi_read_verify, 2576 .verify_read = xfs_agi_read_verify,
2576 .verify_write = xfs_agi_write_verify, 2577 .verify_write = xfs_agi_write_verify,
2577}; 2578};
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index f39b285beb19..c679f3c05b63 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -221,7 +221,6 @@ xfs_inobt_verify(
221{ 221{
222 struct xfs_mount *mp = bp->b_target->bt_mount; 222 struct xfs_mount *mp = bp->b_target->bt_mount;
223 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 223 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
224 struct xfs_perag *pag = bp->b_pag;
225 unsigned int level; 224 unsigned int level;
226 225
227 /* 226 /*
@@ -237,14 +236,7 @@ xfs_inobt_verify(
237 switch (block->bb_magic) { 236 switch (block->bb_magic) {
238 case cpu_to_be32(XFS_IBT_CRC_MAGIC): 237 case cpu_to_be32(XFS_IBT_CRC_MAGIC):
239 case cpu_to_be32(XFS_FIBT_CRC_MAGIC): 238 case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
240 if (!xfs_sb_version_hascrc(&mp->m_sb)) 239 if (!xfs_btree_sblock_v5hdr_verify(bp))
241 return false;
242 if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
243 return false;
244 if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
245 return false;
246 if (pag &&
247 be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
248 return false; 240 return false;
249 /* fall through */ 241 /* fall through */
250 case cpu_to_be32(XFS_IBT_MAGIC): 242 case cpu_to_be32(XFS_IBT_MAGIC):
@@ -254,24 +246,12 @@ xfs_inobt_verify(
254 return 0; 246 return 0;
255 } 247 }
256 248
257 /* numrecs and level verification */ 249 /* level verification */
258 level = be16_to_cpu(block->bb_level); 250 level = be16_to_cpu(block->bb_level);
259 if (level >= mp->m_in_maxlevels) 251 if (level >= mp->m_in_maxlevels)
260 return false; 252 return false;
261 if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
262 return false;
263
264 /* sibling pointer verification */
265 if (!block->bb_u.s.bb_leftsib ||
266 (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
267 block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
268 return false;
269 if (!block->bb_u.s.bb_rightsib ||
270 (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
271 block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
272 return false;
273 253
274 return true; 254 return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
275} 255}
276 256
277static void 257static void
@@ -304,6 +284,7 @@ xfs_inobt_write_verify(
304} 284}
305 285
306const struct xfs_buf_ops xfs_inobt_buf_ops = { 286const struct xfs_buf_ops xfs_inobt_buf_ops = {
287 .name = "xfs_inobt",
307 .verify_read = xfs_inobt_read_verify, 288 .verify_read = xfs_inobt_read_verify,
308 .verify_write = xfs_inobt_write_verify, 289 .verify_write = xfs_inobt_write_verify,
309}; 290};
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 268c00f4f83a..1aabfda669b0 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -62,11 +62,14 @@ xfs_inobp_check(
62 * has not had the inode cores stamped into it. Hence for readahead, the buffer 62 * has not had the inode cores stamped into it. Hence for readahead, the buffer
63 * may be potentially invalid. 63 * may be potentially invalid.
64 * 64 *
65 * If the readahead buffer is invalid, we don't want to mark it with an error, 65 * If the readahead buffer is invalid, we need to mark it with an error and
66 * but we do want to clear the DONE status of the buffer so that a followup read 66 * clear the DONE status of the buffer so that a followup read will re-read it
67 * will re-read it from disk. This will ensure that we don't get an unnecessary 67 * from disk. We don't report the error otherwise to avoid warnings during log
68 * warnings during log recovery and we don't get unnecssary panics on debug 68 * recovery and we don't get unnecssary panics on debug kernels. We use EIO here
69 * kernels. 69 * because all we want to do is say readahead failed; there is no-one to report
70 * the error to, so this will distinguish it from a non-ra verifier failure.
71 * Changes to this readahead error behavour also need to be reflected in
72 * xfs_dquot_buf_readahead_verify().
70 */ 73 */
71static void 74static void
72xfs_inode_buf_verify( 75xfs_inode_buf_verify(
@@ -93,6 +96,7 @@ xfs_inode_buf_verify(
93 XFS_RANDOM_ITOBP_INOTOBP))) { 96 XFS_RANDOM_ITOBP_INOTOBP))) {
94 if (readahead) { 97 if (readahead) {
95 bp->b_flags &= ~XBF_DONE; 98 bp->b_flags &= ~XBF_DONE;
99 xfs_buf_ioerror(bp, -EIO);
96 return; 100 return;
97 } 101 }
98 102
@@ -132,11 +136,13 @@ xfs_inode_buf_write_verify(
132} 136}
133 137
134const struct xfs_buf_ops xfs_inode_buf_ops = { 138const struct xfs_buf_ops xfs_inode_buf_ops = {
139 .name = "xfs_inode",
135 .verify_read = xfs_inode_buf_read_verify, 140 .verify_read = xfs_inode_buf_read_verify,
136 .verify_write = xfs_inode_buf_write_verify, 141 .verify_write = xfs_inode_buf_write_verify,
137}; 142};
138 143
139const struct xfs_buf_ops xfs_inode_buf_ra_ops = { 144const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
145 .name = "xxfs_inode_ra",
140 .verify_read = xfs_inode_buf_readahead_verify, 146 .verify_read = xfs_inode_buf_readahead_verify,
141 .verify_write = xfs_inode_buf_write_verify, 147 .verify_write = xfs_inode_buf_write_verify,
142}; 148};
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 1c55ccbb379d..8e385f91d660 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -60,6 +60,7 @@ typedef struct xlog_recover {
60 */ 60 */
61#define XLOG_BC_TABLE_SIZE 64 61#define XLOG_BC_TABLE_SIZE 64
62 62
63#define XLOG_RECOVER_CRCPASS 0
63#define XLOG_RECOVER_PASS1 1 64#define XLOG_RECOVER_PASS1 1
64#define XLOG_RECOVER_PASS2 2 65#define XLOG_RECOVER_PASS2 2
65 66
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 1b0a08379759..f51078f1e92a 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t;
153#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) 153#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
154 154
155extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, 155extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
156 xfs_dqid_t id, uint type, uint flags, char *str); 156 xfs_dqid_t id, uint type, uint flags, const char *str);
157extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); 157extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
158 158
159#endif /* __XFS_QUOTA_H__ */ 159#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a0b071d881a0..8a53eaa349f4 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -679,11 +679,13 @@ xfs_sb_write_verify(
679} 679}
680 680
681const struct xfs_buf_ops xfs_sb_buf_ops = { 681const struct xfs_buf_ops xfs_sb_buf_ops = {
682 .name = "xfs_sb",
682 .verify_read = xfs_sb_read_verify, 683 .verify_read = xfs_sb_read_verify,
683 .verify_write = xfs_sb_write_verify, 684 .verify_write = xfs_sb_write_verify,
684}; 685};
685 686
686const struct xfs_buf_ops xfs_sb_quiet_buf_ops = { 687const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
688 .name = "xfs_sb_quiet",
687 .verify_read = xfs_sb_quiet_read_verify, 689 .verify_read = xfs_sb_quiet_read_verify,
688 .verify_write = xfs_sb_write_verify, 690 .verify_write = xfs_sb_write_verify,
689}; 691};
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 5be529707903..15c3ceb845b9 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inobt_buf_ops;
49extern const struct xfs_buf_ops xfs_inode_buf_ops; 49extern const struct xfs_buf_ops xfs_inode_buf_ops;
50extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; 50extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
51extern const struct xfs_buf_ops xfs_dquot_buf_ops; 51extern const struct xfs_buf_ops xfs_dquot_buf_ops;
52extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
52extern const struct xfs_buf_ops xfs_sb_buf_ops; 53extern const struct xfs_buf_ops xfs_sb_buf_ops;
53extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; 54extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
54extern const struct xfs_buf_ops xfs_symlink_buf_ops; 55extern const struct xfs_buf_ops xfs_symlink_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index cb6fd20a4d3d..2e2c6716b623 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -168,6 +168,7 @@ xfs_symlink_write_verify(
168} 168}
169 169
170const struct xfs_buf_ops xfs_symlink_buf_ops = { 170const struct xfs_buf_ops xfs_symlink_buf_ops = {
171 .name = "xfs_symlink",
171 .verify_read = xfs_symlink_read_verify, 172 .verify_read = xfs_symlink_read_verify,
172 .verify_write = xfs_symlink_write_verify, 173 .verify_write = xfs_symlink_write_verify,
173}; 174};
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29e7e5dd5178..379c089fb051 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1917,6 +1917,7 @@ xfs_vm_readpage(
1917 struct file *unused, 1917 struct file *unused,
1918 struct page *page) 1918 struct page *page)
1919{ 1919{
1920 trace_xfs_vm_readpage(page->mapping->host, 1);
1920 return mpage_readpage(page, xfs_get_blocks); 1921 return mpage_readpage(page, xfs_get_blocks);
1921} 1922}
1922 1923
@@ -1927,6 +1928,7 @@ xfs_vm_readpages(
1927 struct list_head *pages, 1928 struct list_head *pages,
1928 unsigned nr_pages) 1929 unsigned nr_pages)
1929{ 1930{
1931 trace_xfs_vm_readpages(mapping->host, nr_pages);
1930 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1932 return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1931} 1933}
1932 1934
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index dbae6490a79a..45ec9e40150c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -91,32 +91,32 @@ xfs_zero_extent(
91 * last due to locking considerations. We never free any extents in 91 * last due to locking considerations. We never free any extents in
92 * the first transaction. 92 * the first transaction.
93 * 93 *
94 * Return 1 if the given transaction was committed and a new one 94 * If an inode *ip is provided, rejoin it to the transaction if
95 * started, and 0 otherwise in the committed parameter. 95 * the transaction was committed.
96 */ 96 */
97int /* error */ 97int /* error */
98xfs_bmap_finish( 98xfs_bmap_finish(
99 struct xfs_trans **tp, /* transaction pointer addr */ 99 struct xfs_trans **tp, /* transaction pointer addr */
100 struct xfs_bmap_free *flist, /* i/o: list extents to free */ 100 struct xfs_bmap_free *flist, /* i/o: list extents to free */
101 int *committed)/* xact committed or not */ 101 struct xfs_inode *ip)
102{ 102{
103 struct xfs_efd_log_item *efd; /* extent free data */ 103 struct xfs_efd_log_item *efd; /* extent free data */
104 struct xfs_efi_log_item *efi; /* extent free intention */ 104 struct xfs_efi_log_item *efi; /* extent free intention */
105 int error; /* error return value */ 105 int error; /* error return value */
106 int committed;/* xact committed or not */
106 struct xfs_bmap_free_item *free; /* free extent item */ 107 struct xfs_bmap_free_item *free; /* free extent item */
107 struct xfs_bmap_free_item *next; /* next item on free list */ 108 struct xfs_bmap_free_item *next; /* next item on free list */
108 109
109 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES); 110 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
110 if (flist->xbf_count == 0) { 111 if (flist->xbf_count == 0)
111 *committed = 0;
112 return 0; 112 return 0;
113 } 113
114 efi = xfs_trans_get_efi(*tp, flist->xbf_count); 114 efi = xfs_trans_get_efi(*tp, flist->xbf_count);
115 for (free = flist->xbf_first; free; free = free->xbfi_next) 115 for (free = flist->xbf_first; free; free = free->xbfi_next)
116 xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock, 116 xfs_trans_log_efi_extent(*tp, efi, free->xbfi_startblock,
117 free->xbfi_blockcount); 117 free->xbfi_blockcount);
118 118
119 error = __xfs_trans_roll(tp, NULL, committed); 119 error = __xfs_trans_roll(tp, ip, &committed);
120 if (error) { 120 if (error) {
121 /* 121 /*
122 * If the transaction was committed, drop the EFD reference 122 * If the transaction was committed, drop the EFD reference
@@ -128,16 +128,13 @@ xfs_bmap_finish(
128 * transaction so we should return committed=1 even though we're 128 * transaction so we should return committed=1 even though we're
129 * returning an error. 129 * returning an error.
130 */ 130 */
131 if (*committed) { 131 if (committed) {
132 xfs_efi_release(efi); 132 xfs_efi_release(efi);
133 xfs_force_shutdown((*tp)->t_mountp, 133 xfs_force_shutdown((*tp)->t_mountp,
134 (error == -EFSCORRUPTED) ? 134 (error == -EFSCORRUPTED) ?
135 SHUTDOWN_CORRUPT_INCORE : 135 SHUTDOWN_CORRUPT_INCORE :
136 SHUTDOWN_META_IO_ERROR); 136 SHUTDOWN_META_IO_ERROR);
137 } else {
138 *committed = 1;
139 } 137 }
140
141 return error; 138 return error;
142 } 139 }
143 140
@@ -969,7 +966,6 @@ xfs_alloc_file_space(
969 xfs_bmbt_irec_t imaps[1], *imapp; 966 xfs_bmbt_irec_t imaps[1], *imapp;
970 xfs_bmap_free_t free_list; 967 xfs_bmap_free_t free_list;
971 uint qblocks, resblks, resrtextents; 968 uint qblocks, resblks, resrtextents;
972 int committed;
973 int error; 969 int error;
974 970
975 trace_xfs_alloc_file_space(ip); 971 trace_xfs_alloc_file_space(ip);
@@ -1064,23 +1060,20 @@ xfs_alloc_file_space(
1064 error = xfs_bmapi_write(tp, ip, startoffset_fsb, 1060 error = xfs_bmapi_write(tp, ip, startoffset_fsb,
1065 allocatesize_fsb, alloc_type, &firstfsb, 1061 allocatesize_fsb, alloc_type, &firstfsb,
1066 resblks, imapp, &nimaps, &free_list); 1062 resblks, imapp, &nimaps, &free_list);
1067 if (error) { 1063 if (error)
1068 goto error0; 1064 goto error0;
1069 }
1070 1065
1071 /* 1066 /*
1072 * Complete the transaction 1067 * Complete the transaction
1073 */ 1068 */
1074 error = xfs_bmap_finish(&tp, &free_list, &committed); 1069 error = xfs_bmap_finish(&tp, &free_list, NULL);
1075 if (error) { 1070 if (error)
1076 goto error0; 1071 goto error0;
1077 }
1078 1072
1079 error = xfs_trans_commit(tp); 1073 error = xfs_trans_commit(tp);
1080 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1074 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1081 if (error) { 1075 if (error)
1082 break; 1076 break;
1083 }
1084 1077
1085 allocated_fsb = imapp->br_blockcount; 1078 allocated_fsb = imapp->br_blockcount;
1086 1079
@@ -1206,7 +1199,6 @@ xfs_free_file_space(
1206 xfs_off_t offset, 1199 xfs_off_t offset,
1207 xfs_off_t len) 1200 xfs_off_t len)
1208{ 1201{
1209 int committed;
1210 int done; 1202 int done;
1211 xfs_fileoff_t endoffset_fsb; 1203 xfs_fileoff_t endoffset_fsb;
1212 int error; 1204 int error;
@@ -1346,17 +1338,15 @@ xfs_free_file_space(
1346 error = xfs_bunmapi(tp, ip, startoffset_fsb, 1338 error = xfs_bunmapi(tp, ip, startoffset_fsb,
1347 endoffset_fsb - startoffset_fsb, 1339 endoffset_fsb - startoffset_fsb,
1348 0, 2, &firstfsb, &free_list, &done); 1340 0, 2, &firstfsb, &free_list, &done);
1349 if (error) { 1341 if (error)
1350 goto error0; 1342 goto error0;
1351 }
1352 1343
1353 /* 1344 /*
1354 * complete the transaction 1345 * complete the transaction
1355 */ 1346 */
1356 error = xfs_bmap_finish(&tp, &free_list, &committed); 1347 error = xfs_bmap_finish(&tp, &free_list, NULL);
1357 if (error) { 1348 if (error)
1358 goto error0; 1349 goto error0;
1359 }
1360 1350
1361 error = xfs_trans_commit(tp); 1351 error = xfs_trans_commit(tp);
1362 xfs_iunlock(ip, XFS_ILOCK_EXCL); 1352 xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1434,7 +1424,6 @@ xfs_shift_file_space(
1434 int error; 1424 int error;
1435 struct xfs_bmap_free free_list; 1425 struct xfs_bmap_free free_list;
1436 xfs_fsblock_t first_block; 1426 xfs_fsblock_t first_block;
1437 int committed;
1438 xfs_fileoff_t stop_fsb; 1427 xfs_fileoff_t stop_fsb;
1439 xfs_fileoff_t next_fsb; 1428 xfs_fileoff_t next_fsb;
1440 xfs_fileoff_t shift_fsb; 1429 xfs_fileoff_t shift_fsb;
@@ -1526,7 +1515,7 @@ xfs_shift_file_space(
1526 if (error) 1515 if (error)
1527 goto out_bmap_cancel; 1516 goto out_bmap_cancel;
1528 1517
1529 error = xfs_bmap_finish(&tp, &free_list, &committed); 1518 error = xfs_bmap_finish(&tp, &free_list, NULL);
1530 if (error) 1519 if (error)
1531 goto out_bmap_cancel; 1520 goto out_bmap_cancel;
1532 1521
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ace91e7c713e..daed4bfb85b2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -604,6 +604,13 @@ found:
604 } 604 }
605 } 605 }
606 606
607 /*
608 * Clear b_error if this is a lookup from a caller that doesn't expect
609 * valid data to be found in the buffer.
610 */
611 if (!(flags & XBF_READ))
612 xfs_buf_ioerror(bp, 0);
613
607 XFS_STATS_INC(target->bt_mount, xb_get); 614 XFS_STATS_INC(target->bt_mount, xb_get);
608 trace_xfs_buf_get(bp, flags, _RET_IP_); 615 trace_xfs_buf_get(bp, flags, _RET_IP_);
609 return bp; 616 return bp;
@@ -1045,7 +1052,7 @@ xfs_buf_ioend_work(
1045 xfs_buf_ioend(bp); 1052 xfs_buf_ioend(bp);
1046} 1053}
1047 1054
1048void 1055static void
1049xfs_buf_ioend_async( 1056xfs_buf_ioend_async(
1050 struct xfs_buf *bp) 1057 struct xfs_buf *bp)
1051{ 1058{
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c79b717d9b88..c75721acd867 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -132,6 +132,7 @@ struct xfs_buf_map {
132 struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) }; 132 struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
133 133
134struct xfs_buf_ops { 134struct xfs_buf_ops {
135 char *name;
135 void (*verify_read)(struct xfs_buf *); 136 void (*verify_read)(struct xfs_buf *);
136 void (*verify_write)(struct xfs_buf *); 137 void (*verify_write)(struct xfs_buf *);
137}; 138};
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 7ac6c5c586cb..9c44d38dcd1f 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -306,7 +306,7 @@ xfs_qm_dqalloc(
306 xfs_fsblock_t firstblock; 306 xfs_fsblock_t firstblock;
307 xfs_bmap_free_t flist; 307 xfs_bmap_free_t flist;
308 xfs_bmbt_irec_t map; 308 xfs_bmbt_irec_t map;
309 int nmaps, error, committed; 309 int nmaps, error;
310 xfs_buf_t *bp; 310 xfs_buf_t *bp;
311 xfs_trans_t *tp = *tpp; 311 xfs_trans_t *tp = *tpp;
312 312
@@ -379,11 +379,12 @@ xfs_qm_dqalloc(
379 379
380 xfs_trans_bhold(tp, bp); 380 xfs_trans_bhold(tp, bp);
381 381
382 if ((error = xfs_bmap_finish(tpp, &flist, &committed))) { 382 error = xfs_bmap_finish(tpp, &flist, NULL);
383 if (error)
383 goto error1; 384 goto error1;
384 }
385 385
386 if (committed) { 386 /* Transaction was committed? */
387 if (*tpp != tp) {
387 tp = *tpp; 388 tp = *tpp;
388 xfs_trans_bjoin(tp, bp); 389 xfs_trans_bjoin(tp, bp);
389 } else { 390 } else {
@@ -393,9 +394,9 @@ xfs_qm_dqalloc(
393 *O_bpp = bp; 394 *O_bpp = bp;
394 return 0; 395 return 0;
395 396
396 error1: 397error1:
397 xfs_bmap_cancel(&flist); 398 xfs_bmap_cancel(&flist);
398 error0: 399error0:
399 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 400 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
400 401
401 return error; 402 return error;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 74d0e5966ebc..88693a98fac5 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -164,9 +164,9 @@ xfs_verifier_error(
164{ 164{
165 struct xfs_mount *mp = bp->b_target->bt_mount; 165 struct xfs_mount *mp = bp->b_target->bt_mount;
166 166
167 xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx", 167 xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
168 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption", 168 bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
169 __return_address, bp->b_bn); 169 __return_address, bp->b_ops->name, bp->b_bn);
170 170
171 xfs_alert(mp, "Unmount and run xfs_repair"); 171 xfs_alert(mp, "Unmount and run xfs_repair");
172 172
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f5392ab2def1..ebe9b8290a70 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -402,19 +402,26 @@ xfs_file_splice_read(
402 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 402 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
403 return -EIO; 403 return -EIO;
404 404
405 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
406
407 trace_xfs_file_splice_read(ip, count, *ppos, ioflags); 405 trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
408 406
409 /* for dax, we need to avoid the page cache */ 407 /*
410 if (IS_DAX(VFS_I(ip))) 408 * DAX inodes cannot use the page cache for splice, so we have to push
411 ret = default_file_splice_read(infilp, ppos, pipe, count, flags); 409 * them through the VFS IO path. This means it goes through
412 else 410 * ->read_iter, which for us takes the XFS_IOLOCK_SHARED. Hence we
413 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 411 * cannot lock the splice operation at this level for DAX inodes.
414 if (ret > 0) 412 */
415 XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret); 413 if (IS_DAX(VFS_I(ip))) {
414 ret = default_file_splice_read(infilp, ppos, pipe, count,
415 flags);
416 goto out;
417 }
416 418
419 xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
420 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
417 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 421 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
422out:
423 if (ret > 0)
424 XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
418 return ret; 425 return ret;
419} 426}
420 427
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8ee393996b7d..ae3758a90ed6 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1143,7 +1143,6 @@ xfs_create(
1143 xfs_bmap_free_t free_list; 1143 xfs_bmap_free_t free_list;
1144 xfs_fsblock_t first_block; 1144 xfs_fsblock_t first_block;
1145 bool unlock_dp_on_error = false; 1145 bool unlock_dp_on_error = false;
1146 int committed;
1147 prid_t prid; 1146 prid_t prid;
1148 struct xfs_dquot *udqp = NULL; 1147 struct xfs_dquot *udqp = NULL;
1149 struct xfs_dquot *gdqp = NULL; 1148 struct xfs_dquot *gdqp = NULL;
@@ -1226,7 +1225,7 @@ xfs_create(
1226 * pointing to itself. 1225 * pointing to itself.
1227 */ 1226 */
1228 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, 1227 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev,
1229 prid, resblks > 0, &ip, &committed); 1228 prid, resblks > 0, &ip, NULL);
1230 if (error) 1229 if (error)
1231 goto out_trans_cancel; 1230 goto out_trans_cancel;
1232 1231
@@ -1275,7 +1274,7 @@ xfs_create(
1275 */ 1274 */
1276 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); 1275 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1277 1276
1278 error = xfs_bmap_finish(&tp, &free_list, &committed); 1277 error = xfs_bmap_finish(&tp, &free_list, NULL);
1279 if (error) 1278 if (error)
1280 goto out_bmap_cancel; 1279 goto out_bmap_cancel;
1281 1280
@@ -1427,7 +1426,6 @@ xfs_link(
1427 int error; 1426 int error;
1428 xfs_bmap_free_t free_list; 1427 xfs_bmap_free_t free_list;
1429 xfs_fsblock_t first_block; 1428 xfs_fsblock_t first_block;
1430 int committed;
1431 int resblks; 1429 int resblks;
1432 1430
1433 trace_xfs_link(tdp, target_name); 1431 trace_xfs_link(tdp, target_name);
@@ -1502,11 +1500,10 @@ xfs_link(
1502 * link transaction goes to disk before returning to 1500 * link transaction goes to disk before returning to
1503 * the user. 1501 * the user.
1504 */ 1502 */
1505 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 1503 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
1506 xfs_trans_set_sync(tp); 1504 xfs_trans_set_sync(tp);
1507 }
1508 1505
1509 error = xfs_bmap_finish (&tp, &free_list, &committed); 1506 error = xfs_bmap_finish(&tp, &free_list, NULL);
1510 if (error) { 1507 if (error) {
1511 xfs_bmap_cancel(&free_list); 1508 xfs_bmap_cancel(&free_list);
1512 goto error_return; 1509 goto error_return;
@@ -1555,7 +1552,6 @@ xfs_itruncate_extents(
1555 xfs_fileoff_t first_unmap_block; 1552 xfs_fileoff_t first_unmap_block;
1556 xfs_fileoff_t last_block; 1553 xfs_fileoff_t last_block;
1557 xfs_filblks_t unmap_len; 1554 xfs_filblks_t unmap_len;
1558 int committed;
1559 int error = 0; 1555 int error = 0;
1560 int done = 0; 1556 int done = 0;
1561 1557
@@ -1601,9 +1597,7 @@ xfs_itruncate_extents(
1601 * Duplicate the transaction that has the permanent 1597 * Duplicate the transaction that has the permanent
1602 * reservation and commit the old transaction. 1598 * reservation and commit the old transaction.
1603 */ 1599 */
1604 error = xfs_bmap_finish(&tp, &free_list, &committed); 1600 error = xfs_bmap_finish(&tp, &free_list, ip);
1605 if (committed)
1606 xfs_trans_ijoin(tp, ip, 0);
1607 if (error) 1601 if (error)
1608 goto out_bmap_cancel; 1602 goto out_bmap_cancel;
1609 1603
@@ -1774,7 +1768,6 @@ xfs_inactive_ifree(
1774{ 1768{
1775 xfs_bmap_free_t free_list; 1769 xfs_bmap_free_t free_list;
1776 xfs_fsblock_t first_block; 1770 xfs_fsblock_t first_block;
1777 int committed;
1778 struct xfs_mount *mp = ip->i_mount; 1771 struct xfs_mount *mp = ip->i_mount;
1779 struct xfs_trans *tp; 1772 struct xfs_trans *tp;
1780 int error; 1773 int error;
@@ -1841,7 +1834,7 @@ xfs_inactive_ifree(
1841 * Just ignore errors at this point. There is nothing we can do except 1834 * Just ignore errors at this point. There is nothing we can do except
1842 * to try to keep going. Make sure it's not a silent error. 1835 * to try to keep going. Make sure it's not a silent error.
1843 */ 1836 */
1844 error = xfs_bmap_finish(&tp, &free_list, &committed); 1837 error = xfs_bmap_finish(&tp, &free_list, NULL);
1845 if (error) { 1838 if (error) {
1846 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", 1839 xfs_notice(mp, "%s: xfs_bmap_finish returned error %d",
1847 __func__, error); 1840 __func__, error);
@@ -2523,7 +2516,6 @@ xfs_remove(
2523 int error = 0; 2516 int error = 0;
2524 xfs_bmap_free_t free_list; 2517 xfs_bmap_free_t free_list;
2525 xfs_fsblock_t first_block; 2518 xfs_fsblock_t first_block;
2526 int committed;
2527 uint resblks; 2519 uint resblks;
2528 2520
2529 trace_xfs_remove(dp, name); 2521 trace_xfs_remove(dp, name);
@@ -2624,7 +2616,7 @@ xfs_remove(
2624 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 2616 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2625 xfs_trans_set_sync(tp); 2617 xfs_trans_set_sync(tp);
2626 2618
2627 error = xfs_bmap_finish(&tp, &free_list, &committed); 2619 error = xfs_bmap_finish(&tp, &free_list, NULL);
2628 if (error) 2620 if (error)
2629 goto out_bmap_cancel; 2621 goto out_bmap_cancel;
2630 2622
@@ -2701,7 +2693,6 @@ xfs_finish_rename(
2701 struct xfs_trans *tp, 2693 struct xfs_trans *tp,
2702 struct xfs_bmap_free *free_list) 2694 struct xfs_bmap_free *free_list)
2703{ 2695{
2704 int committed = 0;
2705 int error; 2696 int error;
2706 2697
2707 /* 2698 /*
@@ -2711,7 +2702,7 @@ xfs_finish_rename(
2711 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) 2702 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2712 xfs_trans_set_sync(tp); 2703 xfs_trans_set_sync(tp);
2713 2704
2714 error = xfs_bmap_finish(&tp, free_list, &committed); 2705 error = xfs_bmap_finish(&tp, free_list, NULL);
2715 if (error) { 2706 if (error) {
2716 xfs_bmap_cancel(free_list); 2707 xfs_bmap_cancel(free_list);
2717 xfs_trans_cancel(tp); 2708 xfs_trans_cancel(tp);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index f4f5b43cf647..d81bdc080370 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -129,7 +129,6 @@ xfs_iomap_write_direct(
129 xfs_trans_t *tp; 129 xfs_trans_t *tp;
130 xfs_bmap_free_t free_list; 130 xfs_bmap_free_t free_list;
131 uint qblocks, resblks, resrtextents; 131 uint qblocks, resblks, resrtextents;
132 int committed;
133 int error; 132 int error;
134 int lockmode; 133 int lockmode;
135 int bmapi_flags = XFS_BMAPI_PREALLOC; 134 int bmapi_flags = XFS_BMAPI_PREALLOC;
@@ -203,15 +202,20 @@ xfs_iomap_write_direct(
203 * this outside the transaction context, but if we commit and then crash 202 * this outside the transaction context, but if we commit and then crash
204 * we may not have zeroed the blocks and this will be exposed on 203 * we may not have zeroed the blocks and this will be exposed on
205 * recovery of the allocation. Hence we must zero before commit. 204 * recovery of the allocation. Hence we must zero before commit.
205 *
206 * Further, if we are mapping unwritten extents here, we need to zero 206 * Further, if we are mapping unwritten extents here, we need to zero
207 * and convert them to written so that we don't need an unwritten extent 207 * and convert them to written so that we don't need an unwritten extent
208 * callback for DAX. This also means that we need to be able to dip into 208 * callback for DAX. This also means that we need to be able to dip into
209 * the reserve block pool if there is no space left but we need to do 209 * the reserve block pool for bmbt block allocation if there is no space
210 * unwritten extent conversion. 210 * left but we need to do unwritten extent conversion.
211 */ 211 */
212
212 if (IS_DAX(VFS_I(ip))) { 213 if (IS_DAX(VFS_I(ip))) {
213 bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO; 214 bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
214 tp->t_flags |= XFS_TRANS_RESERVE; 215 if (ISUNWRITTEN(imap)) {
216 tp->t_flags |= XFS_TRANS_RESERVE;
217 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
218 }
215 } 219 }
216 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 220 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
217 resblks, resrtextents); 221 resblks, resrtextents);
@@ -247,7 +251,7 @@ xfs_iomap_write_direct(
247 /* 251 /*
248 * Complete the transaction 252 * Complete the transaction
249 */ 253 */
250 error = xfs_bmap_finish(&tp, &free_list, &committed); 254 error = xfs_bmap_finish(&tp, &free_list, NULL);
251 if (error) 255 if (error)
252 goto out_bmap_cancel; 256 goto out_bmap_cancel;
253 257
@@ -693,7 +697,7 @@ xfs_iomap_write_allocate(
693 xfs_bmap_free_t free_list; 697 xfs_bmap_free_t free_list;
694 xfs_filblks_t count_fsb; 698 xfs_filblks_t count_fsb;
695 xfs_trans_t *tp; 699 xfs_trans_t *tp;
696 int nimaps, committed; 700 int nimaps;
697 int error = 0; 701 int error = 0;
698 int nres; 702 int nres;
699 703
@@ -794,7 +798,7 @@ xfs_iomap_write_allocate(
794 if (error) 798 if (error)
795 goto trans_cancel; 799 goto trans_cancel;
796 800
797 error = xfs_bmap_finish(&tp, &free_list, &committed); 801 error = xfs_bmap_finish(&tp, &free_list, NULL);
798 if (error) 802 if (error)
799 goto trans_cancel; 803 goto trans_cancel;
800 804
@@ -852,7 +856,6 @@ xfs_iomap_write_unwritten(
852 xfs_bmap_free_t free_list; 856 xfs_bmap_free_t free_list;
853 xfs_fsize_t i_size; 857 xfs_fsize_t i_size;
854 uint resblks; 858 uint resblks;
855 int committed;
856 int error; 859 int error;
857 860
858 trace_xfs_unwritten_convert(ip, offset, count); 861 trace_xfs_unwritten_convert(ip, offset, count);
@@ -924,7 +927,7 @@ xfs_iomap_write_unwritten(
924 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 927 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
925 } 928 }
926 929
927 error = xfs_bmap_finish(&tp, &free_list, &committed); 930 error = xfs_bmap_finish(&tp, &free_list, NULL);
928 if (error) 931 if (error)
929 goto error_on_bmapi_transaction; 932 goto error_on_bmapi_transaction;
930 933
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f52c72a1a06f..9c9a1c9bcc7f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1188,10 +1188,16 @@ xlog_iodone(xfs_buf_t *bp)
1188 int aborted = 0; 1188 int aborted = 0;
1189 1189
1190 /* 1190 /*
1191 * Race to shutdown the filesystem if we see an error. 1191 * Race to shutdown the filesystem if we see an error or the iclog is in
1192 * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
1193 * CRC errors into log recovery.
1192 */ 1194 */
1193 if (XFS_TEST_ERROR(bp->b_error, l->l_mp, 1195 if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
1194 XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { 1196 XFS_RANDOM_IODONE_IOERR) ||
1197 iclog->ic_state & XLOG_STATE_IOABORT) {
1198 if (iclog->ic_state & XLOG_STATE_IOABORT)
1199 iclog->ic_state &= ~XLOG_STATE_IOABORT;
1200
1195 xfs_buf_ioerror_alert(bp, __func__); 1201 xfs_buf_ioerror_alert(bp, __func__);
1196 xfs_buf_stale(bp); 1202 xfs_buf_stale(bp);
1197 xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR); 1203 xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
@@ -1838,6 +1844,23 @@ xlog_sync(
1838 /* calculate the checksum */ 1844 /* calculate the checksum */
1839 iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header, 1845 iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
1840 iclog->ic_datap, size); 1846 iclog->ic_datap, size);
1847#ifdef DEBUG
1848 /*
1849 * Intentionally corrupt the log record CRC based on the error injection
1850 * frequency, if defined. This facilitates testing log recovery in the
1851 * event of torn writes. Hence, set the IOABORT state to abort the log
1852 * write on I/O completion and shutdown the fs. The subsequent mount
1853 * detects the bad CRC and attempts to recover.
1854 */
1855 if (log->l_badcrc_factor &&
1856 (prandom_u32() % log->l_badcrc_factor == 0)) {
1857 iclog->ic_header.h_crc &= 0xAAAAAAAA;
1858 iclog->ic_state |= XLOG_STATE_IOABORT;
1859 xfs_warn(log->l_mp,
1860 "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
1861 be64_to_cpu(iclog->ic_header.h_lsn));
1862 }
1863#endif
1841 1864
1842 bp->b_io_length = BTOBB(count); 1865 bp->b_io_length = BTOBB(count);
1843 bp->b_fspriv = iclog; 1866 bp->b_fspriv = iclog;
@@ -2045,12 +2068,14 @@ xlog_print_tic_res(
2045 "QM_DQCLUSTER", 2068 "QM_DQCLUSTER",
2046 "QM_QINOCREATE", 2069 "QM_QINOCREATE",
2047 "QM_QUOTAOFF_END", 2070 "QM_QUOTAOFF_END",
2048 "SB_UNIT",
2049 "FSYNC_TS", 2071 "FSYNC_TS",
2050 "GROWFSRT_ALLOC", 2072 "GROWFSRT_ALLOC",
2051 "GROWFSRT_ZERO", 2073 "GROWFSRT_ZERO",
2052 "GROWFSRT_FREE", 2074 "GROWFSRT_FREE",
2053 "SWAPEXT" 2075 "SWAPEXT",
2076 "CHECKPOINT",
2077 "ICREATE",
2078 "CREATE_TMPFILE"
2054 }; 2079 };
2055 2080
2056 xfs_warn(mp, "xlog_write: reservation summary:"); 2081 xfs_warn(mp, "xlog_write: reservation summary:");
@@ -2791,11 +2816,19 @@ xlog_state_do_callback(
2791 } 2816 }
2792 } while (!ioerrors && loopdidcallbacks); 2817 } while (!ioerrors && loopdidcallbacks);
2793 2818
2819#ifdef DEBUG
2794 /* 2820 /*
2795 * make one last gasp attempt to see if iclogs are being left in 2821 * Make one last gasp attempt to see if iclogs are being left in limbo.
2796 * limbo.. 2822 * If the above loop finds an iclog earlier than the current iclog and
2823 * in one of the syncing states, the current iclog is put into
2824 * DO_CALLBACK and the callbacks are deferred to the completion of the
2825 * earlier iclog. Walk the iclogs in order and make sure that no iclog
2826 * is in DO_CALLBACK unless an earlier iclog is in one of the syncing
2827 * states.
2828 *
2829 * Note that SYNCING|IOABORT is a valid state so we cannot just check
2830 * for ic_state == SYNCING.
2797 */ 2831 */
2798#ifdef DEBUG
2799 if (funcdidcallbacks) { 2832 if (funcdidcallbacks) {
2800 first_iclog = iclog = log->l_iclog; 2833 first_iclog = iclog = log->l_iclog;
2801 do { 2834 do {
@@ -2810,7 +2843,7 @@ xlog_state_do_callback(
2810 * IOERROR - give up hope all ye who enter here 2843 * IOERROR - give up hope all ye who enter here
2811 */ 2844 */
2812 if (iclog->ic_state == XLOG_STATE_WANT_SYNC || 2845 if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
2813 iclog->ic_state == XLOG_STATE_SYNCING || 2846 iclog->ic_state & XLOG_STATE_SYNCING ||
2814 iclog->ic_state == XLOG_STATE_DONE_SYNC || 2847 iclog->ic_state == XLOG_STATE_DONE_SYNC ||
2815 iclog->ic_state == XLOG_STATE_IOERROR ) 2848 iclog->ic_state == XLOG_STATE_IOERROR )
2816 break; 2849 break;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8daba7491b13..ed8896310c00 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -62,6 +62,7 @@ static inline uint xlog_get_client_id(__be32 i)
62#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */ 62#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
63#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/ 63#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
64#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */ 64#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
65#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */
65#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */ 66#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
66#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */ 67#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
67 68
@@ -410,6 +411,8 @@ struct xlog {
410 /* The following fields are used for debugging; need to hold icloglock */ 411 /* The following fields are used for debugging; need to hold icloglock */
411#ifdef DEBUG 412#ifdef DEBUG
412 void *l_iclog_bak[XLOG_MAX_ICLOGS]; 413 void *l_iclog_bak[XLOG_MAX_ICLOGS];
414 /* log record crc error injection factor */
415 uint32_t l_badcrc_factor;
413#endif 416#endif
414 417
415}; 418};
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index c5ecaacdd218..da37beb76f6e 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -61,6 +61,9 @@ xlog_recover_check_summary(
61#else 61#else
62#define xlog_recover_check_summary(log) 62#define xlog_recover_check_summary(log)
63#endif 63#endif
64STATIC int
65xlog_do_recovery_pass(
66 struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);
64 67
65/* 68/*
66 * This structure is used during recovery to record the buf log items which 69 * This structure is used during recovery to record the buf log items which
@@ -868,6 +871,351 @@ validate_head:
868} 871}
869 872
870/* 873/*
874 * Seek backwards in the log for log record headers.
875 *
876 * Given a starting log block, walk backwards until we find the provided number
877 * of records or hit the provided tail block. The return value is the number of
878 * records encountered or a negative error code. The log block and buffer
879 * pointer of the last record seen are returned in rblk and rhead respectively.
880 */
881STATIC int
882xlog_rseek_logrec_hdr(
883 struct xlog *log,
884 xfs_daddr_t head_blk,
885 xfs_daddr_t tail_blk,
886 int count,
887 struct xfs_buf *bp,
888 xfs_daddr_t *rblk,
889 struct xlog_rec_header **rhead,
890 bool *wrapped)
891{
892 int i;
893 int error;
894 int found = 0;
895 char *offset = NULL;
896 xfs_daddr_t end_blk;
897
898 *wrapped = false;
899
900 /*
901 * Walk backwards from the head block until we hit the tail or the first
902 * block in the log.
903 */
904 end_blk = head_blk > tail_blk ? tail_blk : 0;
905 for (i = (int) head_blk - 1; i >= end_blk; i--) {
906 error = xlog_bread(log, i, 1, bp, &offset);
907 if (error)
908 goto out_error;
909
910 if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
911 *rblk = i;
912 *rhead = (struct xlog_rec_header *) offset;
913 if (++found == count)
914 break;
915 }
916 }
917
918 /*
919 * If we haven't hit the tail block or the log record header count,
920 * start looking again from the end of the physical log. Note that
921 * callers can pass head == tail if the tail is not yet known.
922 */
923 if (tail_blk >= head_blk && found != count) {
924 for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
925 error = xlog_bread(log, i, 1, bp, &offset);
926 if (error)
927 goto out_error;
928
929 if (*(__be32 *)offset ==
930 cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
931 *wrapped = true;
932 *rblk = i;
933 *rhead = (struct xlog_rec_header *) offset;
934 if (++found == count)
935 break;
936 }
937 }
938 }
939
940 return found;
941
942out_error:
943 return error;
944}
945
946/*
947 * Seek forward in the log for log record headers.
948 *
949 * Given head and tail blocks, walk forward from the tail block until we find
950 * the provided number of records or hit the head block. The return value is the
951 * number of records encountered or a negative error code. The log block and
952 * buffer pointer of the last record seen are returned in rblk and rhead
953 * respectively.
954 */
955STATIC int
956xlog_seek_logrec_hdr(
957 struct xlog *log,
958 xfs_daddr_t head_blk,
959 xfs_daddr_t tail_blk,
960 int count,
961 struct xfs_buf *bp,
962 xfs_daddr_t *rblk,
963 struct xlog_rec_header **rhead,
964 bool *wrapped)
965{
966 int i;
967 int error;
968 int found = 0;
969 char *offset = NULL;
970 xfs_daddr_t end_blk;
971
972 *wrapped = false;
973
974 /*
975 * Walk forward from the tail block until we hit the head or the last
976 * block in the log.
977 */
978 end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
979 for (i = (int) tail_blk; i <= end_blk; i++) {
980 error = xlog_bread(log, i, 1, bp, &offset);
981 if (error)
982 goto out_error;
983
984 if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
985 *rblk = i;
986 *rhead = (struct xlog_rec_header *) offset;
987 if (++found == count)
988 break;
989 }
990 }
991
992 /*
993 * If we haven't hit the head block or the log record header count,
994 * start looking again from the start of the physical log.
995 */
996 if (tail_blk > head_blk && found != count) {
997 for (i = 0; i < (int) head_blk; i++) {
998 error = xlog_bread(log, i, 1, bp, &offset);
999 if (error)
1000 goto out_error;
1001
1002 if (*(__be32 *)offset ==
1003 cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
1004 *wrapped = true;
1005 *rblk = i;
1006 *rhead = (struct xlog_rec_header *) offset;
1007 if (++found == count)
1008 break;
1009 }
1010 }
1011 }
1012
1013 return found;
1014
1015out_error:
1016 return error;
1017}
1018
1019/*
1020 * Check the log tail for torn writes. This is required when torn writes are
1021 * detected at the head and the head had to be walked back to a previous record.
1022 * The tail of the previous record must now be verified to ensure the torn
1023 * writes didn't corrupt the previous tail.
1024 *
1025 * Return an error if CRC verification fails as recovery cannot proceed.
1026 */
1027STATIC int
1028xlog_verify_tail(
1029 struct xlog *log,
1030 xfs_daddr_t head_blk,
1031 xfs_daddr_t tail_blk)
1032{
1033 struct xlog_rec_header *thead;
1034 struct xfs_buf *bp;
1035 xfs_daddr_t first_bad;
1036 int count;
1037 int error = 0;
1038 bool wrapped;
1039 xfs_daddr_t tmp_head;
1040
1041 bp = xlog_get_bp(log, 1);
1042 if (!bp)
1043 return -ENOMEM;
1044
1045 /*
1046 * Seek XLOG_MAX_ICLOGS + 1 records past the current tail record to get
1047 * a temporary head block that points after the last possible
1048 * concurrently written record of the tail.
1049 */
1050 count = xlog_seek_logrec_hdr(log, head_blk, tail_blk,
1051 XLOG_MAX_ICLOGS + 1, bp, &tmp_head, &thead,
1052 &wrapped);
1053 if (count < 0) {
1054 error = count;
1055 goto out;
1056 }
1057
1058 /*
1059 * If the call above didn't find XLOG_MAX_ICLOGS + 1 records, we ran
1060 * into the actual log head. tmp_head points to the start of the record
1061 * so update it to the actual head block.
1062 */
1063 if (count < XLOG_MAX_ICLOGS + 1)
1064 tmp_head = head_blk;
1065
1066 /*
1067 * We now have a tail and temporary head block that covers at least
1068 * XLOG_MAX_ICLOGS records from the tail. We need to verify that these
1069 * records were completely written. Run a CRC verification pass from
1070 * tail to head and return the result.
1071 */
1072 error = xlog_do_recovery_pass(log, tmp_head, tail_blk,
1073 XLOG_RECOVER_CRCPASS, &first_bad);
1074
1075out:
1076 xlog_put_bp(bp);
1077 return error;
1078}
1079
1080/*
1081 * Detect and trim torn writes from the head of the log.
1082 *
1083 * Storage without sector atomicity guarantees can result in torn writes in the
1084 * log in the event of a crash. Our only means to detect this scenario is via
1085 * CRC verification. While we can't always be certain that CRC verification
1086 * failure is due to a torn write vs. an unrelated corruption, we do know that
1087 * only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at
1088 * one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of
1089 * the log and treat failures in this range as torn writes as a matter of
1090 * policy. In the event of CRC failure, the head is walked back to the last good
1091 * record in the log and the tail is updated from that record and verified.
1092 */
1093STATIC int
1094xlog_verify_head(
1095 struct xlog *log,
1096 xfs_daddr_t *head_blk, /* in/out: unverified head */
1097 xfs_daddr_t *tail_blk, /* out: tail block */
1098 struct xfs_buf *bp,
1099 xfs_daddr_t *rhead_blk, /* start blk of last record */
1100 struct xlog_rec_header **rhead, /* ptr to last record */
1101 bool *wrapped) /* last rec. wraps phys. log */
1102{
1103 struct xlog_rec_header *tmp_rhead;
1104 struct xfs_buf *tmp_bp;
1105 xfs_daddr_t first_bad;
1106 xfs_daddr_t tmp_rhead_blk;
1107 int found;
1108 int error;
1109 bool tmp_wrapped;
1110
1111 /*
1112 * Search backwards through the log looking for the log record header
1113 * block. This wraps all the way back around to the head so something is
1114 * seriously wrong if we can't find it.
1115 */
1116 found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
1117 rhead, wrapped);
1118 if (found < 0)
1119 return found;
1120 if (!found) {
1121 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
1122 return -EIO;
1123 }
1124
1125 *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
1126
1127 /*
1128 * Now that we have a tail block, check the head of the log for torn
1129 * writes. Search again until we hit the tail or the maximum number of
1130 * log record I/Os that could have been in flight at one time. Use a
1131 * temporary buffer so we don't trash the rhead/bp pointer from the
1132 * call above.
1133 */
1134 tmp_bp = xlog_get_bp(log, 1);
1135 if (!tmp_bp)
1136 return -ENOMEM;
1137 error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
1138 XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
1139 &tmp_rhead, &tmp_wrapped);
1140 xlog_put_bp(tmp_bp);
1141 if (error < 0)
1142 return error;
1143
1144 /*
1145 * Now run a CRC verification pass over the records starting at the
1146 * block found above to the current head. If a CRC failure occurs, the
1147 * log block of the first bad record is saved in first_bad.
1148 */
1149 error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
1150 XLOG_RECOVER_CRCPASS, &first_bad);
1151 if (error == -EFSBADCRC) {
1152 /*
1153 * We've hit a potential torn write. Reset the error and warn
1154 * about it.
1155 */
1156 error = 0;
1157 xfs_warn(log->l_mp,
1158"Torn write (CRC failure) detected at log block 0x%llx. Truncating head block from 0x%llx.",
1159 first_bad, *head_blk);
1160
1161 /*
1162 * Get the header block and buffer pointer for the last good
1163 * record before the bad record.
1164 *
1165 * Note that xlog_find_tail() clears the blocks at the new head
1166 * (i.e., the records with invalid CRC) if the cycle number
1167 * matches the current cycle.
1168 */
1169 found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
1170 rhead_blk, rhead, wrapped);
1171 if (found < 0)
1172 return found;
1173 if (found == 0) /* XXX: right thing to do here? */
1174 return -EIO;
1175
1176 /*
1177 * Reset the head block to the starting block of the first bad
1178 * log record and set the tail block based on the last good
1179 * record.
1180 *
1181 * Bail out if the updated head/tail match as this indicates
1182 * possible corruption outside of the acceptable
1183 * (XLOG_MAX_ICLOGS) range. This is a job for xfs_repair...
1184 */
1185 *head_blk = first_bad;
1186 *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
1187 if (*head_blk == *tail_blk) {
1188 ASSERT(0);
1189 return 0;
1190 }
1191
1192 /*
1193 * Now verify the tail based on the updated head. This is
1194 * required because the torn writes trimmed from the head could
1195 * have been written over the tail of a previous record. Return
1196 * any errors since recovery cannot proceed if the tail is
1197 * corrupt.
1198 *
1199 * XXX: This leaves a gap in truly robust protection from torn
1200 * writes in the log. If the head is behind the tail, the tail
1201 * pushes forward to create some space and then a crash occurs
1202 * causing the writes into the previous record's tail region to
1203 * tear, log recovery isn't able to recover.
1204 *
1205 * How likely is this to occur? If possible, can we do something
1206 * more intelligent here? Is it safe to push the tail forward if
1207 * we can determine that the tail is within the range of the
1208 * torn write (e.g., the kernel can only overwrite the tail if
1209 * it has actually been pushed forward)? Alternatively, could we
1210 * somehow prevent this condition at runtime?
1211 */
1212 error = xlog_verify_tail(log, *head_blk, *tail_blk);
1213 }
1214
1215 return error;
1216}
1217
1218/*
871 * Find the sync block number or the tail of the log. 1219 * Find the sync block number or the tail of the log.
872 * 1220 *
873 * This will be the block number of the last record to have its 1221 * This will be the block number of the last record to have its
@@ -893,13 +1241,13 @@ xlog_find_tail(
893 xlog_op_header_t *op_head; 1241 xlog_op_header_t *op_head;
894 char *offset = NULL; 1242 char *offset = NULL;
895 xfs_buf_t *bp; 1243 xfs_buf_t *bp;
896 int error, i, found; 1244 int error;
897 xfs_daddr_t umount_data_blk; 1245 xfs_daddr_t umount_data_blk;
898 xfs_daddr_t after_umount_blk; 1246 xfs_daddr_t after_umount_blk;
1247 xfs_daddr_t rhead_blk;
899 xfs_lsn_t tail_lsn; 1248 xfs_lsn_t tail_lsn;
900 int hblks; 1249 int hblks;
901 1250 bool wrapped = false;
902 found = 0;
903 1251
904 /* 1252 /*
905 * Find previous log record 1253 * Find previous log record
@@ -923,48 +1271,16 @@ xlog_find_tail(
923 } 1271 }
924 1272
925 /* 1273 /*
926 * Search backwards looking for log record header block 1274 * Trim the head block back to skip over torn records. We can have
1275 * multiple log I/Os in flight at any time, so we assume CRC failures
1276 * back through the previous several records are torn writes and skip
1277 * them.
927 */ 1278 */
928 ASSERT(*head_blk < INT_MAX); 1279 ASSERT(*head_blk < INT_MAX);
929 for (i = (int)(*head_blk) - 1; i >= 0; i--) { 1280 error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
930 error = xlog_bread(log, i, 1, bp, &offset); 1281 &rhead, &wrapped);
931 if (error) 1282 if (error)
932 goto done; 1283 goto done;
933
934 if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
935 found = 1;
936 break;
937 }
938 }
939 /*
940 * If we haven't found the log record header block, start looking
941 * again from the end of the physical log. XXXmiken: There should be
942 * a check here to make sure we didn't search more than N blocks in
943 * the previous code.
944 */
945 if (!found) {
946 for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
947 error = xlog_bread(log, i, 1, bp, &offset);
948 if (error)
949 goto done;
950
951 if (*(__be32 *)offset ==
952 cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
953 found = 2;
954 break;
955 }
956 }
957 }
958 if (!found) {
959 xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
960 xlog_put_bp(bp);
961 ASSERT(0);
962 return -EIO;
963 }
964
965 /* find blk_no of tail of log */
966 rhead = (xlog_rec_header_t *)offset;
967 *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
968 1284
969 /* 1285 /*
970 * Reset log values according to the state of the log when we 1286 * Reset log values according to the state of the log when we
@@ -976,10 +1292,10 @@ xlog_find_tail(
976 * written was complete and ended exactly on the end boundary 1292 * written was complete and ended exactly on the end boundary
977 * of the physical log. 1293 * of the physical log.
978 */ 1294 */
979 log->l_prev_block = i; 1295 log->l_prev_block = rhead_blk;
980 log->l_curr_block = (int)*head_blk; 1296 log->l_curr_block = (int)*head_blk;
981 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); 1297 log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
982 if (found == 2) 1298 if (wrapped)
983 log->l_curr_cycle++; 1299 log->l_curr_cycle++;
984 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); 1300 atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
985 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); 1301 atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
@@ -1014,12 +1330,13 @@ xlog_find_tail(
1014 } else { 1330 } else {
1015 hblks = 1; 1331 hblks = 1;
1016 } 1332 }
1017 after_umount_blk = (i + hblks + (int) 1333 after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
1018 BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize; 1334 after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
1019 tail_lsn = atomic64_read(&log->l_tail_lsn); 1335 tail_lsn = atomic64_read(&log->l_tail_lsn);
1020 if (*head_blk == after_umount_blk && 1336 if (*head_blk == after_umount_blk &&
1021 be32_to_cpu(rhead->h_num_logops) == 1) { 1337 be32_to_cpu(rhead->h_num_logops) == 1) {
1022 umount_data_blk = (i + hblks) % log->l_logBBsize; 1338 umount_data_blk = rhead_blk + hblks;
1339 umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
1023 error = xlog_bread(log, umount_data_blk, 1, bp, &offset); 1340 error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
1024 if (error) 1341 if (error)
1025 goto done; 1342 goto done;
@@ -3204,6 +3521,7 @@ xlog_recover_dquot_ra_pass2(
3204 struct xfs_disk_dquot *recddq; 3521 struct xfs_disk_dquot *recddq;
3205 struct xfs_dq_logformat *dq_f; 3522 struct xfs_dq_logformat *dq_f;
3206 uint type; 3523 uint type;
3524 int len;
3207 3525
3208 3526
3209 if (mp->m_qflags == 0) 3527 if (mp->m_qflags == 0)
@@ -3224,8 +3542,12 @@ xlog_recover_dquot_ra_pass2(
3224 ASSERT(dq_f); 3542 ASSERT(dq_f);
3225 ASSERT(dq_f->qlf_len == 1); 3543 ASSERT(dq_f->qlf_len == 1);
3226 3544
3227 xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, 3545 len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
3228 XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL); 3546 if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
3547 return;
3548
3549 xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
3550 &xfs_dquot_buf_ra_ops);
3229} 3551}
3230 3552
3231STATIC void 3553STATIC void
@@ -4118,26 +4440,69 @@ xlog_recover_process_iunlinks(
4118 mp->m_dmevmask = mp_dmevmask; 4440 mp->m_dmevmask = mp_dmevmask;
4119} 4441}
4120 4442
4443STATIC int
4444xlog_unpack_data(
4445 struct xlog_rec_header *rhead,
4446 char *dp,
4447 struct xlog *log)
4448{
4449 int i, j, k;
4450
4451 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
4452 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
4453 *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
4454 dp += BBSIZE;
4455 }
4456
4457 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
4458 xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
4459 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
4460 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
4461 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
4462 *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
4463 dp += BBSIZE;
4464 }
4465 }
4466
4467 return 0;
4468}
4469
4121/* 4470/*
4122 * Upack the log buffer data and crc check it. If the check fails, issue a 4471 * CRC check, unpack and process a log record.
4123 * warning if and only if the CRC in the header is non-zero. This makes the
4124 * check an advisory warning, and the zero CRC check will prevent failure
4125 * warnings from being emitted when upgrading the kernel from one that does not
4126 * add CRCs by default.
4127 *
4128 * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log
4129 * corruption failure
4130 */ 4472 */
4131STATIC int 4473STATIC int
4132xlog_unpack_data_crc( 4474xlog_recover_process(
4475 struct xlog *log,
4476 struct hlist_head rhash[],
4133 struct xlog_rec_header *rhead, 4477 struct xlog_rec_header *rhead,
4134 char *dp, 4478 char *dp,
4135 struct xlog *log) 4479 int pass)
4136{ 4480{
4481 int error;
4137 __le32 crc; 4482 __le32 crc;
4138 4483
4139 crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); 4484 crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
4140 if (crc != rhead->h_crc) { 4485
4486 /*
4487 * Nothing else to do if this is a CRC verification pass. Just return
4488 * if this is a record with a non-zero crc. Unfortunately, mkfs always
4489 * sets h_crc to 0 so we must consider this valid even on v5 supers.
4490 * Otherwise, return EFSBADCRC on failure so the callers up the stack
4491 * know precisely what failed.
4492 */
4493 if (pass == XLOG_RECOVER_CRCPASS) {
4494 if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc))
4495 return -EFSBADCRC;
4496 return 0;
4497 }
4498
4499 /*
4500 * We're in the normal recovery path. Issue a warning if and only if the
4501 * CRC in the header is non-zero. This is an advisory warning and the
4502 * zero CRC check prevents warnings from being emitted when upgrading
4503 * the kernel from one that does not add CRCs by default.
4504 */
4505 if (crc != le32_to_cpu(rhead->h_crc)) {
4141 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { 4506 if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
4142 xfs_alert(log->l_mp, 4507 xfs_alert(log->l_mp,
4143 "log record CRC mismatch: found 0x%x, expected 0x%x.", 4508 "log record CRC mismatch: found 0x%x, expected 0x%x.",
@@ -4147,47 +4512,18 @@ xlog_unpack_data_crc(
4147 } 4512 }
4148 4513
4149 /* 4514 /*
4150 * If we've detected a log record corruption, then we can't 4515 * If the filesystem is CRC enabled, this mismatch becomes a
4151 * recover past this point. Abort recovery if we are enforcing 4516 * fatal log corruption failure.
4152 * CRC protection by punting an error back up the stack.
4153 */ 4517 */
4154 if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) 4518 if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
4155 return -EFSCORRUPTED; 4519 return -EFSCORRUPTED;
4156 } 4520 }
4157 4521
4158 return 0; 4522 error = xlog_unpack_data(rhead, dp, log);
4159}
4160
4161STATIC int
4162xlog_unpack_data(
4163 struct xlog_rec_header *rhead,
4164 char *dp,
4165 struct xlog *log)
4166{
4167 int i, j, k;
4168 int error;
4169
4170 error = xlog_unpack_data_crc(rhead, dp, log);
4171 if (error) 4523 if (error)
4172 return error; 4524 return error;
4173 4525
4174 for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && 4526 return xlog_recover_process_data(log, rhash, rhead, dp, pass);
4175 i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
4176 *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
4177 dp += BBSIZE;
4178 }
4179
4180 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
4181 xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
4182 for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
4183 j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
4184 k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
4185 *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
4186 dp += BBSIZE;
4187 }
4188 }
4189
4190 return 0;
4191} 4527}
4192 4528
4193STATIC int 4529STATIC int
@@ -4239,18 +4575,21 @@ xlog_do_recovery_pass(
4239 struct xlog *log, 4575 struct xlog *log,
4240 xfs_daddr_t head_blk, 4576 xfs_daddr_t head_blk,
4241 xfs_daddr_t tail_blk, 4577 xfs_daddr_t tail_blk,
4242 int pass) 4578 int pass,
4579 xfs_daddr_t *first_bad) /* out: first bad log rec */
4243{ 4580{
4244 xlog_rec_header_t *rhead; 4581 xlog_rec_header_t *rhead;
4245 xfs_daddr_t blk_no; 4582 xfs_daddr_t blk_no;
4583 xfs_daddr_t rhead_blk;
4246 char *offset; 4584 char *offset;
4247 xfs_buf_t *hbp, *dbp; 4585 xfs_buf_t *hbp, *dbp;
4248 int error = 0, h_size; 4586 int error = 0, h_size, h_len;
4249 int bblks, split_bblks; 4587 int bblks, split_bblks;
4250 int hblks, split_hblks, wrapped_hblks; 4588 int hblks, split_hblks, wrapped_hblks;
4251 struct hlist_head rhash[XLOG_RHASH_SIZE]; 4589 struct hlist_head rhash[XLOG_RHASH_SIZE];
4252 4590
4253 ASSERT(head_blk != tail_blk); 4591 ASSERT(head_blk != tail_blk);
4592 rhead_blk = 0;
4254 4593
4255 /* 4594 /*
4256 * Read the header of the tail block and get the iclog buffer size from 4595 * Read the header of the tail block and get the iclog buffer size from
@@ -4274,7 +4613,31 @@ xlog_do_recovery_pass(
4274 error = xlog_valid_rec_header(log, rhead, tail_blk); 4613 error = xlog_valid_rec_header(log, rhead, tail_blk);
4275 if (error) 4614 if (error)
4276 goto bread_err1; 4615 goto bread_err1;
4616
4617 /*
4618 * xfsprogs has a bug where record length is based on lsunit but
4619 * h_size (iclog size) is hardcoded to 32k. Now that we
4620 * unconditionally CRC verify the unmount record, this means the
4621 * log buffer can be too small for the record and cause an
4622 * overrun.
4623 *
4624 * Detect this condition here. Use lsunit for the buffer size as
4625 * long as this looks like the mkfs case. Otherwise, return an
4626 * error to avoid a buffer overrun.
4627 */
4277 h_size = be32_to_cpu(rhead->h_size); 4628 h_size = be32_to_cpu(rhead->h_size);
4629 h_len = be32_to_cpu(rhead->h_len);
4630 if (h_len > h_size) {
4631 if (h_len <= log->l_mp->m_logbsize &&
4632 be32_to_cpu(rhead->h_num_logops) == 1) {
4633 xfs_warn(log->l_mp,
4634 "invalid iclog size (%d bytes), using lsunit (%d bytes)",
4635 h_size, log->l_mp->m_logbsize);
4636 h_size = log->l_mp->m_logbsize;
4637 } else
4638 return -EFSCORRUPTED;
4639 }
4640
4278 if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) && 4641 if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
4279 (h_size > XLOG_HEADER_CYCLE_SIZE)) { 4642 (h_size > XLOG_HEADER_CYCLE_SIZE)) {
4280 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 4643 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
@@ -4301,7 +4664,7 @@ xlog_do_recovery_pass(
4301 } 4664 }
4302 4665
4303 memset(rhash, 0, sizeof(rhash)); 4666 memset(rhash, 0, sizeof(rhash));
4304 blk_no = tail_blk; 4667 blk_no = rhead_blk = tail_blk;
4305 if (tail_blk > head_blk) { 4668 if (tail_blk > head_blk) {
4306 /* 4669 /*
4307 * Perform recovery around the end of the physical log. 4670 * Perform recovery around the end of the physical log.
@@ -4408,19 +4771,18 @@ xlog_do_recovery_pass(
4408 goto bread_err2; 4771 goto bread_err2;
4409 } 4772 }
4410 4773
4411 error = xlog_unpack_data(rhead, offset, log); 4774 error = xlog_recover_process(log, rhash, rhead, offset,
4775 pass);
4412 if (error) 4776 if (error)
4413 goto bread_err2; 4777 goto bread_err2;
4414 4778
4415 error = xlog_recover_process_data(log, rhash,
4416 rhead, offset, pass);
4417 if (error)
4418 goto bread_err2;
4419 blk_no += bblks; 4779 blk_no += bblks;
4780 rhead_blk = blk_no;
4420 } 4781 }
4421 4782
4422 ASSERT(blk_no >= log->l_logBBsize); 4783 ASSERT(blk_no >= log->l_logBBsize);
4423 blk_no -= log->l_logBBsize; 4784 blk_no -= log->l_logBBsize;
4785 rhead_blk = blk_no;
4424 } 4786 }
4425 4787
4426 /* read first part of physical log */ 4788 /* read first part of physical log */
@@ -4441,21 +4803,22 @@ xlog_do_recovery_pass(
4441 if (error) 4803 if (error)
4442 goto bread_err2; 4804 goto bread_err2;
4443 4805
4444 error = xlog_unpack_data(rhead, offset, log); 4806 error = xlog_recover_process(log, rhash, rhead, offset, pass);
4445 if (error) 4807 if (error)
4446 goto bread_err2; 4808 goto bread_err2;
4447 4809
4448 error = xlog_recover_process_data(log, rhash,
4449 rhead, offset, pass);
4450 if (error)
4451 goto bread_err2;
4452 blk_no += bblks + hblks; 4810 blk_no += bblks + hblks;
4811 rhead_blk = blk_no;
4453 } 4812 }
4454 4813
4455 bread_err2: 4814 bread_err2:
4456 xlog_put_bp(dbp); 4815 xlog_put_bp(dbp);
4457 bread_err1: 4816 bread_err1:
4458 xlog_put_bp(hbp); 4817 xlog_put_bp(hbp);
4818
4819 if (error && first_bad)
4820 *first_bad = rhead_blk;
4821
4459 return error; 4822 return error;
4460} 4823}
4461 4824
@@ -4493,7 +4856,7 @@ xlog_do_log_recovery(
4493 INIT_LIST_HEAD(&log->l_buf_cancel_table[i]); 4856 INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
4494 4857
4495 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 4858 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
4496 XLOG_RECOVER_PASS1); 4859 XLOG_RECOVER_PASS1, NULL);
4497 if (error != 0) { 4860 if (error != 0) {
4498 kmem_free(log->l_buf_cancel_table); 4861 kmem_free(log->l_buf_cancel_table);
4499 log->l_buf_cancel_table = NULL; 4862 log->l_buf_cancel_table = NULL;
@@ -4504,7 +4867,7 @@ xlog_do_log_recovery(
4504 * When it is complete free the table of buf cancel items. 4867 * When it is complete free the table of buf cancel items.
4505 */ 4868 */
4506 error = xlog_do_recovery_pass(log, head_blk, tail_blk, 4869 error = xlog_do_recovery_pass(log, head_blk, tail_blk,
4507 XLOG_RECOVER_PASS2); 4870 XLOG_RECOVER_PASS2, NULL);
4508#ifdef DEBUG 4871#ifdef DEBUG
4509 if (!error) { 4872 if (!error) {
4510 int i; 4873 int i;
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ab1bac6a3a1c..be02a68b2fe2 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -766,7 +766,6 @@ xfs_growfs_rt_alloc(
766{ 766{
767 xfs_fileoff_t bno; /* block number in file */ 767 xfs_fileoff_t bno; /* block number in file */
768 struct xfs_buf *bp; /* temporary buffer for zeroing */ 768 struct xfs_buf *bp; /* temporary buffer for zeroing */
769 int committed; /* transaction committed flag */
770 xfs_daddr_t d; /* disk block address */ 769 xfs_daddr_t d; /* disk block address */
771 int error; /* error return value */ 770 int error; /* error return value */
772 xfs_fsblock_t firstblock;/* first block allocated in xaction */ 771 xfs_fsblock_t firstblock;/* first block allocated in xaction */
@@ -811,7 +810,7 @@ xfs_growfs_rt_alloc(
811 /* 810 /*
812 * Free any blocks freed up in the transaction, then commit. 811 * Free any blocks freed up in the transaction, then commit.
813 */ 812 */
814 error = xfs_bmap_finish(&tp, &flist, &committed); 813 error = xfs_bmap_finish(&tp, &flist, NULL);
815 if (error) 814 if (error)
816 goto out_bmap_cancel; 815 goto out_bmap_cancel;
817 error = xfs_trans_commit(tp); 816 error = xfs_trans_commit(tp);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 36bd8825bfb0..b35775752b74 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -137,7 +137,7 @@ static const match_table_t tokens = {
137}; 137};
138 138
139 139
140STATIC unsigned long 140STATIC int
141suffix_kstrtoint(char *s, unsigned int base, int *res) 141suffix_kstrtoint(char *s, unsigned int base, int *res)
142{ 142{
143 int last, shift_left_factor = 0, _res; 143 int last, shift_left_factor = 0, _res;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 996481eeb491..b44284c1adda 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -178,7 +178,6 @@ xfs_symlink(
178 struct xfs_bmap_free free_list; 178 struct xfs_bmap_free free_list;
179 xfs_fsblock_t first_block; 179 xfs_fsblock_t first_block;
180 bool unlock_dp_on_error = false; 180 bool unlock_dp_on_error = false;
181 int committed;
182 xfs_fileoff_t first_fsb; 181 xfs_fileoff_t first_fsb;
183 xfs_filblks_t fs_blocks; 182 xfs_filblks_t fs_blocks;
184 int nmaps; 183 int nmaps;
@@ -387,7 +386,7 @@ xfs_symlink(
387 xfs_trans_set_sync(tp); 386 xfs_trans_set_sync(tp);
388 } 387 }
389 388
390 error = xfs_bmap_finish(&tp, &free_list, &committed); 389 error = xfs_bmap_finish(&tp, &free_list, NULL);
391 if (error) 390 if (error)
392 goto out_bmap_cancel; 391 goto out_bmap_cancel;
393 392
@@ -434,7 +433,6 @@ xfs_inactive_symlink_rmt(
434 struct xfs_inode *ip) 433 struct xfs_inode *ip)
435{ 434{
436 xfs_buf_t *bp; 435 xfs_buf_t *bp;
437 int committed;
438 int done; 436 int done;
439 int error; 437 int error;
440 xfs_fsblock_t first_block; 438 xfs_fsblock_t first_block;
@@ -510,16 +508,10 @@ xfs_inactive_symlink_rmt(
510 /* 508 /*
511 * Commit the first transaction. This logs the EFI and the inode. 509 * Commit the first transaction. This logs the EFI and the inode.
512 */ 510 */
513 error = xfs_bmap_finish(&tp, &free_list, &committed); 511 error = xfs_bmap_finish(&tp, &free_list, ip);
514 if (error) 512 if (error)
515 goto error_bmap_cancel; 513 goto error_bmap_cancel;
516 /* 514 /*
517 * The transaction must have been committed, since there were
518 * actually extents freed by xfs_bunmapi. See xfs_bmap_finish.
519 * The new tp has the extent freeing and EFDs.
520 */
521 ASSERT(committed);
522 /*
523 * The first xact was committed, so add the inode to the new one. 515 * The first xact was committed, so add the inode to the new one.
524 * Mark it dirty so it will be logged and moved forward in the log as 516 * Mark it dirty so it will be logged and moved forward in the log as
525 * part of every commit. 517 * part of every commit.
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index ee70f5dec9dc..641d625eb334 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -255,11 +255,47 @@ write_grant_head_show(
255} 255}
256XFS_SYSFS_ATTR_RO(write_grant_head); 256XFS_SYSFS_ATTR_RO(write_grant_head);
257 257
258#ifdef DEBUG
259STATIC ssize_t
260log_badcrc_factor_store(
261 struct kobject *kobject,
262 const char *buf,
263 size_t count)
264{
265 struct xlog *log = to_xlog(kobject);
266 int ret;
267 uint32_t val;
268
269 ret = kstrtouint(buf, 0, &val);
270 if (ret)
271 return ret;
272
273 log->l_badcrc_factor = val;
274
275 return count;
276}
277
278STATIC ssize_t
279log_badcrc_factor_show(
280 struct kobject *kobject,
281 char *buf)
282{
283 struct xlog *log = to_xlog(kobject);
284
285 return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor);
286}
287
288XFS_SYSFS_ATTR_RW(log_badcrc_factor);
289#endif /* DEBUG */
290
258static struct attribute *xfs_log_attrs[] = { 291static struct attribute *xfs_log_attrs[] = {
259 ATTR_LIST(log_head_lsn), 292 ATTR_LIST(log_head_lsn),
260 ATTR_LIST(log_tail_lsn), 293 ATTR_LIST(log_tail_lsn),
261 ATTR_LIST(reserve_grant_head), 294 ATTR_LIST(reserve_grant_head),
262 ATTR_LIST(write_grant_head), 295 ATTR_LIST(write_grant_head),
296#ifdef DEBUG
297 ATTR_LIST(log_badcrc_factor),
298#endif
263 NULL, 299 NULL,
264}; 300};
265 301
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 877079eb0f8f..391d797cb53f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1222,6 +1222,32 @@ DEFINE_PAGE_EVENT(xfs_writepage);
1222DEFINE_PAGE_EVENT(xfs_releasepage); 1222DEFINE_PAGE_EVENT(xfs_releasepage);
1223DEFINE_PAGE_EVENT(xfs_invalidatepage); 1223DEFINE_PAGE_EVENT(xfs_invalidatepage);
1224 1224
1225DECLARE_EVENT_CLASS(xfs_readpage_class,
1226 TP_PROTO(struct inode *inode, int nr_pages),
1227 TP_ARGS(inode, nr_pages),
1228 TP_STRUCT__entry(
1229 __field(dev_t, dev)
1230 __field(xfs_ino_t, ino)
1231 __field(int, nr_pages)
1232 ),
1233 TP_fast_assign(
1234 __entry->dev = inode->i_sb->s_dev;
1235 __entry->ino = inode->i_ino;
1236 __entry->nr_pages = nr_pages;
1237 ),
1238 TP_printk("dev %d:%d ino 0x%llx nr_pages %d",
1239 MAJOR(__entry->dev), MINOR(__entry->dev),
1240 __entry->ino,
1241 __entry->nr_pages)
1242)
1243
1244#define DEFINE_READPAGE_EVENT(name) \
1245DEFINE_EVENT(xfs_readpage_class, name, \
1246 TP_PROTO(struct inode *inode, int nr_pages), \
1247 TP_ARGS(inode, nr_pages))
1248DEFINE_READPAGE_EVENT(xfs_vm_readpage);
1249DEFINE_READPAGE_EVENT(xfs_vm_readpages);
1250
1225DECLARE_EVENT_CLASS(xfs_imap_class, 1251DECLARE_EVENT_CLASS(xfs_imap_class,
1226 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, 1252 TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
1227 int type, struct xfs_bmbt_irec *irec), 1253 int type, struct xfs_bmbt_irec *irec),
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index ce78534a047e..995170194df0 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -572,12 +572,16 @@ xfs_quota_warn(
572 struct xfs_dquot *dqp, 572 struct xfs_dquot *dqp,
573 int type) 573 int type)
574{ 574{
575 /* no warnings for project quotas - we just return ENOSPC later */ 575 enum quota_type qtype;
576
576 if (dqp->dq_flags & XFS_DQ_PROJ) 577 if (dqp->dq_flags & XFS_DQ_PROJ)
577 return; 578 qtype = PRJQUOTA;
578 quota_send_warning(make_kqid(&init_user_ns, 579 else if (dqp->dq_flags & XFS_DQ_USER)
579 (dqp->dq_flags & XFS_DQ_USER) ? 580 qtype = USRQUOTA;
580 USRQUOTA : GRPQUOTA, 581 else
582 qtype = GRPQUOTA;
583
584 quota_send_warning(make_kqid(&init_user_ns, qtype,
581 be32_to_cpu(dqp->q_core.d_id)), 585 be32_to_cpu(dqp->q_core.d_id)),
582 mp->m_super->s_dev, type); 586 mp->m_super->s_dev, type);
583} 587}