aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-07-12 20:17:51 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-07-12 20:17:51 -0400
commit4ce9d181ebe53abbca5f450b8a2984b8c3a38f26 (patch)
treeb563ac755c99ddf430402b2850199fdb625f1f7c /fs
parent5010fe9f095414b959fd6fda63986dc90fd0c419 (diff)
parent488ca3d8d088ec4658c87aaec6a91e98acccdd54 (diff)
Merge tag 'xfs-5.3-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs updates from Darrick Wong: "In this release there are a significant amounts of consolidations and cleanups in the log code; restructuring of the log to issue struct bios directly; new bulkstat ioctls to return v5 fs inode information (and fix all the padding problems of the old ioctl); the beginnings of multithreaded inode walks (e.g. quotacheck); and a reduction in memory usage in the online scrub code leading to reduced runtimes. - Refactor inode geometry calculation into a single structure instead of open-coding pieces everywhere. - Add online repair to build options. - Remove unnecessary function call flags and functions. - Claim maintainership of various loose xfs documentation and header files. - Use struct bio directly for log buffer IOs instead of struct xfs_buf. - Reduce log item boilerplate code requirements. - Merge log item code spread across too many files. - Further distinguish between log item commits and cancellations. - Various small cleanups to the ag small allocator. - Support cgroup-aware writeback - libxfs refactoring for mkfs cleanup - Remove unneeded #includes - Fix a memory allocation miscalculation in the new log bio code - Fix bisection problems - Fix a crash in ioend processing caused by tripping over freeing of preallocated transactions - Split out a generic inode walk mechanism from the bulkstat code, hook up all the internal users to use the walking code, then clean up bulkstat to serve only the bulkstat ioctls. - Add a multithreaded iwalk implementation to speed up quotacheck on fast storage with many CPUs. - Remove unnecessary return values in logging teardown functions. - Supplement the bstat and inogrp structures with new bulkstat and inumbers structures that have all the fields we need for v5 filesystem features and none of the padding problems of their predecessors. - Wire up new ioctls that use the new structures with a much simpler bulk_ireq structure at the head instead of the pointerhappy mess we had before. 
- Enable userspace to constrain bulkstat returns to a single AG or a single special inode so that we can phase out a lot of geometry guesswork in userspace. - Reduce memory consumption and zeroing overhead in extended attribute scrub code. - Fix some behavioral regressions in the new bulkstat backend code. - Fix some behavioral regressions in the new log bio code" * tag 'xfs-5.3-merge-12' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: (100 commits) xfs: chain bios the right way around in xfs_rw_bdev xfs: bump INUMBERS cursor correctly in xfs_inumbers_walk xfs: don't update lastino for FSBULKSTAT_SINGLE xfs: online scrub needn't bother zeroing its temporary buffer xfs: only allocate memory for scrubbing attributes when we need it xfs: refactor attr scrub memory allocation function xfs: refactor extended attribute buffer pointer functions xfs: attribute scrub should use seen_enough to pass error values xfs: allow single bulkstat of special inodes xfs: specify AG in bulk req xfs: wire up the v5 inumbers ioctl xfs: wire up new v5 bulkstat ioctls xfs: introduce v5 inode group structure xfs: introduce new v5 bulkstat structure xfs: rename bulkstat functions xfs: remove various bulk request typedef usage fs: xfs: xfs_log: Change return type from int to void xfs: poll waiting for quotacheck xfs: multithreaded iwalk implementation xfs: refactor INUMBERS to use iwalk functions ...
Diffstat (limited to 'fs')
-rw-r--r--fs/xfs/Makefile9
-rw-r--r--fs/xfs/kmem.c5
-rw-r--r--fs/xfs/kmem.h8
-rw-r--r--fs/xfs/libxfs/xfs_ag.c100
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c8
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c227
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c3
-rw-r--r--fs/xfs/libxfs/xfs_attr.c5
-rw-r--r--fs/xfs/libxfs/xfs_attr.h8
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c15
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c14
-rw-r--r--fs/xfs/libxfs/xfs_bit.c1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c19
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/libxfs/xfs_btree.c49
-rw-r--r--fs/xfs/libxfs/xfs_btree.h14
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c12
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c3
-rw-r--r--fs/xfs/libxfs/xfs_defer.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c6
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c11
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c14
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c11
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c10
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c5
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c10
-rw-r--r--fs/xfs/libxfs/xfs_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_fs.h124
-rw-r--r--fs/xfs/libxfs/xfs_health.h2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c245
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h18
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c56
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c6
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c9
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c4
-rw-r--r--fs/xfs/libxfs/xfs_log_rlimit.c2
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c2
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c7
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c6
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c8
-rw-r--r--fs/xfs/libxfs/xfs_sb.c39
-rw-r--r--fs/xfs/libxfs/xfs_shared.h49
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c10
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c17
-rw-r--r--fs/xfs/libxfs/xfs_trans_space.h7
-rw-r--r--fs/xfs/libxfs/xfs_types.c13
-rw-r--r--fs/xfs/scrub/agheader.c11
-rw-r--r--fs/xfs/scrub/agheader_repair.c5
-rw-r--r--fs/xfs/scrub/alloc.c7
-rw-r--r--fs/xfs/scrub/attr.c122
-rw-r--r--fs/xfs/scrub/attr.h71
-rw-r--r--fs/xfs/scrub/bitmap.c5
-rw-r--r--fs/xfs/scrub/bmap.c8
-rw-r--r--fs/xfs/scrub/btree.c7
-rw-r--r--fs/xfs/scrub/common.c8
-rw-r--r--fs/xfs/scrub/dabtree.c8
-rw-r--r--fs/xfs/scrub/dir.c10
-rw-r--r--fs/xfs/scrub/fscounters.c12
-rw-r--r--fs/xfs/scrub/health.c8
-rw-r--r--fs/xfs/scrub/ialloc.c28
-rw-r--r--fs/xfs/scrub/inode.c10
-rw-r--r--fs/xfs/scrub/parent.c8
-rw-r--r--fs/xfs/scrub/quota.c13
-rw-r--r--fs/xfs/scrub/refcount.c10
-rw-r--r--fs/xfs/scrub/repair.c14
-rw-r--r--fs/xfs/scrub/rmap.c9
-rw-r--r--fs/xfs/scrub/rtbitmap.c7
-rw-r--r--fs/xfs/scrub/scrub.c20
-rw-r--r--fs/xfs/scrub/symlink.c8
-rw-r--r--fs/xfs/scrub/trace.c6
-rw-r--r--fs/xfs/xfs_acl.c4
-rw-r--r--fs/xfs/xfs_aops.c121
-rw-r--r--fs/xfs/xfs_aops.h1
-rw-r--r--fs/xfs/xfs_attr_inactive.c7
-rw-r--r--fs/xfs/xfs_attr_list.c7
-rw-r--r--fs/xfs/xfs_bio_io.c61
-rw-r--r--fs/xfs/xfs_bmap_item.c350
-rw-r--r--fs/xfs/xfs_bmap_item.h2
-rw-r--r--fs/xfs/xfs_bmap_util.c11
-rw-r--r--fs/xfs/xfs_buf.c171
-rw-r--r--fs/xfs/xfs_buf.h53
-rw-r--r--fs/xfs/xfs_buf_item.c40
-rw-r--r--fs/xfs/xfs_buf_item.h6
-rw-r--r--fs/xfs/xfs_dir2_readdir.c5
-rw-r--r--fs/xfs/xfs_discard.c4
-rw-r--r--fs/xfs/xfs_dquot.c6
-rw-r--r--fs/xfs/xfs_dquot.h1
-rw-r--r--fs/xfs/xfs_dquot_item.c118
-rw-r--r--fs/xfs/xfs_dquot_item.h4
-rw-r--r--fs/xfs/xfs_error.c3
-rw-r--r--fs/xfs/xfs_export.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c410
-rw-r--r--fs/xfs/xfs_extfree_item.h6
-rw-r--r--fs/xfs/xfs_file.c23
-rw-r--r--fs/xfs/xfs_filestream.c5
-rw-r--r--fs/xfs/xfs_fsmap.c4
-rw-r--r--fs/xfs/xfs_fsops.c8
-rw-r--r--fs/xfs/xfs_globals.c4
-rw-r--r--fs/xfs/xfs_health.c6
-rw-r--r--fs/xfs/xfs_icache.c4
-rw-r--r--fs/xfs/xfs_icreate_item.c75
-rw-r--r--fs/xfs/xfs_inode.c42
-rw-r--r--fs/xfs/xfs_inode_item.c16
-rw-r--r--fs/xfs/xfs_inode_item.h2
-rw-r--r--fs/xfs/xfs_ioctl.c294
-rw-r--r--fs/xfs/xfs_ioctl.h8
-rw-r--r--fs/xfs/xfs_ioctl32.c161
-rw-r--r--fs/xfs/xfs_ioctl32.h14
-rw-r--r--fs/xfs/xfs_iomap.c5
-rw-r--r--fs/xfs/xfs_iops.c10
-rw-r--r--fs/xfs/xfs_itable.c749
-rw-r--r--fs/xfs/xfs_itable.h106
-rw-r--r--fs/xfs/xfs_iwalk.c720
-rw-r--r--fs/xfs/xfs_iwalk.h46
-rw-r--r--fs/xfs/xfs_linux.h5
-rw-r--r--fs/xfs/xfs_log.c644
-rw-r--r--fs/xfs/xfs_log.h17
-rw-r--r--fs/xfs/xfs_log_cil.c51
-rw-r--r--fs/xfs/xfs_log_priv.h36
-rw-r--r--fs/xfs/xfs_log_recover.c463
-rw-r--r--fs/xfs/xfs_message.c2
-rw-r--r--fs/xfs/xfs_mount.c102
-rw-r--r--fs/xfs/xfs_mount.h22
-rw-r--r--fs/xfs/xfs_ondisk.h5
-rw-r--r--fs/xfs/xfs_pnfs.c9
-rw-r--r--fs/xfs/xfs_pwork.c136
-rw-r--r--fs/xfs/xfs_pwork.h61
-rw-r--r--fs/xfs/xfs_qm.c68
-rw-r--r--fs/xfs/xfs_qm_bhv.c2
-rw-r--r--fs/xfs/xfs_qm_syscalls.c5
-rw-r--r--fs/xfs/xfs_quotaops.c3
-rw-r--r--fs/xfs/xfs_refcount_item.c357
-rw-r--r--fs/xfs/xfs_refcount_item.h2
-rw-r--r--fs/xfs/xfs_reflink.c15
-rw-r--r--fs/xfs/xfs_rmap_item.c380
-rw-r--r--fs/xfs/xfs_rmap_item.h2
-rw-r--r--fs/xfs/xfs_rtalloc.c6
-rw-r--r--fs/xfs/xfs_stats.c1
-rw-r--r--fs/xfs/xfs_super.c32
-rw-r--r--fs/xfs/xfs_super.h14
-rw-r--r--fs/xfs/xfs_symlink.c9
-rw-r--r--fs/xfs/xfs_sysctl.c3
-rw-r--r--fs/xfs/xfs_sysctl.h3
-rw-r--r--fs/xfs/xfs_sysfs.c42
-rw-r--r--fs/xfs/xfs_trace.c8
-rw-r--r--fs/xfs/xfs_trace.h61
-rw-r--r--fs/xfs/xfs_trans.c43
-rw-r--r--fs/xfs/xfs_trans.h70
-rw-r--r--fs/xfs/xfs_trans_ail.c53
-rw-r--r--fs/xfs/xfs_trans_bmap.c232
-rw-r--r--fs/xfs/xfs_trans_buf.c11
-rw-r--r--fs/xfs/xfs_trans_dquot.c11
-rw-r--r--fs/xfs/xfs_trans_extfree.c286
-rw-r--r--fs/xfs/xfs_trans_inode.c3
-rw-r--r--fs/xfs/xfs_trans_priv.h4
-rw-r--r--fs/xfs/xfs_trans_refcount.c240
-rw-r--r--fs/xfs/xfs_trans_rmap.c257
-rw-r--r--fs/xfs/xfs_xattr.c5
160 files changed, 4397 insertions, 4703 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 91831975363b..b74a47169297 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -62,6 +62,7 @@ xfs-y += xfs_aops.o \
62 xfs_attr_inactive.o \ 62 xfs_attr_inactive.o \
63 xfs_attr_list.o \ 63 xfs_attr_list.o \
64 xfs_bmap_util.o \ 64 xfs_bmap_util.o \
65 xfs_bio_io.o \
65 xfs_buf.o \ 66 xfs_buf.o \
66 xfs_dir2_readdir.o \ 67 xfs_dir2_readdir.o \
67 xfs_discard.o \ 68 xfs_discard.o \
@@ -80,9 +81,11 @@ xfs-y += xfs_aops.o \
80 xfs_iops.o \ 81 xfs_iops.o \
81 xfs_inode.o \ 82 xfs_inode.o \
82 xfs_itable.o \ 83 xfs_itable.o \
84 xfs_iwalk.o \
83 xfs_message.o \ 85 xfs_message.o \
84 xfs_mount.o \ 86 xfs_mount.o \
85 xfs_mru_cache.o \ 87 xfs_mru_cache.o \
88 xfs_pwork.o \
86 xfs_reflink.o \ 89 xfs_reflink.o \
87 xfs_stats.o \ 90 xfs_stats.o \
88 xfs_super.o \ 91 xfs_super.o \
@@ -104,12 +107,8 @@ xfs-y += xfs_log.o \
104 xfs_rmap_item.o \ 107 xfs_rmap_item.o \
105 xfs_log_recover.o \ 108 xfs_log_recover.o \
106 xfs_trans_ail.o \ 109 xfs_trans_ail.o \
107 xfs_trans_bmap.o \
108 xfs_trans_buf.o \ 110 xfs_trans_buf.o \
109 xfs_trans_extfree.o \ 111 xfs_trans_inode.o
110 xfs_trans_inode.o \
111 xfs_trans_refcount.o \
112 xfs_trans_rmap.o \
113 112
114# optional features 113# optional features
115xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \ 114xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index fdd9d6ede25c..16bb9a328678 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -3,12 +3,7 @@
3 * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include <linux/mm.h>
7#include <linux/sched/mm.h> 6#include <linux/sched/mm.h>
8#include <linux/highmem.h>
9#include <linux/slab.h>
10#include <linux/swap.h>
11#include <linux/blkdev.h>
12#include <linux/backing-dev.h> 7#include <linux/backing-dev.h>
13#include "kmem.h" 8#include "kmem.h"
14#include "xfs_message.h" 9#include "xfs_message.h"
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 8e6b3ba81c03..267655acd426 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -124,4 +124,12 @@ kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags)
124 return kmem_zone_alloc(zone, flags | KM_ZERO); 124 return kmem_zone_alloc(zone, flags | KM_ZERO);
125} 125}
126 126
127static inline struct page *
128kmem_to_page(void *addr)
129{
130 if (is_vmalloc_addr(addr))
131 return vmalloc_to_page(addr);
132 return virt_to_page(addr);
133}
134
127#endif /* __XFS_SUPPORT_KMEM_H__ */ 135#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index b0c89f54d1bb..5de296b34ab1 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -10,6 +10,7 @@
10#include "xfs_shared.h" 10#include "xfs_shared.h"
11#include "xfs_format.h" 11#include "xfs_format.h"
12#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
13#include "xfs_bit.h"
13#include "xfs_sb.h" 14#include "xfs_sb.h"
14#include "xfs_mount.h" 15#include "xfs_mount.h"
15#include "xfs_btree.h" 16#include "xfs_btree.h"
@@ -44,6 +45,12 @@ xfs_get_aghdr_buf(
44 return bp; 45 return bp;
45} 46}
46 47
48static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
49{
50 return mp->m_sb.sb_logstart > 0 &&
51 id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
52}
53
47/* 54/*
48 * Generic btree root block init function 55 * Generic btree root block init function
49 */ 56 */
@@ -53,40 +60,85 @@ xfs_btroot_init(
53 struct xfs_buf *bp, 60 struct xfs_buf *bp,
54 struct aghdr_init_data *id) 61 struct aghdr_init_data *id)
55{ 62{
56 xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno, 0); 63 xfs_btree_init_block(mp, bp, id->type, 0, 0, id->agno);
57} 64}
58 65
59/* 66/* Finish initializing a free space btree. */
60 * Alloc btree root block init functions
61 */
62static void 67static void
63xfs_bnoroot_init( 68xfs_freesp_init_recs(
64 struct xfs_mount *mp, 69 struct xfs_mount *mp,
65 struct xfs_buf *bp, 70 struct xfs_buf *bp,
66 struct aghdr_init_data *id) 71 struct aghdr_init_data *id)
67{ 72{
68 struct xfs_alloc_rec *arec; 73 struct xfs_alloc_rec *arec;
74 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
69 75
70 xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0);
71 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 76 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
72 arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); 77 arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
78
79 if (is_log_ag(mp, id)) {
80 struct xfs_alloc_rec *nrec;
81 xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp,
82 mp->m_sb.sb_logstart);
83
84 ASSERT(start >= mp->m_ag_prealloc_blocks);
85 if (start != mp->m_ag_prealloc_blocks) {
86 /*
87 * Modify first record to pad stripe align of log
88 */
89 arec->ar_blockcount = cpu_to_be32(start -
90 mp->m_ag_prealloc_blocks);
91 nrec = arec + 1;
92
93 /*
94 * Insert second record at start of internal log
95 * which then gets trimmed.
96 */
97 nrec->ar_startblock = cpu_to_be32(
98 be32_to_cpu(arec->ar_startblock) +
99 be32_to_cpu(arec->ar_blockcount));
100 arec = nrec;
101 be16_add_cpu(&block->bb_numrecs, 1);
102 }
103 /*
104 * Change record start to after the internal log
105 */
106 be32_add_cpu(&arec->ar_startblock, mp->m_sb.sb_logblocks);
107 }
108
109 /*
110 * Calculate the record block count and check for the case where
111 * the log might have consumed all available space in the AG. If
112 * so, reset the record count to 0 to avoid exposure of an invalid
113 * record start block.
114 */
73 arec->ar_blockcount = cpu_to_be32(id->agsize - 115 arec->ar_blockcount = cpu_to_be32(id->agsize -
74 be32_to_cpu(arec->ar_startblock)); 116 be32_to_cpu(arec->ar_startblock));
117 if (!arec->ar_blockcount)
118 block->bb_numrecs = 0;
75} 119}
76 120
121/*
122 * Alloc btree root block init functions
123 */
77static void 124static void
78xfs_cntroot_init( 125xfs_bnoroot_init(
79 struct xfs_mount *mp, 126 struct xfs_mount *mp,
80 struct xfs_buf *bp, 127 struct xfs_buf *bp,
81 struct aghdr_init_data *id) 128 struct aghdr_init_data *id)
82{ 129{
83 struct xfs_alloc_rec *arec; 130 xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno);
131 xfs_freesp_init_recs(mp, bp, id);
132}
84 133
85 xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0); 134static void
86 arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); 135xfs_cntroot_init(
87 arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); 136 struct xfs_mount *mp,
88 arec->ar_blockcount = cpu_to_be32(id->agsize - 137 struct xfs_buf *bp,
89 be32_to_cpu(arec->ar_startblock)); 138 struct aghdr_init_data *id)
139{
140 xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno);
141 xfs_freesp_init_recs(mp, bp, id);
90} 142}
91 143
92/* 144/*
@@ -101,7 +153,7 @@ xfs_rmaproot_init(
101 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 153 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
102 struct xfs_rmap_rec *rrec; 154 struct xfs_rmap_rec *rrec;
103 155
104 xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0); 156 xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno);
105 157
106 /* 158 /*
107 * mark the AG header regions as static metadata The BNO 159 * mark the AG header regions as static metadata The BNO
@@ -149,6 +201,18 @@ xfs_rmaproot_init(
149 rrec->rm_offset = 0; 201 rrec->rm_offset = 0;
150 be16_add_cpu(&block->bb_numrecs, 1); 202 be16_add_cpu(&block->bb_numrecs, 1);
151 } 203 }
204
205 /* account for the log space */
206 if (is_log_ag(mp, id)) {
207 rrec = XFS_RMAP_REC_ADDR(block,
208 be16_to_cpu(block->bb_numrecs) + 1);
209 rrec->rm_startblock = cpu_to_be32(
210 XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart));
211 rrec->rm_blockcount = cpu_to_be32(mp->m_sb.sb_logblocks);
212 rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG);
213 rrec->rm_offset = 0;
214 be16_add_cpu(&block->bb_numrecs, 1);
215 }
152} 216}
153 217
154/* 218/*
@@ -209,6 +273,14 @@ xfs_agfblock_init(
209 agf->agf_refcount_level = cpu_to_be32(1); 273 agf->agf_refcount_level = cpu_to_be32(1);
210 agf->agf_refcount_blocks = cpu_to_be32(1); 274 agf->agf_refcount_blocks = cpu_to_be32(1);
211 } 275 }
276
277 if (is_log_ag(mp, id)) {
278 int64_t logblocks = mp->m_sb.sb_logblocks;
279
280 be32_add_cpu(&agf->agf_freeblks, -logblocks);
281 agf->agf_longest = cpu_to_be32(id->agsize -
282 XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart) - logblocks);
283 }
212} 284}
213 285
214static void 286static void
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index e2ba2a3b63b2..87a9747f1d36 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -9,20 +9,12 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_sb.h"
13#include "xfs_mount.h" 12#include "xfs_mount.h"
14#include "xfs_defer.h"
15#include "xfs_alloc.h" 13#include "xfs_alloc.h"
16#include "xfs_errortag.h" 14#include "xfs_errortag.h"
17#include "xfs_error.h" 15#include "xfs_error.h"
18#include "xfs_trace.h" 16#include "xfs_trace.h"
19#include "xfs_cksum.h"
20#include "xfs_trans.h" 17#include "xfs_trans.h"
21#include "xfs_bit.h"
22#include "xfs_bmap.h"
23#include "xfs_bmap_btree.h"
24#include "xfs_ag_resv.h"
25#include "xfs_trans_space.h"
26#include "xfs_rmap_btree.h" 18#include "xfs_rmap_btree.h"
27#include "xfs_btree.h" 19#include "xfs_btree.h"
28#include "xfs_refcount_btree.h" 20#include "xfs_refcount_btree.h"
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index a9ff3cf82cce..372ad55631fc 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -13,7 +13,6 @@
13#include "xfs_sb.h" 13#include "xfs_sb.h"
14#include "xfs_mount.h" 14#include "xfs_mount.h"
15#include "xfs_defer.h" 15#include "xfs_defer.h"
16#include "xfs_inode.h"
17#include "xfs_btree.h" 16#include "xfs_btree.h"
18#include "xfs_rmap.h" 17#include "xfs_rmap.h"
19#include "xfs_alloc_btree.h" 18#include "xfs_alloc_btree.h"
@@ -21,7 +20,6 @@
21#include "xfs_extent_busy.h" 20#include "xfs_extent_busy.h"
22#include "xfs_errortag.h" 21#include "xfs_errortag.h"
23#include "xfs_error.h" 22#include "xfs_error.h"
24#include "xfs_cksum.h"
25#include "xfs_trace.h" 23#include "xfs_trace.h"
26#include "xfs_trans.h" 24#include "xfs_trans.h"
27#include "xfs_buf_item.h" 25#include "xfs_buf_item.h"
@@ -41,8 +39,6 @@ struct workqueue_struct *xfs_alloc_wq;
41STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); 39STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
42STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); 40STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
43STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); 41STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
44STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
45 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
46 42
47/* 43/*
48 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in 44 * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
@@ -555,7 +551,7 @@ static xfs_failaddr_t
555xfs_agfl_verify( 551xfs_agfl_verify(
556 struct xfs_buf *bp) 552 struct xfs_buf *bp)
557{ 553{
558 struct xfs_mount *mp = bp->b_target->bt_mount; 554 struct xfs_mount *mp = bp->b_mount;
559 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); 555 struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp);
560 int i; 556 int i;
561 557
@@ -596,7 +592,7 @@ static void
596xfs_agfl_read_verify( 592xfs_agfl_read_verify(
597 struct xfs_buf *bp) 593 struct xfs_buf *bp)
598{ 594{
599 struct xfs_mount *mp = bp->b_target->bt_mount; 595 struct xfs_mount *mp = bp->b_mount;
600 xfs_failaddr_t fa; 596 xfs_failaddr_t fa;
601 597
602 /* 598 /*
@@ -621,7 +617,7 @@ static void
621xfs_agfl_write_verify( 617xfs_agfl_write_verify(
622 struct xfs_buf *bp) 618 struct xfs_buf *bp)
623{ 619{
624 struct xfs_mount *mp = bp->b_target->bt_mount; 620 struct xfs_mount *mp = bp->b_mount;
625 struct xfs_buf_log_item *bip = bp->b_log_item; 621 struct xfs_buf_log_item *bip = bp->b_log_item;
626 xfs_failaddr_t fa; 622 xfs_failaddr_t fa;
627 623
@@ -700,6 +696,107 @@ xfs_alloc_update_counters(
700 */ 696 */
701 697
702/* 698/*
699 * Deal with the case where only small freespaces remain. Either return the
700 * contents of the last freespace record, or allocate space from the freelist if
701 * there is nothing in the tree.
702 */
703STATIC int /* error */
704xfs_alloc_ag_vextent_small(
705 struct xfs_alloc_arg *args, /* allocation argument structure */
706 struct xfs_btree_cur *ccur, /* optional by-size cursor */
707 xfs_agblock_t *fbnop, /* result block number */
708 xfs_extlen_t *flenp, /* result length */
709 int *stat) /* status: 0-freelist, 1-normal/none */
710{
711 int error = 0;
712 xfs_agblock_t fbno = NULLAGBLOCK;
713 xfs_extlen_t flen = 0;
714 int i = 0;
715
716 /*
717 * If a cntbt cursor is provided, try to allocate the largest record in
718 * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
719 * allocation. Make sure to respect minleft even when pulling from the
720 * freelist.
721 */
722 if (ccur)
723 error = xfs_btree_decrement(ccur, 0, &i);
724 if (error)
725 goto error;
726 if (i) {
727 error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
728 if (error)
729 goto error;
730 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error);
731 goto out;
732 }
733
734 if (args->minlen != 1 || args->alignment != 1 ||
735 args->resv == XFS_AG_RESV_AGFL ||
736 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount) <=
737 args->minleft))
738 goto out;
739
740 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
741 if (error)
742 goto error;
743 if (fbno == NULLAGBLOCK)
744 goto out;
745
746 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
747 xfs_alloc_allow_busy_reuse(args->datatype));
748
749 if (xfs_alloc_is_userdata(args->datatype)) {
750 struct xfs_buf *bp;
751
752 bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
753 if (!bp) {
754 error = -EFSCORRUPTED;
755 goto error;
756 }
757 xfs_trans_binval(args->tp, bp);
758 }
759 *fbnop = args->agbno = fbno;
760 *flenp = args->len = 1;
761 XFS_WANT_CORRUPTED_GOTO(args->mp,
762 fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
763 error);
764 args->wasfromfl = 1;
765 trace_xfs_alloc_small_freelist(args);
766
767 /*
768 * If we're feeding an AGFL block to something that doesn't live in the
769 * free space, we need to clear out the OWN_AG rmap.
770 */
771 error = xfs_rmap_free(args->tp, args->agbp, args->agno, fbno, 1,
772 &XFS_RMAP_OINFO_AG);
773 if (error)
774 goto error;
775
776 *stat = 0;
777 return 0;
778
779out:
780 /*
781 * Can't do the allocation, give up.
782 */
783 if (flen < args->minlen) {
784 args->agbno = NULLAGBLOCK;
785 trace_xfs_alloc_small_notenough(args);
786 flen = 0;
787 }
788 *fbnop = fbno;
789 *flenp = flen;
790 *stat = 1;
791 trace_xfs_alloc_small_done(args);
792 return 0;
793
794error:
795 trace_xfs_alloc_small_error(args);
796 return error;
797}
798
799/*
703 * Allocate a variable extent in the allocation group agno. 800 * Allocate a variable extent in the allocation group agno.
704 * Type and bno are used to determine where in the allocation group the 801 * Type and bno are used to determine where in the allocation group the
705 * extent will start. 802 * extent will start.
@@ -1583,112 +1680,6 @@ out_nominleft:
1583} 1680}
1584 1681
1585/* 1682/*
1586 * Deal with the case where only small freespaces remain.
1587 * Either return the contents of the last freespace record,
1588 * or allocate space from the freelist if there is nothing in the tree.
1589 */
1590STATIC int /* error */
1591xfs_alloc_ag_vextent_small(
1592 xfs_alloc_arg_t *args, /* allocation argument structure */
1593 xfs_btree_cur_t *ccur, /* by-size cursor */
1594 xfs_agblock_t *fbnop, /* result block number */
1595 xfs_extlen_t *flenp, /* result length */
1596 int *stat) /* status: 0-freelist, 1-normal/none */
1597{
1598 int error;
1599 xfs_agblock_t fbno;
1600 xfs_extlen_t flen;
1601 int i;
1602
1603 if ((error = xfs_btree_decrement(ccur, 0, &i)))
1604 goto error0;
1605 if (i) {
1606 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1607 goto error0;
1608 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1609 }
1610 /*
1611 * Nothing in the btree, try the freelist. Make sure
1612 * to respect minleft even when pulling from the
1613 * freelist.
1614 */
1615 else if (args->minlen == 1 && args->alignment == 1 &&
1616 args->resv != XFS_AG_RESV_AGFL &&
1617 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1618 > args->minleft)) {
1619 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1620 if (error)
1621 goto error0;
1622 if (fbno != NULLAGBLOCK) {
1623 xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
1624 xfs_alloc_allow_busy_reuse(args->datatype));
1625
1626 if (xfs_alloc_is_userdata(args->datatype)) {
1627 xfs_buf_t *bp;
1628
1629 bp = xfs_btree_get_bufs(args->mp, args->tp,
1630 args->agno, fbno, 0);
1631 if (!bp) {
1632 error = -EFSCORRUPTED;
1633 goto error0;
1634 }
1635 xfs_trans_binval(args->tp, bp);
1636 }
1637 args->len = 1;
1638 args->agbno = fbno;
1639 XFS_WANT_CORRUPTED_GOTO(args->mp,
1640 args->agbno + args->len <=
1641 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
1642 error0);
1643 args->wasfromfl = 1;
1644 trace_xfs_alloc_small_freelist(args);
1645
1646 /*
1647 * If we're feeding an AGFL block to something that
1648 * doesn't live in the free space, we need to clear
1649 * out the OWN_AG rmap.
1650 */
1651 error = xfs_rmap_free(args->tp, args->agbp, args->agno,
1652 fbno, 1, &XFS_RMAP_OINFO_AG);
1653 if (error)
1654 goto error0;
1655
1656 *stat = 0;
1657 return 0;
1658 }
1659 /*
1660 * Nothing in the freelist.
1661 */
1662 else
1663 flen = 0;
1664 }
1665 /*
1666 * Can't allocate from the freelist for some reason.
1667 */
1668 else {
1669 fbno = NULLAGBLOCK;
1670 flen = 0;
1671 }
1672 /*
1673 * Can't do the allocation, give up.
1674 */
1675 if (flen < args->minlen) {
1676 args->agbno = NULLAGBLOCK;
1677 trace_xfs_alloc_small_notenough(args);
1678 flen = 0;
1679 }
1680 *fbnop = fbno;
1681 *flenp = flen;
1682 *stat = 1;
1683 trace_xfs_alloc_small_done(args);
1684 return 0;
1685
1686error0:
1687 trace_xfs_alloc_small_error(args);
1688 return error;
1689}
1690
1691/*
1692 * Free the extent starting at agno/bno for length. 1683 * Free the extent starting at agno/bno for length.
1693 */ 1684 */
1694STATIC int 1685STATIC int
@@ -2095,7 +2086,7 @@ xfs_free_agfl_block(
2095 if (error) 2086 if (error)
2096 return error; 2087 return error;
2097 2088
2098 bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno, 0); 2089 bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
2099 if (!bp) 2090 if (!bp)
2100 return -EFSCORRUPTED; 2091 return -EFSCORRUPTED;
2101 xfs_trans_binval(tp, bp); 2092 xfs_trans_binval(tp, bp);
@@ -2586,7 +2577,7 @@ static xfs_failaddr_t
2586xfs_agf_verify( 2577xfs_agf_verify(
2587 struct xfs_buf *bp) 2578 struct xfs_buf *bp)
2588{ 2579{
2589 struct xfs_mount *mp = bp->b_target->bt_mount; 2580 struct xfs_mount *mp = bp->b_mount;
2590 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2581 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
2591 2582
2592 if (xfs_sb_version_hascrc(&mp->m_sb)) { 2583 if (xfs_sb_version_hascrc(&mp->m_sb)) {
@@ -2644,7 +2635,7 @@ static void
2644xfs_agf_read_verify( 2635xfs_agf_read_verify(
2645 struct xfs_buf *bp) 2636 struct xfs_buf *bp)
2646{ 2637{
2647 struct xfs_mount *mp = bp->b_target->bt_mount; 2638 struct xfs_mount *mp = bp->b_mount;
2648 xfs_failaddr_t fa; 2639 xfs_failaddr_t fa;
2649 2640
2650 if (xfs_sb_version_hascrc(&mp->m_sb) && 2641 if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2661,7 +2652,7 @@ static void
2661xfs_agf_write_verify( 2652xfs_agf_write_verify(
2662 struct xfs_buf *bp) 2653 struct xfs_buf *bp)
2663{ 2654{
2664 struct xfs_mount *mp = bp->b_target->bt_mount; 2655 struct xfs_mount *mp = bp->b_mount;
2665 struct xfs_buf_log_item *bip = bp->b_log_item; 2656 struct xfs_buf_log_item *bip = bp->b_log_item;
2666 xfs_failaddr_t fa; 2657 xfs_failaddr_t fa;
2667 2658
@@ -3146,7 +3137,7 @@ xfs_alloc_has_record(
3146 3137
3147/* 3138/*
3148 * Walk all the blocks in the AGFL. The @walk_fn can return any negative 3139 * Walk all the blocks in the AGFL. The @walk_fn can return any negative
3149 * error code or XFS_BTREE_QUERY_RANGE_ABORT. 3140 * error code or XFS_ITER_*.
3150 */ 3141 */
3151int 3142int
3152xfs_agfl_walk( 3143xfs_agfl_walk(
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 9fe949f6055e..2a94543857a1 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -17,7 +17,6 @@
17#include "xfs_extent_busy.h" 17#include "xfs_extent_busy.h"
18#include "xfs_error.h" 18#include "xfs_error.h"
19#include "xfs_trace.h" 19#include "xfs_trace.h"
20#include "xfs_cksum.h"
21#include "xfs_trans.h" 20#include "xfs_trans.h"
22 21
23 22
@@ -292,7 +291,7 @@ static xfs_failaddr_t
292xfs_allocbt_verify( 291xfs_allocbt_verify(
293 struct xfs_buf *bp) 292 struct xfs_buf *bp)
294{ 293{
295 struct xfs_mount *mp = bp->b_target->bt_mount; 294 struct xfs_mount *mp = bp->b_mount;
296 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 295 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
297 struct xfs_perag *pag = bp->b_pag; 296 struct xfs_perag *pag = bp->b_pag;
298 xfs_failaddr_t fa; 297 xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index c441f41f14e8..d48fcf11cc35 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -9,23 +9,18 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_mount.h" 12#include "xfs_mount.h"
14#include "xfs_defer.h" 13#include "xfs_defer.h"
15#include "xfs_da_format.h" 14#include "xfs_da_format.h"
16#include "xfs_da_btree.h" 15#include "xfs_da_btree.h"
17#include "xfs_attr_sf.h" 16#include "xfs_attr_sf.h"
18#include "xfs_inode.h" 17#include "xfs_inode.h"
19#include "xfs_alloc.h"
20#include "xfs_trans.h" 18#include "xfs_trans.h"
21#include "xfs_inode_item.h"
22#include "xfs_bmap.h" 19#include "xfs_bmap.h"
23#include "xfs_bmap_util.h"
24#include "xfs_bmap_btree.h" 20#include "xfs_bmap_btree.h"
25#include "xfs_attr.h" 21#include "xfs_attr.h"
26#include "xfs_attr_leaf.h" 22#include "xfs_attr_leaf.h"
27#include "xfs_attr_remote.h" 23#include "xfs_attr_remote.h"
28#include "xfs_error.h"
29#include "xfs_quota.h" 24#include "xfs_quota.h"
30#include "xfs_trans_space.h" 25#include "xfs_trans_space.h"
31#include "xfs_trace.h" 26#include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 3b0dce06e454..ff28ebf3b635 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -112,7 +112,13 @@ typedef struct xfs_attr_list_context {
112 struct xfs_inode *dp; /* inode */ 112 struct xfs_inode *dp; /* inode */
113 struct attrlist_cursor_kern *cursor; /* position in list */ 113 struct attrlist_cursor_kern *cursor; /* position in list */
114 char *alist; /* output buffer */ 114 char *alist; /* output buffer */
115 int seen_enough; /* T/F: seen enough of list? */ 115
116 /*
117 * Abort attribute list iteration if non-zero. Can be used to pass
118 * error values to the xfs_attr_list caller.
119 */
120 int seen_enough;
121
116 ssize_t count; /* num used entries */ 122 ssize_t count; /* num used entries */
117 int dupcnt; /* count dup hashvals seen */ 123 int dupcnt; /* count dup hashvals seen */
118 int bufsize; /* total buffer size */ 124 int bufsize; /* total buffer size */
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 1f6e3965ff74..70eb941d02e4 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -10,14 +10,12 @@
10#include "xfs_format.h" 10#include "xfs_format.h"
11#include "xfs_log_format.h" 11#include "xfs_log_format.h"
12#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
13#include "xfs_bit.h"
14#include "xfs_sb.h" 13#include "xfs_sb.h"
15#include "xfs_mount.h" 14#include "xfs_mount.h"
16#include "xfs_da_format.h" 15#include "xfs_da_format.h"
17#include "xfs_da_btree.h" 16#include "xfs_da_btree.h"
18#include "xfs_inode.h" 17#include "xfs_inode.h"
19#include "xfs_trans.h" 18#include "xfs_trans.h"
20#include "xfs_inode_item.h"
21#include "xfs_bmap_btree.h" 19#include "xfs_bmap_btree.h"
22#include "xfs_bmap.h" 20#include "xfs_bmap.h"
23#include "xfs_attr_sf.h" 21#include "xfs_attr_sf.h"
@@ -27,7 +25,6 @@
27#include "xfs_error.h" 25#include "xfs_error.h"
28#include "xfs_trace.h" 26#include "xfs_trace.h"
29#include "xfs_buf_item.h" 27#include "xfs_buf_item.h"
30#include "xfs_cksum.h"
31#include "xfs_dir2.h" 28#include "xfs_dir2.h"
32#include "xfs_log.h" 29#include "xfs_log.h"
33 30
@@ -240,7 +237,7 @@ xfs_attr3_leaf_verify(
240 struct xfs_buf *bp) 237 struct xfs_buf *bp)
241{ 238{
242 struct xfs_attr3_icleaf_hdr ichdr; 239 struct xfs_attr3_icleaf_hdr ichdr;
243 struct xfs_mount *mp = bp->b_target->bt_mount; 240 struct xfs_mount *mp = bp->b_mount;
244 struct xfs_attr_leafblock *leaf = bp->b_addr; 241 struct xfs_attr_leafblock *leaf = bp->b_addr;
245 struct xfs_attr_leaf_entry *entries; 242 struct xfs_attr_leaf_entry *entries;
246 uint32_t end; /* must be 32bit - see below */ 243 uint32_t end; /* must be 32bit - see below */
@@ -313,7 +310,7 @@ static void
313xfs_attr3_leaf_write_verify( 310xfs_attr3_leaf_write_verify(
314 struct xfs_buf *bp) 311 struct xfs_buf *bp)
315{ 312{
316 struct xfs_mount *mp = bp->b_target->bt_mount; 313 struct xfs_mount *mp = bp->b_mount;
317 struct xfs_buf_log_item *bip = bp->b_log_item; 314 struct xfs_buf_log_item *bip = bp->b_log_item;
318 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 315 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
319 xfs_failaddr_t fa; 316 xfs_failaddr_t fa;
@@ -343,7 +340,7 @@ static void
343xfs_attr3_leaf_read_verify( 340xfs_attr3_leaf_read_verify(
344 struct xfs_buf *bp) 341 struct xfs_buf *bp)
345{ 342{
346 struct xfs_mount *mp = bp->b_target->bt_mount; 343 struct xfs_mount *mp = bp->b_mount;
347 xfs_failaddr_t fa; 344 xfs_failaddr_t fa;
348 345
349 if (xfs_sb_version_hascrc(&mp->m_sb) && 346 if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -865,7 +862,7 @@ xfs_attr_shortform_allfit(
865 struct xfs_attr3_icleaf_hdr leafhdr; 862 struct xfs_attr3_icleaf_hdr leafhdr;
866 int bytes; 863 int bytes;
867 int i; 864 int i;
868 struct xfs_mount *mp = bp->b_target->bt_mount; 865 struct xfs_mount *mp = bp->b_mount;
869 866
870 leaf = bp->b_addr; 867 leaf = bp->b_addr;
871 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); 868 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
@@ -1525,7 +1522,7 @@ xfs_attr_leaf_order(
1525{ 1522{
1526 struct xfs_attr3_icleaf_hdr ichdr1; 1523 struct xfs_attr3_icleaf_hdr ichdr1;
1527 struct xfs_attr3_icleaf_hdr ichdr2; 1524 struct xfs_attr3_icleaf_hdr ichdr2;
1528 struct xfs_mount *mp = leaf1_bp->b_target->bt_mount; 1525 struct xfs_mount *mp = leaf1_bp->b_mount;
1529 1526
1530 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr); 1527 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr1, leaf1_bp->b_addr);
1531 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr); 1528 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr2, leaf2_bp->b_addr);
@@ -2568,7 +2565,7 @@ xfs_attr_leaf_lasthash(
2568{ 2565{
2569 struct xfs_attr3_icleaf_hdr ichdr; 2566 struct xfs_attr3_icleaf_hdr ichdr;
2570 struct xfs_attr_leaf_entry *entries; 2567 struct xfs_attr_leaf_entry *entries;
2571 struct xfs_mount *mp = bp->b_target->bt_mount; 2568 struct xfs_mount *mp = bp->b_mount;
2572 2569
2573 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr); 2570 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, bp->b_addr);
2574 entries = xfs_attr3_leaf_entryp(bp->b_addr); 2571 entries = xfs_attr3_leaf_entryp(bp->b_addr);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 65ff600a8067..4eb30d357045 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -16,18 +16,10 @@
16#include "xfs_da_format.h" 16#include "xfs_da_format.h"
17#include "xfs_da_btree.h" 17#include "xfs_da_btree.h"
18#include "xfs_inode.h" 18#include "xfs_inode.h"
19#include "xfs_alloc.h"
20#include "xfs_trans.h" 19#include "xfs_trans.h"
21#include "xfs_inode_item.h"
22#include "xfs_bmap.h" 20#include "xfs_bmap.h"
23#include "xfs_bmap_util.h"
24#include "xfs_attr.h" 21#include "xfs_attr.h"
25#include "xfs_attr_leaf.h"
26#include "xfs_attr_remote.h"
27#include "xfs_trans_space.h"
28#include "xfs_trace.h" 22#include "xfs_trace.h"
29#include "xfs_cksum.h"
30#include "xfs_buf_item.h"
31#include "xfs_error.h" 23#include "xfs_error.h"
32 24
33#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ 25#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
@@ -111,7 +103,7 @@ __xfs_attr3_rmt_read_verify(
111 bool check_crc, 103 bool check_crc,
112 xfs_failaddr_t *failaddr) 104 xfs_failaddr_t *failaddr)
113{ 105{
114 struct xfs_mount *mp = bp->b_target->bt_mount; 106 struct xfs_mount *mp = bp->b_mount;
115 char *ptr; 107 char *ptr;
116 int len; 108 int len;
117 xfs_daddr_t bno; 109 xfs_daddr_t bno;
@@ -175,7 +167,7 @@ static void
175xfs_attr3_rmt_write_verify( 167xfs_attr3_rmt_write_verify(
176 struct xfs_buf *bp) 168 struct xfs_buf *bp)
177{ 169{
178 struct xfs_mount *mp = bp->b_target->bt_mount; 170 struct xfs_mount *mp = bp->b_mount;
179 xfs_failaddr_t fa; 171 xfs_failaddr_t fa;
180 int blksize = mp->m_attr_geo->blksize; 172 int blksize = mp->m_attr_geo->blksize;
181 char *ptr; 173 char *ptr;
@@ -535,7 +527,7 @@ xfs_attr_rmtval_set(
535 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), 527 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
536 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); 528 dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
537 529
538 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, 0); 530 bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt);
539 if (!bp) 531 if (!bp)
540 return -ENOMEM; 532 return -ENOMEM;
541 bp->b_ops = &xfs_attr3_rmt_buf_ops; 533 bp->b_ops = &xfs_attr3_rmt_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
index 40ce5f3094d1..7071ff98fdbc 100644
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -5,7 +5,6 @@
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_log_format.h" 7#include "xfs_log_format.h"
8#include "xfs_bit.h"
9 8
10/* 9/*
11 * XFS bit manipulation routines, used in non-realtime code. 10 * XFS bit manipulation routines, used in non-realtime code.
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 356ebd1cbe82..baf0b72c0a37 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -13,14 +13,10 @@
13#include "xfs_sb.h" 13#include "xfs_sb.h"
14#include "xfs_mount.h" 14#include "xfs_mount.h"
15#include "xfs_defer.h" 15#include "xfs_defer.h"
16#include "xfs_da_format.h"
17#include "xfs_da_btree.h"
18#include "xfs_dir2.h" 16#include "xfs_dir2.h"
19#include "xfs_inode.h" 17#include "xfs_inode.h"
20#include "xfs_btree.h" 18#include "xfs_btree.h"
21#include "xfs_trans.h" 19#include "xfs_trans.h"
22#include "xfs_inode_item.h"
23#include "xfs_extfree_item.h"
24#include "xfs_alloc.h" 20#include "xfs_alloc.h"
25#include "xfs_bmap.h" 21#include "xfs_bmap.h"
26#include "xfs_bmap_util.h" 22#include "xfs_bmap_util.h"
@@ -32,7 +28,6 @@
32#include "xfs_trans_space.h" 28#include "xfs_trans_space.h"
33#include "xfs_buf_item.h" 29#include "xfs_buf_item.h"
34#include "xfs_trace.h" 30#include "xfs_trace.h"
35#include "xfs_symlink.h"
36#include "xfs_attr_leaf.h" 31#include "xfs_attr_leaf.h"
37#include "xfs_filestream.h" 32#include "xfs_filestream.h"
38#include "xfs_rmap.h" 33#include "xfs_rmap.h"
@@ -370,7 +365,7 @@ xfs_bmap_check_leaf_extents(
370 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); 365 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
371 if (!bp) { 366 if (!bp) {
372 bp_release = 1; 367 bp_release = 1;
373 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 368 error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
374 XFS_BMAP_BTREE_REF, 369 XFS_BMAP_BTREE_REF,
375 &xfs_bmbt_buf_ops); 370 &xfs_bmbt_buf_ops);
376 if (error) 371 if (error)
@@ -454,7 +449,7 @@ xfs_bmap_check_leaf_extents(
454 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno)); 449 bp = xfs_bmap_get_bp(cur, XFS_FSB_TO_DADDR(mp, bno));
455 if (!bp) { 450 if (!bp) {
456 bp_release = 1; 451 bp_release = 1;
457 error = xfs_btree_read_bufl(mp, NULL, bno, 0, &bp, 452 error = xfs_btree_read_bufl(mp, NULL, bno, &bp,
458 XFS_BMAP_BTREE_REF, 453 XFS_BMAP_BTREE_REF,
459 &xfs_bmbt_buf_ops); 454 &xfs_bmbt_buf_ops);
460 if (error) 455 if (error)
@@ -619,7 +614,7 @@ xfs_bmap_btree_to_extents(
619 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, 614 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
620 xfs_btree_check_lptr(cur, cbno, 1)); 615 xfs_btree_check_lptr(cur, cbno, 1));
621#endif 616#endif
622 error = xfs_btree_read_bufl(mp, tp, cbno, 0, &cbp, XFS_BMAP_BTREE_REF, 617 error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
623 &xfs_bmbt_buf_ops); 618 &xfs_bmbt_buf_ops);
624 if (error) 619 if (error)
625 return error; 620 return error;
@@ -732,7 +727,7 @@ xfs_bmap_extents_to_btree(
732 cur->bc_private.b.allocated++; 727 cur->bc_private.b.allocated++;
733 ip->i_d.di_nblocks++; 728 ip->i_d.di_nblocks++;
734 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); 729 xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
735 abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0); 730 abp = xfs_btree_get_bufl(mp, tp, args.fsbno);
736 if (!abp) { 731 if (!abp) {
737 error = -EFSCORRUPTED; 732 error = -EFSCORRUPTED;
738 goto out_unreserve_dquot; 733 goto out_unreserve_dquot;
@@ -878,7 +873,7 @@ xfs_bmap_local_to_extents(
878 ASSERT(args.fsbno != NULLFSBLOCK); 873 ASSERT(args.fsbno != NULLFSBLOCK);
879 ASSERT(args.len == 1); 874 ASSERT(args.len == 1);
880 tp->t_firstblock = args.fsbno; 875 tp->t_firstblock = args.fsbno;
881 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 876 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno);
882 877
883 /* 878 /*
884 * Initialize the block, copy the data and log the remote buffer. 879 * Initialize the block, copy the data and log the remote buffer.
@@ -1203,7 +1198,7 @@ xfs_iread_extents(
1203 * pointer (leftmost) at each level. 1198 * pointer (leftmost) at each level.
1204 */ 1199 */
1205 while (level-- > 0) { 1200 while (level-- > 0) {
1206 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 1201 error = xfs_btree_read_bufl(mp, tp, bno, &bp,
1207 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); 1202 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1208 if (error) 1203 if (error)
1209 goto out; 1204 goto out;
@@ -1276,7 +1271,7 @@ xfs_iread_extents(
1276 */ 1271 */
1277 if (bno == NULLFSBLOCK) 1272 if (bno == NULLFSBLOCK)
1278 break; 1273 break;
1279 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 1274 error = xfs_btree_read_bufl(mp, tp, bno, &bp,
1280 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops); 1275 XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
1281 if (error) 1276 if (error)
1282 goto out; 1277 goto out;
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index aff82ed112c9..fbb18ba5d905 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -11,10 +11,8 @@
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h" 12#include "xfs_bit.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_defer.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_trans.h" 15#include "xfs_trans.h"
17#include "xfs_inode_item.h"
18#include "xfs_alloc.h" 16#include "xfs_alloc.h"
19#include "xfs_btree.h" 17#include "xfs_btree.h"
20#include "xfs_bmap_btree.h" 18#include "xfs_bmap_btree.h"
@@ -22,7 +20,6 @@
22#include "xfs_error.h" 20#include "xfs_error.h"
23#include "xfs_quota.h" 21#include "xfs_quota.h"
24#include "xfs_trace.h" 22#include "xfs_trace.h"
25#include "xfs_cksum.h"
26#include "xfs_rmap.h" 23#include "xfs_rmap.h"
27 24
28/* 25/*
@@ -411,7 +408,7 @@ static xfs_failaddr_t
411xfs_bmbt_verify( 408xfs_bmbt_verify(
412 struct xfs_buf *bp) 409 struct xfs_buf *bp)
413{ 410{
414 struct xfs_mount *mp = bp->b_target->bt_mount; 411 struct xfs_mount *mp = bp->b_mount;
415 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 412 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
416 xfs_failaddr_t fa; 413 xfs_failaddr_t fa;
417 unsigned int level; 414 unsigned int level;
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index bbdae2b4559f..f1048efa4268 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -11,16 +11,13 @@
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h" 12#include "xfs_bit.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_defer.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_trans.h" 15#include "xfs_trans.h"
17#include "xfs_inode_item.h"
18#include "xfs_buf_item.h" 16#include "xfs_buf_item.h"
19#include "xfs_btree.h" 17#include "xfs_btree.h"
20#include "xfs_errortag.h" 18#include "xfs_errortag.h"
21#include "xfs_error.h" 19#include "xfs_error.h"
22#include "xfs_trace.h" 20#include "xfs_trace.h"
23#include "xfs_cksum.h"
24#include "xfs_alloc.h" 21#include "xfs_alloc.h"
25#include "xfs_log.h" 22#include "xfs_log.h"
26 23
@@ -276,7 +273,7 @@ xfs_btree_lblock_calc_crc(
276 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 273 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
277 struct xfs_buf_log_item *bip = bp->b_log_item; 274 struct xfs_buf_log_item *bip = bp->b_log_item;
278 275
279 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 276 if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
280 return; 277 return;
281 if (bip) 278 if (bip)
282 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 279 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -288,7 +285,7 @@ xfs_btree_lblock_verify_crc(
288 struct xfs_buf *bp) 285 struct xfs_buf *bp)
289{ 286{
290 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 287 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
291 struct xfs_mount *mp = bp->b_target->bt_mount; 288 struct xfs_mount *mp = bp->b_mount;
292 289
293 if (xfs_sb_version_hascrc(&mp->m_sb)) { 290 if (xfs_sb_version_hascrc(&mp->m_sb)) {
294 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn))) 291 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
@@ -314,7 +311,7 @@ xfs_btree_sblock_calc_crc(
314 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 311 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
315 struct xfs_buf_log_item *bip = bp->b_log_item; 312 struct xfs_buf_log_item *bip = bp->b_log_item;
316 313
317 if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 314 if (!xfs_sb_version_hascrc(&bp->b_mount->m_sb))
318 return; 315 return;
319 if (bip) 316 if (bip)
320 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 317 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
@@ -326,7 +323,7 @@ xfs_btree_sblock_verify_crc(
326 struct xfs_buf *bp) 323 struct xfs_buf *bp)
327{ 324{
328 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 325 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
329 struct xfs_mount *mp = bp->b_target->bt_mount; 326 struct xfs_mount *mp = bp->b_mount;
330 327
331 if (xfs_sb_version_hascrc(&mp->m_sb)) { 328 if (xfs_sb_version_hascrc(&mp->m_sb)) {
332 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn))) 329 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
@@ -691,14 +688,13 @@ xfs_buf_t * /* buffer for fsbno */
691xfs_btree_get_bufl( 688xfs_btree_get_bufl(
692 xfs_mount_t *mp, /* file system mount point */ 689 xfs_mount_t *mp, /* file system mount point */
693 xfs_trans_t *tp, /* transaction pointer */ 690 xfs_trans_t *tp, /* transaction pointer */
694 xfs_fsblock_t fsbno, /* file system block number */ 691 xfs_fsblock_t fsbno) /* file system block number */
695 uint lock) /* lock flags for get_buf */
696{ 692{
697 xfs_daddr_t d; /* real disk block address */ 693 xfs_daddr_t d; /* real disk block address */
698 694
699 ASSERT(fsbno != NULLFSBLOCK); 695 ASSERT(fsbno != NULLFSBLOCK);
700 d = XFS_FSB_TO_DADDR(mp, fsbno); 696 d = XFS_FSB_TO_DADDR(mp, fsbno);
701 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 697 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
702} 698}
703 699
704/* 700/*
@@ -710,15 +706,14 @@ xfs_btree_get_bufs(
710 xfs_mount_t *mp, /* file system mount point */ 706 xfs_mount_t *mp, /* file system mount point */
711 xfs_trans_t *tp, /* transaction pointer */ 707 xfs_trans_t *tp, /* transaction pointer */
712 xfs_agnumber_t agno, /* allocation group number */ 708 xfs_agnumber_t agno, /* allocation group number */
713 xfs_agblock_t agbno, /* allocation group block number */ 709 xfs_agblock_t agbno) /* allocation group block number */
714 uint lock) /* lock flags for get_buf */
715{ 710{
716 xfs_daddr_t d; /* real disk block address */ 711 xfs_daddr_t d; /* real disk block address */
717 712
718 ASSERT(agno != NULLAGNUMBER); 713 ASSERT(agno != NULLAGNUMBER);
719 ASSERT(agbno != NULLAGBLOCK); 714 ASSERT(agbno != NULLAGBLOCK);
720 d = XFS_AGB_TO_DADDR(mp, agno, agbno); 715 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
721 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); 716 return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
722} 717}
723 718
724/* 719/*
@@ -845,7 +840,6 @@ xfs_btree_read_bufl(
845 struct xfs_mount *mp, /* file system mount point */ 840 struct xfs_mount *mp, /* file system mount point */
846 struct xfs_trans *tp, /* transaction pointer */ 841 struct xfs_trans *tp, /* transaction pointer */
847 xfs_fsblock_t fsbno, /* file system block number */ 842 xfs_fsblock_t fsbno, /* file system block number */
848 uint lock, /* lock flags for read_buf */
849 struct xfs_buf **bpp, /* buffer for fsbno */ 843 struct xfs_buf **bpp, /* buffer for fsbno */
850 int refval, /* ref count value for buffer */ 844 int refval, /* ref count value for buffer */
851 const struct xfs_buf_ops *ops) 845 const struct xfs_buf_ops *ops)
@@ -858,7 +852,7 @@ xfs_btree_read_bufl(
858 return -EFSCORRUPTED; 852 return -EFSCORRUPTED;
859 d = XFS_FSB_TO_DADDR(mp, fsbno); 853 d = XFS_FSB_TO_DADDR(mp, fsbno);
860 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, 854 error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
861 mp->m_bsize, lock, &bp, ops); 855 mp->m_bsize, 0, &bp, ops);
862 if (error) 856 if (error)
863 return error; 857 return error;
864 if (bp) 858 if (bp)
@@ -1185,11 +1179,10 @@ xfs_btree_init_block(
1185 xfs_btnum_t btnum, 1179 xfs_btnum_t btnum,
1186 __u16 level, 1180 __u16 level,
1187 __u16 numrecs, 1181 __u16 numrecs,
1188 __u64 owner, 1182 __u64 owner)
1189 unsigned int flags)
1190{ 1183{
1191 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, 1184 xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
1192 btnum, level, numrecs, owner, flags); 1185 btnum, level, numrecs, owner, 0);
1193} 1186}
1194 1187
1195STATIC void 1188STATIC void
@@ -1288,7 +1281,6 @@ STATIC int
1288xfs_btree_get_buf_block( 1281xfs_btree_get_buf_block(
1289 struct xfs_btree_cur *cur, 1282 struct xfs_btree_cur *cur,
1290 union xfs_btree_ptr *ptr, 1283 union xfs_btree_ptr *ptr,
1291 int flags,
1292 struct xfs_btree_block **block, 1284 struct xfs_btree_block **block,
1293 struct xfs_buf **bpp) 1285 struct xfs_buf **bpp)
1294{ 1286{
@@ -1296,14 +1288,11 @@ xfs_btree_get_buf_block(
1296 xfs_daddr_t d; 1288 xfs_daddr_t d;
1297 int error; 1289 int error;
1298 1290
1299 /* need to sort out how callers deal with failures first */
1300 ASSERT(!(flags & XBF_TRYLOCK));
1301
1302 error = xfs_btree_ptr_to_daddr(cur, ptr, &d); 1291 error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
1303 if (error) 1292 if (error)
1304 return error; 1293 return error;
1305 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, 1294 *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1306 mp->m_bsize, flags); 1295 mp->m_bsize, 0);
1307 1296
1308 if (!*bpp) 1297 if (!*bpp)
1309 return -ENOMEM; 1298 return -ENOMEM;
@@ -2706,7 +2695,7 @@ __xfs_btree_split(
2706 XFS_BTREE_STATS_INC(cur, alloc); 2695 XFS_BTREE_STATS_INC(cur, alloc);
2707 2696
2708 /* Set up the new block as "right". */ 2697 /* Set up the new block as "right". */
2709 error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp); 2698 error = xfs_btree_get_buf_block(cur, &rptr, &right, &rbp);
2710 if (error) 2699 if (error)
2711 goto error0; 2700 goto error0;
2712 2701
@@ -2961,7 +2950,7 @@ xfs_btree_new_iroot(
2961 XFS_BTREE_STATS_INC(cur, alloc); 2950 XFS_BTREE_STATS_INC(cur, alloc);
2962 2951
2963 /* Copy the root into a real block. */ 2952 /* Copy the root into a real block. */
2964 error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp); 2953 error = xfs_btree_get_buf_block(cur, &nptr, &cblock, &cbp);
2965 if (error) 2954 if (error)
2966 goto error0; 2955 goto error0;
2967 2956
@@ -3058,7 +3047,7 @@ xfs_btree_new_root(
3058 XFS_BTREE_STATS_INC(cur, alloc); 3047 XFS_BTREE_STATS_INC(cur, alloc);
3059 3048
3060 /* Set up the new block. */ 3049 /* Set up the new block. */
3061 error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp); 3050 error = xfs_btree_get_buf_block(cur, &lptr, &new, &nbp);
3062 if (error) 3051 if (error)
3063 goto error0; 3052 goto error0;
3064 3053
@@ -4433,7 +4422,7 @@ xfs_btree_lblock_v5hdr_verify(
4433 struct xfs_buf *bp, 4422 struct xfs_buf *bp,
4434 uint64_t owner) 4423 uint64_t owner)
4435{ 4424{
4436 struct xfs_mount *mp = bp->b_target->bt_mount; 4425 struct xfs_mount *mp = bp->b_mount;
4437 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4426 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4438 4427
4439 if (!xfs_sb_version_hascrc(&mp->m_sb)) 4428 if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -4454,7 +4443,7 @@ xfs_btree_lblock_verify(
4454 struct xfs_buf *bp, 4443 struct xfs_buf *bp,
4455 unsigned int max_recs) 4444 unsigned int max_recs)
4456{ 4445{
4457 struct xfs_mount *mp = bp->b_target->bt_mount; 4446 struct xfs_mount *mp = bp->b_mount;
4458 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4447 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4459 4448
4460 /* numrecs verification */ 4449 /* numrecs verification */
@@ -4484,7 +4473,7 @@ xfs_failaddr_t
4484xfs_btree_sblock_v5hdr_verify( 4473xfs_btree_sblock_v5hdr_verify(
4485 struct xfs_buf *bp) 4474 struct xfs_buf *bp)
4486{ 4475{
4487 struct xfs_mount *mp = bp->b_target->bt_mount; 4476 struct xfs_mount *mp = bp->b_mount;
4488 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4477 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4489 struct xfs_perag *pag = bp->b_pag; 4478 struct xfs_perag *pag = bp->b_pag;
4490 4479
@@ -4510,7 +4499,7 @@ xfs_btree_sblock_verify(
4510 struct xfs_buf *bp, 4499 struct xfs_buf *bp,
4511 unsigned int max_recs) 4500 unsigned int max_recs)
4512{ 4501{
4513 struct xfs_mount *mp = bp->b_target->bt_mount; 4502 struct xfs_mount *mp = bp->b_mount;
4514 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4503 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4515 xfs_agblock_t agno; 4504 xfs_agblock_t agno;
4516 4505
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index e3b3e9dce5da..fa3cd8ab9aba 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -301,8 +301,7 @@ struct xfs_buf * /* buffer for fsbno */
301xfs_btree_get_bufl( 301xfs_btree_get_bufl(
302 struct xfs_mount *mp, /* file system mount point */ 302 struct xfs_mount *mp, /* file system mount point */
303 struct xfs_trans *tp, /* transaction pointer */ 303 struct xfs_trans *tp, /* transaction pointer */
304 xfs_fsblock_t fsbno, /* file system block number */ 304 xfs_fsblock_t fsbno); /* file system block number */
305 uint lock); /* lock flags for get_buf */
306 305
307/* 306/*
308 * Get a buffer for the block, return it with no data read. 307 * Get a buffer for the block, return it with no data read.
@@ -313,8 +312,7 @@ xfs_btree_get_bufs(
313 struct xfs_mount *mp, /* file system mount point */ 312 struct xfs_mount *mp, /* file system mount point */
314 struct xfs_trans *tp, /* transaction pointer */ 313 struct xfs_trans *tp, /* transaction pointer */
315 xfs_agnumber_t agno, /* allocation group number */ 314 xfs_agnumber_t agno, /* allocation group number */
316 xfs_agblock_t agbno, /* allocation group block number */ 315 xfs_agblock_t agbno); /* allocation group block number */
317 uint lock); /* lock flags for get_buf */
318 316
319/* 317/*
320 * Check for the cursor referring to the last block at the given level. 318 * Check for the cursor referring to the last block at the given level.
@@ -345,7 +343,6 @@ xfs_btree_read_bufl(
345 struct xfs_mount *mp, /* file system mount point */ 343 struct xfs_mount *mp, /* file system mount point */
346 struct xfs_trans *tp, /* transaction pointer */ 344 struct xfs_trans *tp, /* transaction pointer */
347 xfs_fsblock_t fsbno, /* file system block number */ 345 xfs_fsblock_t fsbno, /* file system block number */
348 uint lock, /* lock flags for read_buf */
349 struct xfs_buf **bpp, /* buffer for fsbno */ 346 struct xfs_buf **bpp, /* buffer for fsbno */
350 int refval, /* ref count value for buffer */ 347 int refval, /* ref count value for buffer */
351 const struct xfs_buf_ops *ops); 348 const struct xfs_buf_ops *ops);
@@ -383,8 +380,7 @@ xfs_btree_init_block(
383 xfs_btnum_t btnum, 380 xfs_btnum_t btnum,
384 __u16 level, 381 __u16 level,
385 __u16 numrecs, 382 __u16 numrecs,
386 __u64 owner, 383 __u64 owner);
387 unsigned int flags);
388 384
389void 385void
390xfs_btree_init_block_int( 386xfs_btree_init_block_int(
@@ -469,8 +465,8 @@ uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
469unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len); 465unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
470 466
471/* return codes */ 467/* return codes */
472#define XFS_BTREE_QUERY_RANGE_CONTINUE 0 /* keep iterating */ 468#define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */
473#define XFS_BTREE_QUERY_RANGE_ABORT 1 /* stop iterating */ 469#define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */
474typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur, 470typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
475 union xfs_btree_rec *rec, void *priv); 471 union xfs_btree_rec *rec, void *priv);
476 472
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e2737e2ac2ae..d1c77fd0815d 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -12,20 +12,14 @@
12#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
13#include "xfs_bit.h" 13#include "xfs_bit.h"
14#include "xfs_mount.h" 14#include "xfs_mount.h"
15#include "xfs_da_format.h"
16#include "xfs_da_btree.h"
17#include "xfs_dir2.h" 15#include "xfs_dir2.h"
18#include "xfs_dir2_priv.h" 16#include "xfs_dir2_priv.h"
19#include "xfs_inode.h" 17#include "xfs_inode.h"
20#include "xfs_trans.h" 18#include "xfs_trans.h"
21#include "xfs_inode_item.h"
22#include "xfs_alloc.h"
23#include "xfs_bmap.h" 19#include "xfs_bmap.h"
24#include "xfs_attr.h"
25#include "xfs_attr_leaf.h" 20#include "xfs_attr_leaf.h"
26#include "xfs_error.h" 21#include "xfs_error.h"
27#include "xfs_trace.h" 22#include "xfs_trace.h"
28#include "xfs_cksum.h"
29#include "xfs_buf_item.h" 23#include "xfs_buf_item.h"
30#include "xfs_log.h" 24#include "xfs_log.h"
31 25
@@ -126,7 +120,7 @@ xfs_da3_blkinfo_verify(
126 struct xfs_buf *bp, 120 struct xfs_buf *bp,
127 struct xfs_da3_blkinfo *hdr3) 121 struct xfs_da3_blkinfo *hdr3)
128{ 122{
129 struct xfs_mount *mp = bp->b_target->bt_mount; 123 struct xfs_mount *mp = bp->b_mount;
130 struct xfs_da_blkinfo *hdr = &hdr3->hdr; 124 struct xfs_da_blkinfo *hdr = &hdr3->hdr;
131 125
132 if (!xfs_verify_magic16(bp, hdr->magic)) 126 if (!xfs_verify_magic16(bp, hdr->magic))
@@ -148,7 +142,7 @@ static xfs_failaddr_t
148xfs_da3_node_verify( 142xfs_da3_node_verify(
149 struct xfs_buf *bp) 143 struct xfs_buf *bp)
150{ 144{
151 struct xfs_mount *mp = bp->b_target->bt_mount; 145 struct xfs_mount *mp = bp->b_mount;
152 struct xfs_da_intnode *hdr = bp->b_addr; 146 struct xfs_da_intnode *hdr = bp->b_addr;
153 struct xfs_da3_icnode_hdr ichdr; 147 struct xfs_da3_icnode_hdr ichdr;
154 const struct xfs_dir_ops *ops; 148 const struct xfs_dir_ops *ops;
@@ -186,7 +180,7 @@ static void
186xfs_da3_node_write_verify( 180xfs_da3_node_write_verify(
187 struct xfs_buf *bp) 181 struct xfs_buf *bp)
188{ 182{
189 struct xfs_mount *mp = bp->b_target->bt_mount; 183 struct xfs_mount *mp = bp->b_mount;
190 struct xfs_buf_log_item *bip = bp->b_log_item; 184 struct xfs_buf_log_item *bip = bp->b_log_item;
191 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 185 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
192 xfs_failaddr_t fa; 186 xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
index b39053dcb643..b1ae572496b6 100644
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ b/fs/xfs/libxfs/xfs_da_format.c
@@ -11,11 +11,8 @@
11#include "xfs_log_format.h" 11#include "xfs_log_format.h"
12#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_da_format.h"
15#include "xfs_da_btree.h"
16#include "xfs_inode.h" 14#include "xfs_inode.h"
17#include "xfs_dir2.h" 15#include "xfs_dir2.h"
18#include "xfs_dir2_priv.h"
19 16
20/* 17/*
21 * Shortform directory ops 18 * Shortform directory ops
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 1c6bf2105939..eb2be2a6a25a 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -9,8 +9,6 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h" 12#include "xfs_mount.h"
15#include "xfs_defer.h" 13#include "xfs_defer.h"
16#include "xfs_trans.h" 14#include "xfs_trans.h"
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 156ce95c9c45..67840723edbb 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -5,20 +5,16 @@
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h"
8#include "xfs_format.h" 9#include "xfs_format.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 12#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_da_format.h"
14#include "xfs_da_btree.h"
15#include "xfs_inode.h" 13#include "xfs_inode.h"
16#include "xfs_trans.h" 14#include "xfs_trans.h"
17#include "xfs_inode_item.h"
18#include "xfs_bmap.h" 15#include "xfs_bmap.h"
19#include "xfs_dir2.h" 16#include "xfs_dir2.h"
20#include "xfs_dir2_priv.h" 17#include "xfs_dir2_priv.h"
21#include "xfs_ialloc.h"
22#include "xfs_errortag.h" 18#include "xfs_errortag.h"
23#include "xfs_error.h" 19#include "xfs_error.h"
24#include "xfs_trace.h" 20#include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index b7d6d78f4ce2..a6fb0cc2085e 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -6,22 +6,19 @@
6 */ 6 */
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_fs.h" 8#include "xfs_fs.h"
9#include "xfs_shared.h"
9#include "xfs_format.h" 10#include "xfs_format.h"
10#include "xfs_log_format.h" 11#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 13#include "xfs_mount.h"
13#include "xfs_da_format.h"
14#include "xfs_da_btree.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_trans.h" 15#include "xfs_trans.h"
17#include "xfs_inode_item.h"
18#include "xfs_bmap.h" 16#include "xfs_bmap.h"
19#include "xfs_buf_item.h" 17#include "xfs_buf_item.h"
20#include "xfs_dir2.h" 18#include "xfs_dir2.h"
21#include "xfs_dir2_priv.h" 19#include "xfs_dir2_priv.h"
22#include "xfs_error.h" 20#include "xfs_error.h"
23#include "xfs_trace.h" 21#include "xfs_trace.h"
24#include "xfs_cksum.h"
25#include "xfs_log.h" 22#include "xfs_log.h"
26 23
27/* 24/*
@@ -50,7 +47,7 @@ static xfs_failaddr_t
50xfs_dir3_block_verify( 47xfs_dir3_block_verify(
51 struct xfs_buf *bp) 48 struct xfs_buf *bp)
52{ 49{
53 struct xfs_mount *mp = bp->b_target->bt_mount; 50 struct xfs_mount *mp = bp->b_mount;
54 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 51 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
55 52
56 if (!xfs_verify_magic(bp, hdr3->magic)) 53 if (!xfs_verify_magic(bp, hdr3->magic))
@@ -71,7 +68,7 @@ static void
71xfs_dir3_block_read_verify( 68xfs_dir3_block_read_verify(
72 struct xfs_buf *bp) 69 struct xfs_buf *bp)
73{ 70{
74 struct xfs_mount *mp = bp->b_target->bt_mount; 71 struct xfs_mount *mp = bp->b_mount;
75 xfs_failaddr_t fa; 72 xfs_failaddr_t fa;
76 73
77 if (xfs_sb_version_hascrc(&mp->m_sb) && 74 if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -88,7 +85,7 @@ static void
88xfs_dir3_block_write_verify( 85xfs_dir3_block_write_verify(
89 struct xfs_buf *bp) 86 struct xfs_buf *bp)
90{ 87{
91 struct xfs_mount *mp = bp->b_target->bt_mount; 88 struct xfs_mount *mp = bp->b_mount;
92 struct xfs_buf_log_item *bip = bp->b_log_item; 89 struct xfs_buf_log_item *bip = bp->b_log_item;
93 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 90 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
94 xfs_failaddr_t fa; 91 xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index b7b9ce002cb9..2c79be4c3153 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -6,19 +6,16 @@
6 */ 6 */
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_fs.h" 8#include "xfs_fs.h"
9#include "xfs_shared.h"
9#include "xfs_format.h" 10#include "xfs_format.h"
10#include "xfs_log_format.h" 11#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 13#include "xfs_mount.h"
13#include "xfs_da_format.h"
14#include "xfs_da_btree.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_dir2.h" 15#include "xfs_dir2.h"
17#include "xfs_dir2_priv.h"
18#include "xfs_error.h" 16#include "xfs_error.h"
19#include "xfs_trans.h" 17#include "xfs_trans.h"
20#include "xfs_buf_item.h" 18#include "xfs_buf_item.h"
21#include "xfs_cksum.h"
22#include "xfs_log.h" 19#include "xfs_log.h"
23 20
24static xfs_failaddr_t xfs_dir2_data_freefind_verify( 21static xfs_failaddr_t xfs_dir2_data_freefind_verify(
@@ -50,14 +47,13 @@ __xfs_dir3_data_check(
50 int i; /* leaf index */ 47 int i; /* leaf index */
51 int lastfree; /* last entry was unused */ 48 int lastfree; /* last entry was unused */
52 xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */ 49 xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */
53 xfs_mount_t *mp; /* filesystem mount point */ 50 struct xfs_mount *mp = bp->b_mount;
54 char *p; /* current data position */ 51 char *p; /* current data position */
55 int stale; /* count of stale leaves */ 52 int stale; /* count of stale leaves */
56 struct xfs_name name; 53 struct xfs_name name;
57 const struct xfs_dir_ops *ops; 54 const struct xfs_dir_ops *ops;
58 struct xfs_da_geometry *geo; 55 struct xfs_da_geometry *geo;
59 56
60 mp = bp->b_target->bt_mount;
61 geo = mp->m_dir_geo; 57 geo = mp->m_dir_geo;
62 58
63 /* 59 /*
@@ -249,7 +245,7 @@ static xfs_failaddr_t
249xfs_dir3_data_verify( 245xfs_dir3_data_verify(
250 struct xfs_buf *bp) 246 struct xfs_buf *bp)
251{ 247{
252 struct xfs_mount *mp = bp->b_target->bt_mount; 248 struct xfs_mount *mp = bp->b_mount;
253 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 249 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
254 250
255 if (!xfs_verify_magic(bp, hdr3->magic)) 251 if (!xfs_verify_magic(bp, hdr3->magic))
@@ -298,7 +294,7 @@ static void
298xfs_dir3_data_read_verify( 294xfs_dir3_data_read_verify(
299 struct xfs_buf *bp) 295 struct xfs_buf *bp)
300{ 296{
301 struct xfs_mount *mp = bp->b_target->bt_mount; 297 struct xfs_mount *mp = bp->b_mount;
302 xfs_failaddr_t fa; 298 xfs_failaddr_t fa;
303 299
304 if (xfs_sb_version_hascrc(&mp->m_sb) && 300 if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -315,7 +311,7 @@ static void
315xfs_dir3_data_write_verify( 311xfs_dir3_data_write_verify(
316 struct xfs_buf *bp) 312 struct xfs_buf *bp)
317{ 313{
318 struct xfs_mount *mp = bp->b_target->bt_mount; 314 struct xfs_mount *mp = bp->b_mount;
319 struct xfs_buf_log_item *bip = bp->b_log_item; 315 struct xfs_buf_log_item *bip = bp->b_log_item;
320 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 316 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
321 xfs_failaddr_t fa; 317 xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 9c2a0a13ed61..a53e4585a2f3 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -6,12 +6,11 @@
6 */ 6 */
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_fs.h" 8#include "xfs_fs.h"
9#include "xfs_shared.h"
9#include "xfs_format.h" 10#include "xfs_format.h"
10#include "xfs_log_format.h" 11#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 13#include "xfs_mount.h"
13#include "xfs_da_format.h"
14#include "xfs_da_btree.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_bmap.h" 15#include "xfs_bmap.h"
17#include "xfs_dir2.h" 16#include "xfs_dir2.h"
@@ -20,8 +19,6 @@
20#include "xfs_trace.h" 19#include "xfs_trace.h"
21#include "xfs_trans.h" 20#include "xfs_trans.h"
22#include "xfs_buf_item.h" 21#include "xfs_buf_item.h"
23#include "xfs_cksum.h"
24#include "xfs_log.h"
25 22
26/* 23/*
27 * Local function declarations. 24 * Local function declarations.
@@ -144,7 +141,7 @@ static xfs_failaddr_t
144xfs_dir3_leaf_verify( 141xfs_dir3_leaf_verify(
145 struct xfs_buf *bp) 142 struct xfs_buf *bp)
146{ 143{
147 struct xfs_mount *mp = bp->b_target->bt_mount; 144 struct xfs_mount *mp = bp->b_mount;
148 struct xfs_dir2_leaf *leaf = bp->b_addr; 145 struct xfs_dir2_leaf *leaf = bp->b_addr;
149 xfs_failaddr_t fa; 146 xfs_failaddr_t fa;
150 147
@@ -159,7 +156,7 @@ static void
159xfs_dir3_leaf_read_verify( 156xfs_dir3_leaf_read_verify(
160 struct xfs_buf *bp) 157 struct xfs_buf *bp)
161{ 158{
162 struct xfs_mount *mp = bp->b_target->bt_mount; 159 struct xfs_mount *mp = bp->b_mount;
163 xfs_failaddr_t fa; 160 xfs_failaddr_t fa;
164 161
165 if (xfs_sb_version_hascrc(&mp->m_sb) && 162 if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -176,7 +173,7 @@ static void
176xfs_dir3_leaf_write_verify( 173xfs_dir3_leaf_write_verify(
177 struct xfs_buf *bp) 174 struct xfs_buf *bp)
178{ 175{
179 struct xfs_mount *mp = bp->b_target->bt_mount; 176 struct xfs_mount *mp = bp->b_mount;
180 struct xfs_buf_log_item *bip = bp->b_log_item; 177 struct xfs_buf_log_item *bip = bp->b_log_item;
181 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 178 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
182 xfs_failaddr_t fa; 179 xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 16731d2d684b..afcc6642690a 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -6,12 +6,11 @@
6 */ 6 */
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_fs.h" 8#include "xfs_fs.h"
9#include "xfs_shared.h"
9#include "xfs_format.h" 10#include "xfs_format.h"
10#include "xfs_log_format.h" 11#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 13#include "xfs_mount.h"
13#include "xfs_da_format.h"
14#include "xfs_da_btree.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_bmap.h" 15#include "xfs_bmap.h"
17#include "xfs_dir2.h" 16#include "xfs_dir2.h"
@@ -20,7 +19,6 @@
20#include "xfs_trace.h" 19#include "xfs_trace.h"
21#include "xfs_trans.h" 20#include "xfs_trans.h"
22#include "xfs_buf_item.h" 21#include "xfs_buf_item.h"
23#include "xfs_cksum.h"
24#include "xfs_log.h" 22#include "xfs_log.h"
25 23
26/* 24/*
@@ -84,7 +82,7 @@ static xfs_failaddr_t
84xfs_dir3_free_verify( 82xfs_dir3_free_verify(
85 struct xfs_buf *bp) 83 struct xfs_buf *bp)
86{ 84{
87 struct xfs_mount *mp = bp->b_target->bt_mount; 85 struct xfs_mount *mp = bp->b_mount;
88 struct xfs_dir2_free_hdr *hdr = bp->b_addr; 86 struct xfs_dir2_free_hdr *hdr = bp->b_addr;
89 87
90 if (!xfs_verify_magic(bp, hdr->magic)) 88 if (!xfs_verify_magic(bp, hdr->magic))
@@ -110,7 +108,7 @@ static void
110xfs_dir3_free_read_verify( 108xfs_dir3_free_read_verify(
111 struct xfs_buf *bp) 109 struct xfs_buf *bp)
112{ 110{
113 struct xfs_mount *mp = bp->b_target->bt_mount; 111 struct xfs_mount *mp = bp->b_mount;
114 xfs_failaddr_t fa; 112 xfs_failaddr_t fa;
115 113
116 if (xfs_sb_version_hascrc(&mp->m_sb) && 114 if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -127,7 +125,7 @@ static void
127xfs_dir3_free_write_verify( 125xfs_dir3_free_write_verify(
128 struct xfs_buf *bp) 126 struct xfs_buf *bp)
129{ 127{
130 struct xfs_mount *mp = bp->b_target->bt_mount; 128 struct xfs_mount *mp = bp->b_mount;
131 struct xfs_buf_log_item *bip = bp->b_log_item; 129 struct xfs_buf_log_item *bip = bp->b_log_item;
132 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 130 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
133 xfs_failaddr_t fa; 131 xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 585dfdb7b6b6..033589257f54 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -5,16 +5,13 @@
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h"
8#include "xfs_format.h" 9#include "xfs_format.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 12#include "xfs_mount.h"
12#include "xfs_da_format.h"
13#include "xfs_da_btree.h"
14#include "xfs_inode.h" 13#include "xfs_inode.h"
15#include "xfs_trans.h" 14#include "xfs_trans.h"
16#include "xfs_inode_item.h"
17#include "xfs_error.h"
18#include "xfs_dir2.h" 15#include "xfs_dir2.h"
19#include "xfs_dir2_priv.h" 16#include "xfs_dir2_priv.h"
20#include "xfs_trace.h" 17#include "xfs_trace.h"
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 88fa11071f9f..e8bd688a4073 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -16,8 +16,6 @@
16#include "xfs_trans.h" 16#include "xfs_trans.h"
17#include "xfs_qm.h" 17#include "xfs_qm.h"
18#include "xfs_error.h" 18#include "xfs_error.h"
19#include "xfs_cksum.h"
20#include "xfs_trace.h"
21 19
22int 20int
23xfs_calc_dquots_per_chunk( 21xfs_calc_dquots_per_chunk(
@@ -224,7 +222,7 @@ static xfs_failaddr_t
224xfs_dquot_buf_verify_struct( 222xfs_dquot_buf_verify_struct(
225 struct xfs_buf *bp) 223 struct xfs_buf *bp)
226{ 224{
227 struct xfs_mount *mp = bp->b_target->bt_mount; 225 struct xfs_mount *mp = bp->b_mount;
228 226
229 return xfs_dquot_buf_verify(mp, bp, false); 227 return xfs_dquot_buf_verify(mp, bp, false);
230} 228}
@@ -233,7 +231,7 @@ static void
233xfs_dquot_buf_read_verify( 231xfs_dquot_buf_read_verify(
234 struct xfs_buf *bp) 232 struct xfs_buf *bp)
235{ 233{
236 struct xfs_mount *mp = bp->b_target->bt_mount; 234 struct xfs_mount *mp = bp->b_mount;
237 235
238 if (!xfs_dquot_buf_verify_crc(mp, bp, false)) 236 if (!xfs_dquot_buf_verify_crc(mp, bp, false))
239 return; 237 return;
@@ -250,7 +248,7 @@ static void
250xfs_dquot_buf_readahead_verify( 248xfs_dquot_buf_readahead_verify(
251 struct xfs_buf *bp) 249 struct xfs_buf *bp)
252{ 250{
253 struct xfs_mount *mp = bp->b_target->bt_mount; 251 struct xfs_mount *mp = bp->b_mount;
254 252
255 if (!xfs_dquot_buf_verify_crc(mp, bp, true) || 253 if (!xfs_dquot_buf_verify_crc(mp, bp, true) ||
256 xfs_dquot_buf_verify(mp, bp, true) != NULL) { 254 xfs_dquot_buf_verify(mp, bp, true) != NULL) {
@@ -268,7 +266,7 @@ static void
268xfs_dquot_buf_write_verify( 266xfs_dquot_buf_write_verify(
269 struct xfs_buf *bp) 267 struct xfs_buf *bp)
270{ 268{
271 struct xfs_mount *mp = bp->b_target->bt_mount; 269 struct xfs_mount *mp = bp->b_mount;
272 270
273 xfs_dquot_buf_verify(mp, bp, false); 271 xfs_dquot_buf_verify(mp, bp, false);
274} 272}
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9bb3c48843ec..c968b60cee15 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -1071,7 +1071,7 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
1071#define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1) 1071#define XFS_INO_MASK(k) (uint32_t)((1ULL << (k)) - 1)
1072#define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog 1072#define XFS_INO_OFFSET_BITS(mp) (mp)->m_sb.sb_inopblog
1073#define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog 1073#define XFS_INO_AGBNO_BITS(mp) (mp)->m_sb.sb_agblklog
1074#define XFS_INO_AGINO_BITS(mp) (mp)->m_agino_log 1074#define XFS_INO_AGINO_BITS(mp) ((mp)->m_ino_geo.agino_log)
1075#define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log 1075#define XFS_INO_AGNO_BITS(mp) (mp)->m_agno_log
1076#define XFS_INO_BITS(mp) \ 1076#define XFS_INO_BITS(mp) \
1077 XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp) 1077 XFS_INO_AGNO_BITS(mp) + XFS_INO_AGINO_BITS(mp)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index e7382c780ed7..52d03a3a02a4 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -97,7 +97,7 @@ struct getbmapx {
97 * For use by backup and restore programs to set the XFS on-disk inode 97 * For use by backup and restore programs to set the XFS on-disk inode
98 * fields di_dmevmask and di_dmstate. These must be set to exactly and 98 * fields di_dmevmask and di_dmstate. These must be set to exactly and
99 * only values previously obtained via xfs_bulkstat! (Specifically the 99 * only values previously obtained via xfs_bulkstat! (Specifically the
100 * xfs_bstat_t fields bs_dmevmask and bs_dmstate.) 100 * struct xfs_bstat fields bs_dmevmask and bs_dmstate.)
101 */ 101 */
102#ifndef HAVE_FSDMIDATA 102#ifndef HAVE_FSDMIDATA
103struct fsdmidata { 103struct fsdmidata {
@@ -328,7 +328,7 @@ typedef struct xfs_bstime {
328 __s32 tv_nsec; /* and nanoseconds */ 328 __s32 tv_nsec; /* and nanoseconds */
329} xfs_bstime_t; 329} xfs_bstime_t;
330 330
331typedef struct xfs_bstat { 331struct xfs_bstat {
332 __u64 bs_ino; /* inode number */ 332 __u64 bs_ino; /* inode number */
333 __u16 bs_mode; /* type and mode */ 333 __u16 bs_mode; /* type and mode */
334 __u16 bs_nlink; /* number of links */ 334 __u16 bs_nlink; /* number of links */
@@ -356,7 +356,53 @@ typedef struct xfs_bstat {
356 __u32 bs_dmevmask; /* DMIG event mask */ 356 __u32 bs_dmevmask; /* DMIG event mask */
357 __u16 bs_dmstate; /* DMIG state info */ 357 __u16 bs_dmstate; /* DMIG state info */
358 __u16 bs_aextents; /* attribute number of extents */ 358 __u16 bs_aextents; /* attribute number of extents */
359} xfs_bstat_t; 359};
360
361/* New bulkstat structure that reports v5 features and fixes padding issues */
362struct xfs_bulkstat {
363 uint64_t bs_ino; /* inode number */
364 uint64_t bs_size; /* file size */
365
366 uint64_t bs_blocks; /* number of blocks */
367 uint64_t bs_xflags; /* extended flags */
368
369 uint64_t bs_atime; /* access time, seconds */
370 uint64_t bs_mtime; /* modify time, seconds */
371
372 uint64_t bs_ctime; /* inode change time, seconds */
373 uint64_t bs_btime; /* creation time, seconds */
374
375 uint32_t bs_gen; /* generation count */
376 uint32_t bs_uid; /* user id */
377 uint32_t bs_gid; /* group id */
378 uint32_t bs_projectid; /* project id */
379
380 uint32_t bs_atime_nsec; /* access time, nanoseconds */
381 uint32_t bs_mtime_nsec; /* modify time, nanoseconds */
382 uint32_t bs_ctime_nsec; /* inode change time, nanoseconds */
383 uint32_t bs_btime_nsec; /* creation time, nanoseconds */
384
385 uint32_t bs_blksize; /* block size */
386 uint32_t bs_rdev; /* device value */
387 uint32_t bs_cowextsize_blks; /* cow extent size hint, blocks */
388 uint32_t bs_extsize_blks; /* extent size hint, blocks */
389
390 uint32_t bs_nlink; /* number of links */
391 uint32_t bs_extents; /* number of extents */
392 uint32_t bs_aextents; /* attribute number of extents */
393 uint16_t bs_version; /* structure version */
394 uint16_t bs_forkoff; /* inode fork offset in bytes */
395
396 uint16_t bs_sick; /* sick inode metadata */
397 uint16_t bs_checked; /* checked inode metadata */
398 uint16_t bs_mode; /* type and mode */
399 uint16_t bs_pad2; /* zeroed */
400
401 uint64_t bs_pad[7]; /* zeroed */
402};
403
404#define XFS_BULKSTAT_VERSION_V1 (1)
405#define XFS_BULKSTAT_VERSION_V5 (5)
360 406
361/* bs_sick flags */ 407/* bs_sick flags */
362#define XFS_BS_SICK_INODE (1 << 0) /* inode core */ 408#define XFS_BS_SICK_INODE (1 << 0) /* inode core */
@@ -374,7 +420,7 @@ typedef struct xfs_bstat {
374 * to retain compatibility with "old" filesystems). 420 * to retain compatibility with "old" filesystems).
375 */ 421 */
376static inline uint32_t 422static inline uint32_t
377bstat_get_projid(struct xfs_bstat *bs) 423bstat_get_projid(const struct xfs_bstat *bs)
378{ 424{
379 return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo; 425 return (uint32_t)bs->bs_projid_hi << 16 | bs->bs_projid_lo;
380} 426}
@@ -382,23 +428,79 @@ bstat_get_projid(struct xfs_bstat *bs)
382/* 428/*
383 * The user-level BulkStat Request interface structure. 429 * The user-level BulkStat Request interface structure.
384 */ 430 */
385typedef struct xfs_fsop_bulkreq { 431struct xfs_fsop_bulkreq {
386 __u64 __user *lastip; /* last inode # pointer */ 432 __u64 __user *lastip; /* last inode # pointer */
387 __s32 icount; /* count of entries in buffer */ 433 __s32 icount; /* count of entries in buffer */
388 void __user *ubuffer;/* user buffer for inode desc. */ 434 void __user *ubuffer;/* user buffer for inode desc. */
389 __s32 __user *ocount; /* output count pointer */ 435 __s32 __user *ocount; /* output count pointer */
390} xfs_fsop_bulkreq_t; 436};
391
392 437
393/* 438/*
394 * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS). 439 * Structures returned from xfs_inumbers routine (XFS_IOC_FSINUMBERS).
395 */ 440 */
396typedef struct xfs_inogrp { 441struct xfs_inogrp {
397 __u64 xi_startino; /* starting inode number */ 442 __u64 xi_startino; /* starting inode number */
398 __s32 xi_alloccount; /* # bits set in allocmask */ 443 __s32 xi_alloccount; /* # bits set in allocmask */
399 __u64 xi_allocmask; /* mask of allocated inodes */ 444 __u64 xi_allocmask; /* mask of allocated inodes */
400} xfs_inogrp_t; 445};
401 446
447/* New inumbers structure that reports v5 features and fixes padding issues */
448struct xfs_inumbers {
449 uint64_t xi_startino; /* starting inode number */
450 uint64_t xi_allocmask; /* mask of allocated inodes */
451 uint8_t xi_alloccount; /* # bits set in allocmask */
452 uint8_t xi_version; /* version */
453 uint8_t xi_padding[6]; /* zero */
454};
455
456#define XFS_INUMBERS_VERSION_V1 (1)
457#define XFS_INUMBERS_VERSION_V5 (5)
458
459/* Header for bulk inode requests. */
460struct xfs_bulk_ireq {
461 uint64_t ino; /* I/O: start with this inode */
462 uint32_t flags; /* I/O: operation flags */
463 uint32_t icount; /* I: count of entries in buffer */
464 uint32_t ocount; /* O: count of entries filled out */
465 uint32_t agno; /* I: see comment for IREQ_AGNO */
466 uint64_t reserved[5]; /* must be zero */
467};
468
469/*
470 * Only return results from the specified @agno. If @ino is zero, start
471 * with the first inode of @agno.
472 */
473#define XFS_BULK_IREQ_AGNO (1 << 0)
474
475/*
476 * Return bulkstat information for a single inode, where @ino value is a
477 * special value, not a literal inode number. See the XFS_BULK_IREQ_SPECIAL_*
478 * values below. Not compatible with XFS_BULK_IREQ_AGNO.
479 */
480#define XFS_BULK_IREQ_SPECIAL (1 << 1)
481
482#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \
483 XFS_BULK_IREQ_SPECIAL)
484
485/* Operate on the root directory inode. */
486#define XFS_BULK_IREQ_SPECIAL_ROOT (1)
487
488/*
489 * ioctl structures for v5 bulkstat and inumbers requests
490 */
491struct xfs_bulkstat_req {
492 struct xfs_bulk_ireq hdr;
493 struct xfs_bulkstat bulkstat[];
494};
495#define XFS_BULKSTAT_REQ_SIZE(nr) (sizeof(struct xfs_bulkstat_req) + \
496 (nr) * sizeof(struct xfs_bulkstat))
497
498struct xfs_inumbers_req {
499 struct xfs_bulk_ireq hdr;
500 struct xfs_inumbers inumbers[];
501};
502#define XFS_INUMBERS_REQ_SIZE(nr) (sizeof(struct xfs_inumbers_req) + \
503 (nr) * sizeof(struct xfs_inumbers))
402 504
403/* 505/*
404 * Error injection. 506 * Error injection.
@@ -529,7 +631,7 @@ typedef struct xfs_swapext
529 xfs_off_t sx_offset; /* offset into file */ 631 xfs_off_t sx_offset; /* offset into file */
530 xfs_off_t sx_length; /* leng from offset */ 632 xfs_off_t sx_length; /* leng from offset */
531 char sx_pad[16]; /* pad space, unused */ 633 char sx_pad[16]; /* pad space, unused */
532 xfs_bstat_t sx_stat; /* stat of target b4 copy */ 634 struct xfs_bstat sx_stat; /* stat of target b4 copy */
533} xfs_swapext_t; 635} xfs_swapext_t;
534 636
535/* 637/*
@@ -701,6 +803,8 @@ struct xfs_scrub_metadata {
701#define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4) 803#define XFS_IOC_FSGEOMETRY_V4 _IOR ('X', 124, struct xfs_fsop_geom_v4)
702#define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t) 804#define XFS_IOC_GOINGDOWN _IOR ('X', 125, uint32_t)
703#define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom) 805#define XFS_IOC_FSGEOMETRY _IOR ('X', 126, struct xfs_fsop_geom)
806#define XFS_IOC_BULKSTAT _IOR ('X', 127, struct xfs_bulkstat_req)
807#define XFS_IOC_INUMBERS _IOR ('X', 128, struct xfs_inumbers_req)
704/* XFS_IOC_GETFSUUID ---------- deprecated 140 */ 808/* XFS_IOC_GETFSUUID ---------- deprecated 140 */
705 809
706 810
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 49ddfeac19f2..272005ac8c88 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -185,6 +185,6 @@ xfs_inode_is_healthy(struct xfs_inode *ip)
185 185
186void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo); 186void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo);
187void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); 187void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
188void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bstat *bs); 188void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
189 189
190#endif /* __XFS_HEALTH_H__ */ 190#endif /* __XFS_HEALTH_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index fe9898875097..04377ab75863 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -12,17 +12,14 @@
12#include "xfs_bit.h" 12#include "xfs_bit.h"
13#include "xfs_sb.h" 13#include "xfs_sb.h"
14#include "xfs_mount.h" 14#include "xfs_mount.h"
15#include "xfs_defer.h"
16#include "xfs_inode.h" 15#include "xfs_inode.h"
17#include "xfs_btree.h" 16#include "xfs_btree.h"
18#include "xfs_ialloc.h" 17#include "xfs_ialloc.h"
19#include "xfs_ialloc_btree.h" 18#include "xfs_ialloc_btree.h"
20#include "xfs_alloc.h" 19#include "xfs_alloc.h"
21#include "xfs_rtalloc.h"
22#include "xfs_errortag.h" 20#include "xfs_errortag.h"
23#include "xfs_error.h" 21#include "xfs_error.h"
24#include "xfs_bmap.h" 22#include "xfs_bmap.h"
25#include "xfs_cksum.h"
26#include "xfs_trans.h" 23#include "xfs_trans.h"
27#include "xfs_buf_item.h" 24#include "xfs_buf_item.h"
28#include "xfs_icreate_item.h" 25#include "xfs_icreate_item.h"
@@ -31,20 +28,6 @@
31#include "xfs_log.h" 28#include "xfs_log.h"
32#include "xfs_rmap.h" 29#include "xfs_rmap.h"
33 30
34
35/*
36 * Allocation group level functions.
37 */
38int
39xfs_ialloc_cluster_alignment(
40 struct xfs_mount *mp)
41{
42 if (xfs_sb_version_hasalign(&mp->m_sb) &&
43 mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
44 return mp->m_sb.sb_inoalignmt;
45 return 1;
46}
47
48/* 31/*
49 * Lookup a record by ino in the btree given by cur. 32 * Lookup a record by ino in the btree given by cur.
50 */ 33 */
@@ -299,7 +282,7 @@ xfs_ialloc_inode_init(
299 * sizes, manipulate the inodes in buffers which are multiples of the 282 * sizes, manipulate the inodes in buffers which are multiples of the
300 * blocks size. 283 * blocks size.
301 */ 284 */
302 nbufs = length / mp->m_blocks_per_cluster; 285 nbufs = length / M_IGEO(mp)->blocks_per_cluster;
303 286
304 /* 287 /*
305 * Figure out what version number to use in the inodes we create. If 288 * Figure out what version number to use in the inodes we create. If
@@ -343,9 +326,10 @@ xfs_ialloc_inode_init(
343 * Get the block. 326 * Get the block.
344 */ 327 */
345 d = XFS_AGB_TO_DADDR(mp, agno, agbno + 328 d = XFS_AGB_TO_DADDR(mp, agno, agbno +
346 (j * mp->m_blocks_per_cluster)); 329 (j * M_IGEO(mp)->blocks_per_cluster));
347 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, 330 fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
348 mp->m_bsize * mp->m_blocks_per_cluster, 331 mp->m_bsize *
332 M_IGEO(mp)->blocks_per_cluster,
349 XBF_UNMAPPED); 333 XBF_UNMAPPED);
350 if (!fbuf) 334 if (!fbuf)
351 return -ENOMEM; 335 return -ENOMEM;
@@ -353,7 +337,7 @@ xfs_ialloc_inode_init(
353 /* Initialize the inode buffers and log them appropriately. */ 337 /* Initialize the inode buffers and log them appropriately. */
354 fbuf->b_ops = &xfs_inode_buf_ops; 338 fbuf->b_ops = &xfs_inode_buf_ops;
355 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); 339 xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
356 for (i = 0; i < mp->m_inodes_per_cluster; i++) { 340 for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
357 int ioffset = i << mp->m_sb.sb_inodelog; 341 int ioffset = i << mp->m_sb.sb_inodelog;
358 uint isize = xfs_dinode_size(version); 342 uint isize = xfs_dinode_size(version);
359 343
@@ -616,24 +600,26 @@ error:
616 * Allocate new inodes in the allocation group specified by agbp. 600 * Allocate new inodes in the allocation group specified by agbp.
617 * Return 0 for success, else error code. 601 * Return 0 for success, else error code.
618 */ 602 */
619STATIC int /* error code or 0 */ 603STATIC int
620xfs_ialloc_ag_alloc( 604xfs_ialloc_ag_alloc(
621 xfs_trans_t *tp, /* transaction pointer */ 605 struct xfs_trans *tp,
622 xfs_buf_t *agbp, /* alloc group buffer */ 606 struct xfs_buf *agbp,
623 int *alloc) 607 int *alloc)
624{ 608{
625 xfs_agi_t *agi; /* allocation group header */ 609 struct xfs_agi *agi;
626 xfs_alloc_arg_t args; /* allocation argument structure */ 610 struct xfs_alloc_arg args;
627 xfs_agnumber_t agno; 611 xfs_agnumber_t agno;
628 int error; 612 int error;
629 xfs_agino_t newino; /* new first inode's number */ 613 xfs_agino_t newino; /* new first inode's number */
630 xfs_agino_t newlen; /* new number of inodes */ 614 xfs_agino_t newlen; /* new number of inodes */
631 int isaligned = 0; /* inode allocation at stripe unit */ 615 int isaligned = 0; /* inode allocation at stripe */
632 /* boundary */ 616 /* unit boundary */
633 uint16_t allocmask = (uint16_t) -1; /* init. to full chunk */ 617 /* init. to full chunk */
618 uint16_t allocmask = (uint16_t) -1;
634 struct xfs_inobt_rec_incore rec; 619 struct xfs_inobt_rec_incore rec;
635 struct xfs_perag *pag; 620 struct xfs_perag *pag;
636 int do_sparse = 0; 621 struct xfs_ino_geometry *igeo = M_IGEO(tp->t_mountp);
622 int do_sparse = 0;
637 623
638 memset(&args, 0, sizeof(args)); 624 memset(&args, 0, sizeof(args));
639 args.tp = tp; 625 args.tp = tp;
@@ -644,7 +630,7 @@ xfs_ialloc_ag_alloc(
644#ifdef DEBUG 630#ifdef DEBUG
645 /* randomly do sparse inode allocations */ 631 /* randomly do sparse inode allocations */
646 if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) && 632 if (xfs_sb_version_hassparseinodes(&tp->t_mountp->m_sb) &&
647 args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks) 633 igeo->ialloc_min_blks < igeo->ialloc_blks)
648 do_sparse = prandom_u32() & 1; 634 do_sparse = prandom_u32() & 1;
649#endif 635#endif
650 636
@@ -652,12 +638,12 @@ xfs_ialloc_ag_alloc(
652 * Locking will ensure that we don't have two callers in here 638 * Locking will ensure that we don't have two callers in here
653 * at one time. 639 * at one time.
654 */ 640 */
655 newlen = args.mp->m_ialloc_inos; 641 newlen = igeo->ialloc_inos;
656 if (args.mp->m_maxicount && 642 if (igeo->maxicount &&
657 percpu_counter_read_positive(&args.mp->m_icount) + newlen > 643 percpu_counter_read_positive(&args.mp->m_icount) + newlen >
658 args.mp->m_maxicount) 644 igeo->maxicount)
659 return -ENOSPC; 645 return -ENOSPC;
660 args.minlen = args.maxlen = args.mp->m_ialloc_blks; 646 args.minlen = args.maxlen = igeo->ialloc_blks;
661 /* 647 /*
662 * First try to allocate inodes contiguous with the last-allocated 648 * First try to allocate inodes contiguous with the last-allocated
663 * chunk of inodes. If the filesystem is striped, this will fill 649 * chunk of inodes. If the filesystem is striped, this will fill
@@ -667,7 +653,7 @@ xfs_ialloc_ag_alloc(
667 newino = be32_to_cpu(agi->agi_newino); 653 newino = be32_to_cpu(agi->agi_newino);
668 agno = be32_to_cpu(agi->agi_seqno); 654 agno = be32_to_cpu(agi->agi_seqno);
669 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + 655 args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
670 args.mp->m_ialloc_blks; 656 igeo->ialloc_blks;
671 if (do_sparse) 657 if (do_sparse)
672 goto sparse_alloc; 658 goto sparse_alloc;
673 if (likely(newino != NULLAGINO && 659 if (likely(newino != NULLAGINO &&
@@ -690,10 +676,10 @@ xfs_ialloc_ag_alloc(
690 * but not to use them in the actual exact allocation. 676 * but not to use them in the actual exact allocation.
691 */ 677 */
692 args.alignment = 1; 678 args.alignment = 1;
693 args.minalignslop = args.mp->m_cluster_align - 1; 679 args.minalignslop = igeo->cluster_align - 1;
694 680
695 /* Allow space for the inode btree to split. */ 681 /* Allow space for the inode btree to split. */
696 args.minleft = args.mp->m_in_maxlevels - 1; 682 args.minleft = igeo->inobt_maxlevels - 1;
697 if ((error = xfs_alloc_vextent(&args))) 683 if ((error = xfs_alloc_vextent(&args)))
698 return error; 684 return error;
699 685
@@ -720,12 +706,12 @@ xfs_ialloc_ag_alloc(
720 * pieces, so don't need alignment anyway. 706 * pieces, so don't need alignment anyway.
721 */ 707 */
722 isaligned = 0; 708 isaligned = 0;
723 if (args.mp->m_sinoalign) { 709 if (igeo->ialloc_align) {
724 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN)); 710 ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
725 args.alignment = args.mp->m_dalign; 711 args.alignment = args.mp->m_dalign;
726 isaligned = 1; 712 isaligned = 1;
727 } else 713 } else
728 args.alignment = args.mp->m_cluster_align; 714 args.alignment = igeo->cluster_align;
729 /* 715 /*
730 * Need to figure out where to allocate the inode blocks. 716 * Need to figure out where to allocate the inode blocks.
731 * Ideally they should be spaced out through the a.g. 717 * Ideally they should be spaced out through the a.g.
@@ -741,7 +727,7 @@ xfs_ialloc_ag_alloc(
741 /* 727 /*
742 * Allow space for the inode btree to split. 728 * Allow space for the inode btree to split.
743 */ 729 */
744 args.minleft = args.mp->m_in_maxlevels - 1; 730 args.minleft = igeo->inobt_maxlevels - 1;
745 if ((error = xfs_alloc_vextent(&args))) 731 if ((error = xfs_alloc_vextent(&args)))
746 return error; 732 return error;
747 } 733 }
@@ -754,7 +740,7 @@ xfs_ialloc_ag_alloc(
754 args.type = XFS_ALLOCTYPE_NEAR_BNO; 740 args.type = XFS_ALLOCTYPE_NEAR_BNO;
755 args.agbno = be32_to_cpu(agi->agi_root); 741 args.agbno = be32_to_cpu(agi->agi_root);
756 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); 742 args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno);
757 args.alignment = args.mp->m_cluster_align; 743 args.alignment = igeo->cluster_align;
758 if ((error = xfs_alloc_vextent(&args))) 744 if ((error = xfs_alloc_vextent(&args)))
759 return error; 745 return error;
760 } 746 }
@@ -764,7 +750,7 @@ xfs_ialloc_ag_alloc(
764 * the sparse allocation length is smaller than a full chunk. 750 * the sparse allocation length is smaller than a full chunk.
765 */ 751 */
766 if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) && 752 if (xfs_sb_version_hassparseinodes(&args.mp->m_sb) &&
767 args.mp->m_ialloc_min_blks < args.mp->m_ialloc_blks && 753 igeo->ialloc_min_blks < igeo->ialloc_blks &&
768 args.fsbno == NULLFSBLOCK) { 754 args.fsbno == NULLFSBLOCK) {
769sparse_alloc: 755sparse_alloc:
770 args.type = XFS_ALLOCTYPE_NEAR_BNO; 756 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -773,7 +759,7 @@ sparse_alloc:
773 args.alignment = args.mp->m_sb.sb_spino_align; 759 args.alignment = args.mp->m_sb.sb_spino_align;
774 args.prod = 1; 760 args.prod = 1;
775 761
776 args.minlen = args.mp->m_ialloc_min_blks; 762 args.minlen = igeo->ialloc_min_blks;
777 args.maxlen = args.minlen; 763 args.maxlen = args.minlen;
778 764
779 /* 765 /*
@@ -789,7 +775,7 @@ sparse_alloc:
789 args.min_agbno = args.mp->m_sb.sb_inoalignmt; 775 args.min_agbno = args.mp->m_sb.sb_inoalignmt;
790 args.max_agbno = round_down(args.mp->m_sb.sb_agblocks, 776 args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
791 args.mp->m_sb.sb_inoalignmt) - 777 args.mp->m_sb.sb_inoalignmt) -
792 args.mp->m_ialloc_blks; 778 igeo->ialloc_blks;
793 779
794 error = xfs_alloc_vextent(&args); 780 error = xfs_alloc_vextent(&args);
795 if (error) 781 if (error)
@@ -1006,7 +992,7 @@ xfs_ialloc_ag_select(
1006 * space needed for alignment of inode chunks when checking the 992 * space needed for alignment of inode chunks when checking the
1007 * longest contiguous free space in the AG - this prevents us 993 * longest contiguous free space in the AG - this prevents us
1008 * from getting ENOSPC because we have free space larger than 994 * from getting ENOSPC because we have free space larger than
1009 * m_ialloc_blks but alignment constraints prevent us from using 995 * ialloc_blks but alignment constraints prevent us from using
1010 * it. 996 * it.
1011 * 997 *
1012 * If we can't find an AG with space for full alignment slack to 998 * If we can't find an AG with space for full alignment slack to
@@ -1015,9 +1001,9 @@ xfs_ialloc_ag_select(
1015 * if we fail allocation due to alignment issues then it is most 1001 * if we fail allocation due to alignment issues then it is most
1016 * likely a real ENOSPC condition. 1002 * likely a real ENOSPC condition.
1017 */ 1003 */
1018 ineed = mp->m_ialloc_min_blks; 1004 ineed = M_IGEO(mp)->ialloc_min_blks;
1019 if (flags && ineed > 1) 1005 if (flags && ineed > 1)
1020 ineed += mp->m_cluster_align; 1006 ineed += M_IGEO(mp)->cluster_align;
1021 longest = pag->pagf_longest; 1007 longest = pag->pagf_longest;
1022 if (!longest) 1008 if (!longest)
1023 longest = pag->pagf_flcount > 0; 1009 longest = pag->pagf_flcount > 0;
@@ -1703,6 +1689,7 @@ xfs_dialloc(
1703 int noroom = 0; 1689 int noroom = 0;
1704 xfs_agnumber_t start_agno; 1690 xfs_agnumber_t start_agno;
1705 struct xfs_perag *pag; 1691 struct xfs_perag *pag;
1692 struct xfs_ino_geometry *igeo = M_IGEO(mp);
1706 int okalloc = 1; 1693 int okalloc = 1;
1707 1694
1708 if (*IO_agbp) { 1695 if (*IO_agbp) {
@@ -1733,9 +1720,9 @@ xfs_dialloc(
1733 * Read rough value of mp->m_icount by percpu_counter_read_positive, 1720 * Read rough value of mp->m_icount by percpu_counter_read_positive,
1734 * which will sacrifice the preciseness but improve the performance. 1721 * which will sacrifice the preciseness but improve the performance.
1735 */ 1722 */
1736 if (mp->m_maxicount && 1723 if (igeo->maxicount &&
1737 percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos 1724 percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
1738 > mp->m_maxicount) { 1725 > igeo->maxicount) {
1739 noroom = 1; 1726 noroom = 1;
1740 okalloc = 0; 1727 okalloc = 0;
1741 } 1728 }
@@ -1852,7 +1839,8 @@ xfs_difree_inode_chunk(
1852 if (!xfs_inobt_issparse(rec->ir_holemask)) { 1839 if (!xfs_inobt_issparse(rec->ir_holemask)) {
1853 /* not sparse, calculate extent info directly */ 1840 /* not sparse, calculate extent info directly */
1854 xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), 1841 xfs_bmap_add_free(tp, XFS_AGB_TO_FSB(mp, agno, sagbno),
1855 mp->m_ialloc_blks, &XFS_RMAP_OINFO_INODES); 1842 M_IGEO(mp)->ialloc_blks,
1843 &XFS_RMAP_OINFO_INODES);
1856 return; 1844 return;
1857 } 1845 }
1858 1846
@@ -2261,7 +2249,7 @@ xfs_imap_lookup(
2261 2249
2262 /* check that the returned record contains the required inode */ 2250 /* check that the returned record contains the required inode */
2263 if (rec.ir_startino > agino || 2251 if (rec.ir_startino > agino ||
2264 rec.ir_startino + mp->m_ialloc_inos <= agino) 2252 rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino)
2265 return -EINVAL; 2253 return -EINVAL;
2266 2254
2267 /* for untrusted inodes check it is allocated first */ 2255 /* for untrusted inodes check it is allocated first */
@@ -2352,7 +2340,7 @@ xfs_imap(
2352 * If the inode cluster size is the same as the blocksize or 2340 * If the inode cluster size is the same as the blocksize or
2353 * smaller we get to the buffer by simple arithmetics. 2341 * smaller we get to the buffer by simple arithmetics.
2354 */ 2342 */
2355 if (mp->m_blocks_per_cluster == 1) { 2343 if (M_IGEO(mp)->blocks_per_cluster == 1) {
2356 offset = XFS_INO_TO_OFFSET(mp, ino); 2344 offset = XFS_INO_TO_OFFSET(mp, ino);
2357 ASSERT(offset < mp->m_sb.sb_inopblock); 2345 ASSERT(offset < mp->m_sb.sb_inopblock);
2358 2346
@@ -2368,8 +2356,8 @@ xfs_imap(
2368 * find the location. Otherwise we have to do a btree 2356 * find the location. Otherwise we have to do a btree
2369 * lookup to find the location. 2357 * lookup to find the location.
2370 */ 2358 */
2371 if (mp->m_inoalign_mask) { 2359 if (M_IGEO(mp)->inoalign_mask) {
2372 offset_agbno = agbno & mp->m_inoalign_mask; 2360 offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
2373 chunk_agbno = agbno - offset_agbno; 2361 chunk_agbno = agbno - offset_agbno;
2374 } else { 2362 } else {
2375 error = xfs_imap_lookup(mp, tp, agno, agino, agbno, 2363 error = xfs_imap_lookup(mp, tp, agno, agino, agbno,
@@ -2381,13 +2369,13 @@ xfs_imap(
2381out_map: 2369out_map:
2382 ASSERT(agbno >= chunk_agbno); 2370 ASSERT(agbno >= chunk_agbno);
2383 cluster_agbno = chunk_agbno + 2371 cluster_agbno = chunk_agbno +
2384 ((offset_agbno / mp->m_blocks_per_cluster) * 2372 ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) *
2385 mp->m_blocks_per_cluster); 2373 M_IGEO(mp)->blocks_per_cluster);
2386 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) + 2374 offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
2387 XFS_INO_TO_OFFSET(mp, ino); 2375 XFS_INO_TO_OFFSET(mp, ino);
2388 2376
2389 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno); 2377 imap->im_blkno = XFS_AGB_TO_DADDR(mp, agno, cluster_agbno);
2390 imap->im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); 2378 imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
2391 imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); 2379 imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
2392 2380
2393 /* 2381 /*
@@ -2409,20 +2397,6 @@ out_map:
2409} 2397}
2410 2398
2411/* 2399/*
2412 * Compute and fill in value of m_in_maxlevels.
2413 */
2414void
2415xfs_ialloc_compute_maxlevels(
2416 xfs_mount_t *mp) /* file system mount structure */
2417{
2418 uint inodes;
2419
2420 inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
2421 mp->m_in_maxlevels = xfs_btree_compute_maxlevels(mp->m_inobt_mnr,
2422 inodes);
2423}
2424
2425/*
2426 * Log specified fields for the ag hdr (inode section). The growth of the agi 2400 * Log specified fields for the ag hdr (inode section). The growth of the agi
2427 * structure over time requires that we interpret the buffer as two logical 2401 * structure over time requires that we interpret the buffer as two logical
2428 * regions delineated by the end of the unlinked list. This is due to the size 2402 * regions delineated by the end of the unlinked list. This is due to the size
@@ -2493,7 +2467,7 @@ static xfs_failaddr_t
2493xfs_agi_verify( 2467xfs_agi_verify(
2494 struct xfs_buf *bp) 2468 struct xfs_buf *bp)
2495{ 2469{
2496 struct xfs_mount *mp = bp->b_target->bt_mount; 2470 struct xfs_mount *mp = bp->b_mount;
2497 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 2471 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
2498 int i; 2472 int i;
2499 2473
@@ -2545,7 +2519,7 @@ static void
2545xfs_agi_read_verify( 2519xfs_agi_read_verify(
2546 struct xfs_buf *bp) 2520 struct xfs_buf *bp)
2547{ 2521{
2548 struct xfs_mount *mp = bp->b_target->bt_mount; 2522 struct xfs_mount *mp = bp->b_mount;
2549 xfs_failaddr_t fa; 2523 xfs_failaddr_t fa;
2550 2524
2551 if (xfs_sb_version_hascrc(&mp->m_sb) && 2525 if (xfs_sb_version_hascrc(&mp->m_sb) &&
@@ -2562,7 +2536,7 @@ static void
2562xfs_agi_write_verify( 2536xfs_agi_write_verify(
2563 struct xfs_buf *bp) 2537 struct xfs_buf *bp)
2564{ 2538{
2565 struct xfs_mount *mp = bp->b_target->bt_mount; 2539 struct xfs_mount *mp = bp->b_mount;
2566 struct xfs_buf_log_item *bip = bp->b_log_item; 2540 struct xfs_buf_log_item *bip = bp->b_log_item;
2567 xfs_failaddr_t fa; 2541 xfs_failaddr_t fa;
2568 2542
@@ -2768,3 +2742,110 @@ xfs_ialloc_count_inodes(
2768 *freecount = ci.freecount; 2742 *freecount = ci.freecount;
2769 return 0; 2743 return 0;
2770} 2744}
2745
2746/*
2747 * Initialize inode-related geometry information.
2748 *
2749 * Compute the inode btree min and max levels and set maxicount.
2750 *
2751 * Set the inode cluster size. This may still be overridden by the file
2752 * system block size if it is larger than the chosen cluster size.
2753 *
2754 * For v5 filesystems, scale the cluster size with the inode size to keep a
2755 * constant ratio of inode per cluster buffer, but only if mkfs has set the
2756 * inode alignment value appropriately for larger cluster sizes.
2757 *
2758 * Then compute the inode cluster alignment information.
2759 */
2760void
2761xfs_ialloc_setup_geometry(
2762 struct xfs_mount *mp)
2763{
2764 struct xfs_sb *sbp = &mp->m_sb;
2765 struct xfs_ino_geometry *igeo = M_IGEO(mp);
2766 uint64_t icount;
2767 uint inodes;
2768
2769 /* Compute inode btree geometry. */
2770 igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
2771 igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
2772 igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
2773 igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2;
2774 igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;
2775
2776 igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
2777 sbp->sb_inopblock);
2778 igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog;
2779
2780 if (sbp->sb_spino_align)
2781 igeo->ialloc_min_blks = sbp->sb_spino_align;
2782 else
2783 igeo->ialloc_min_blks = igeo->ialloc_blks;
2784
2785 /* Compute and fill in value of m_ino_geo.inobt_maxlevels. */
2786 inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
2787 igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr,
2788 inodes);
2789
2790 /* Set the maximum inode count for this filesystem. */
2791 if (sbp->sb_imax_pct) {
2792 /*
2793 * Make sure the maximum inode count is a multiple
2794 * of the units we allocate inodes in.
2795 */
2796 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
2797 do_div(icount, 100);
2798 do_div(icount, igeo->ialloc_blks);
2799 igeo->maxicount = XFS_FSB_TO_INO(mp,
2800 icount * igeo->ialloc_blks);
2801 } else {
2802 igeo->maxicount = 0;
2803 }
2804
2805 /*
2806 * Compute the desired size of an inode cluster buffer size, which
2807 * starts at 8K and (on v5 filesystems) scales up with larger inode
2808 * sizes.
2809 *
2810 * Preserve the desired inode cluster size because the sparse inodes
2811 * feature uses that desired size (not the actual size) to compute the
2812 * sparse inode alignment. The mount code validates this value, so we
2813 * cannot change the behavior.
2814 */
2815 igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE;
2816 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2817 int new_size = igeo->inode_cluster_size_raw;
2818
2819 new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
2820 if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
2821 igeo->inode_cluster_size_raw = new_size;
2822 }
2823
2824 /* Calculate inode cluster ratios. */
2825 if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize)
2826 igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp,
2827 igeo->inode_cluster_size_raw);
2828 else
2829 igeo->blocks_per_cluster = 1;
2830 igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster);
2831 igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster);
2832
2833 /* Calculate inode cluster alignment. */
2834 if (xfs_sb_version_hasalign(&mp->m_sb) &&
2835 mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster)
2836 igeo->cluster_align = mp->m_sb.sb_inoalignmt;
2837 else
2838 igeo->cluster_align = 1;
2839 igeo->inoalign_mask = igeo->cluster_align - 1;
2840 igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align);
2841
2842 /*
2843 * If we are using stripe alignment, check whether
2844 * the stripe unit is a multiple of the inode alignment
2845 */
2846 if (mp->m_dalign && igeo->inoalign_mask &&
2847 !(mp->m_dalign & igeo->inoalign_mask))
2848 igeo->ialloc_align = mp->m_dalign;
2849 else
2850 igeo->ialloc_align = 0;
2851}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index e936b7cc9389..323592d563d5 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -23,16 +23,6 @@ struct xfs_icluster {
23 * sparse chunks */ 23 * sparse chunks */
24}; 24};
25 25
26/* Calculate and return the number of filesystem blocks per inode cluster */
27static inline int
28xfs_icluster_size_fsb(
29 struct xfs_mount *mp)
30{
31 if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size)
32 return 1;
33 return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog;
34}
35
36/* 26/*
37 * Make an inode pointer out of the buffer/offset. 27 * Make an inode pointer out of the buffer/offset.
38 */ 28 */
@@ -96,13 +86,6 @@ xfs_imap(
96 uint flags); /* flags for inode btree lookup */ 86 uint flags); /* flags for inode btree lookup */
97 87
98/* 88/*
99 * Compute and fill in value of m_in_maxlevels.
100 */
101void
102xfs_ialloc_compute_maxlevels(
103 struct xfs_mount *mp); /* file system mount structure */
104
105/*
106 * Log specified fields for the ag hdr (inode section) 89 * Log specified fields for the ag hdr (inode section)
107 */ 90 */
108void 91void
@@ -168,5 +151,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
168 int *stat); 151 int *stat);
169 152
170int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); 153int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
154void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
171 155
172#endif /* __XFS_IALLOC_H__ */ 156#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index bc2dfacd2f4a..b82992f795aa 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -11,14 +11,12 @@
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h" 12#include "xfs_bit.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_inode.h"
15#include "xfs_btree.h" 14#include "xfs_btree.h"
16#include "xfs_ialloc.h" 15#include "xfs_ialloc.h"
17#include "xfs_ialloc_btree.h" 16#include "xfs_ialloc_btree.h"
18#include "xfs_alloc.h" 17#include "xfs_alloc.h"
19#include "xfs_error.h" 18#include "xfs_error.h"
20#include "xfs_trace.h" 19#include "xfs_trace.h"
21#include "xfs_cksum.h"
22#include "xfs_trans.h" 20#include "xfs_trans.h"
23#include "xfs_rmap.h" 21#include "xfs_rmap.h"
24 22
@@ -28,7 +26,7 @@ xfs_inobt_get_minrecs(
28 struct xfs_btree_cur *cur, 26 struct xfs_btree_cur *cur,
29 int level) 27 int level)
30{ 28{
31 return cur->bc_mp->m_inobt_mnr[level != 0]; 29 return M_IGEO(cur->bc_mp)->inobt_mnr[level != 0];
32} 30}
33 31
34STATIC struct xfs_btree_cur * 32STATIC struct xfs_btree_cur *
@@ -164,7 +162,7 @@ xfs_inobt_get_maxrecs(
164 struct xfs_btree_cur *cur, 162 struct xfs_btree_cur *cur,
165 int level) 163 int level)
166{ 164{
167 return cur->bc_mp->m_inobt_mxr[level != 0]; 165 return M_IGEO(cur->bc_mp)->inobt_mxr[level != 0];
168} 166}
169 167
170STATIC void 168STATIC void
@@ -255,7 +253,7 @@ static xfs_failaddr_t
255xfs_inobt_verify( 253xfs_inobt_verify(
256 struct xfs_buf *bp) 254 struct xfs_buf *bp)
257{ 255{
258 struct xfs_mount *mp = bp->b_target->bt_mount; 256 struct xfs_mount *mp = bp->b_mount;
259 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 257 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
260 xfs_failaddr_t fa; 258 xfs_failaddr_t fa;
261 unsigned int level; 259 unsigned int level;
@@ -281,10 +279,11 @@ xfs_inobt_verify(
281 279
282 /* level verification */ 280 /* level verification */
283 level = be16_to_cpu(block->bb_level); 281 level = be16_to_cpu(block->bb_level);
284 if (level >= mp->m_in_maxlevels) 282 if (level >= M_IGEO(mp)->inobt_maxlevels)
285 return __this_address; 283 return __this_address;
286 284
287 return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]); 285 return xfs_btree_sblock_verify(bp,
286 M_IGEO(mp)->inobt_mxr[level != 0]);
288} 287}
289 288
290static void 289static void
@@ -546,7 +545,7 @@ xfs_inobt_max_size(
546 xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno); 545 xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno);
547 546
548 /* Bail out if we're uninitialized, which can happen in mkfs. */ 547 /* Bail out if we're uninitialized, which can happen in mkfs. */
549 if (mp->m_inobt_mxr[0] == 0) 548 if (M_IGEO(mp)->inobt_mxr[0] == 0)
550 return 0; 549 return 0;
551 550
552 /* 551 /*
@@ -558,11 +557,41 @@ xfs_inobt_max_size(
558 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) 557 XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno)
559 agblocks -= mp->m_sb.sb_logblocks; 558 agblocks -= mp->m_sb.sb_logblocks;
560 559
561 return xfs_btree_calc_size(mp->m_inobt_mnr, 560 return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr,
562 (uint64_t)agblocks * mp->m_sb.sb_inopblock / 561 (uint64_t)agblocks * mp->m_sb.sb_inopblock /
563 XFS_INODES_PER_CHUNK); 562 XFS_INODES_PER_CHUNK);
564} 563}
565 564
565/* Read AGI and create inobt cursor. */
566int
567xfs_inobt_cur(
568 struct xfs_mount *mp,
569 struct xfs_trans *tp,
570 xfs_agnumber_t agno,
571 xfs_btnum_t which,
572 struct xfs_btree_cur **curpp,
573 struct xfs_buf **agi_bpp)
574{
575 struct xfs_btree_cur *cur;
576 int error;
577
578 ASSERT(*agi_bpp == NULL);
579 ASSERT(*curpp == NULL);
580
581 error = xfs_ialloc_read_agi(mp, tp, agno, agi_bpp);
582 if (error)
583 return error;
584
585 cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which);
586 if (!cur) {
587 xfs_trans_brelse(tp, *agi_bpp);
588 *agi_bpp = NULL;
589 return -ENOMEM;
590 }
591 *curpp = cur;
592 return 0;
593}
594
566static int 595static int
567xfs_inobt_count_blocks( 596xfs_inobt_count_blocks(
568 struct xfs_mount *mp, 597 struct xfs_mount *mp,
@@ -571,15 +600,14 @@ xfs_inobt_count_blocks(
571 xfs_btnum_t btnum, 600 xfs_btnum_t btnum,
572 xfs_extlen_t *tree_blocks) 601 xfs_extlen_t *tree_blocks)
573{ 602{
574 struct xfs_buf *agbp; 603 struct xfs_buf *agbp = NULL;
575 struct xfs_btree_cur *cur; 604 struct xfs_btree_cur *cur = NULL;
576 int error; 605 int error;
577 606
578 error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); 607 error = xfs_inobt_cur(mp, tp, agno, btnum, &cur, &agbp);
579 if (error) 608 if (error)
580 return error; 609 return error;
581 610
582 cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum);
583 error = xfs_btree_count_blocks(cur, tree_blocks); 611 error = xfs_btree_count_blocks(cur, tree_blocks);
584 xfs_btree_del_cursor(cur, error); 612 xfs_btree_del_cursor(cur, error);
585 xfs_trans_brelse(tp, agbp); 613 xfs_trans_brelse(tp, agbp);
@@ -619,5 +647,5 @@ xfs_iallocbt_calc_size(
619 struct xfs_mount *mp, 647 struct xfs_mount *mp,
620 unsigned long long len) 648 unsigned long long len)
621{ 649{
622 return xfs_btree_calc_size(mp->m_inobt_mnr, len); 650 return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, len);
623} 651}
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h
index ebdd0c6b8766..951305ecaae1 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.h
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.h
@@ -64,5 +64,8 @@ int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp,
64 xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used); 64 xfs_agnumber_t agno, xfs_extlen_t *ask, xfs_extlen_t *used);
65extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, 65extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp,
66 unsigned long long len); 66 unsigned long long len);
67int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp,
68 xfs_agnumber_t agno, xfs_btnum_t btnum,
69 struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp);
67 70
68#endif /* __XFS_IALLOC_BTREE_H__ */ 71#endif /* __XFS_IALLOC_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index bc690f2409fa..27aa3f2bc4bc 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -3,18 +3,14 @@
3 * Copyright (c) 2017 Christoph Hellwig. 3 * Copyright (c) 2017 Christoph Hellwig.
4 */ 4 */
5 5
6#include <linux/cache.h>
7#include <linux/kernel.h>
8#include <linux/slab.h>
9#include "xfs.h" 6#include "xfs.h"
7#include "xfs_shared.h"
10#include "xfs_format.h" 8#include "xfs_format.h"
11#include "xfs_bit.h" 9#include "xfs_bit.h"
12#include "xfs_log_format.h" 10#include "xfs_log_format.h"
13#include "xfs_inode.h" 11#include "xfs_inode.h"
14#include "xfs_inode_fork.h"
15#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
16#include "xfs_mount.h" 13#include "xfs_mount.h"
17#include "xfs_bmap.h"
18#include "xfs_trace.h" 14#include "xfs_trace.h"
19 15
20/* 16/*
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index e021d5133ccb..28ab3c5255e1 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -10,11 +10,9 @@
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_defer.h"
14#include "xfs_inode.h" 13#include "xfs_inode.h"
15#include "xfs_errortag.h" 14#include "xfs_errortag.h"
16#include "xfs_error.h" 15#include "xfs_error.h"
17#include "xfs_cksum.h"
18#include "xfs_icache.h" 16#include "xfs_icache.h"
19#include "xfs_trans.h" 17#include "xfs_trans.h"
20#include "xfs_ialloc.h" 18#include "xfs_ialloc.h"
@@ -33,12 +31,9 @@ xfs_inobp_check(
33 xfs_buf_t *bp) 31 xfs_buf_t *bp)
34{ 32{
35 int i; 33 int i;
36 int j;
37 xfs_dinode_t *dip; 34 xfs_dinode_t *dip;
38 35
39 j = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 36 for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
40
41 for (i = 0; i < j; i++) {
42 dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize); 37 dip = xfs_buf_offset(bp, i * mp->m_sb.sb_inodesize);
43 if (!dip->di_next_unlinked) { 38 if (!dip->di_next_unlinked) {
44 xfs_alert(mp, 39 xfs_alert(mp,
@@ -80,7 +75,7 @@ xfs_inode_buf_verify(
80 struct xfs_buf *bp, 75 struct xfs_buf *bp,
81 bool readahead) 76 bool readahead)
82{ 77{
83 struct xfs_mount *mp = bp->b_target->bt_mount; 78 struct xfs_mount *mp = bp->b_mount;
84 xfs_agnumber_t agno; 79 xfs_agnumber_t agno;
85 int i; 80 int i;
86 int ni; 81 int ni;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index f9acf1d436f6..bf3e04018246 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -3,10 +3,10 @@
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include <linux/log2.h>
7 6
8#include "xfs.h" 7#include "xfs.h"
9#include "xfs_fs.h" 8#include "xfs_fs.h"
9#include "xfs_shared.h"
10#include "xfs_format.h" 10#include "xfs_format.h"
11#include "xfs_log_format.h" 11#include "xfs_log_format.h"
12#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
@@ -19,12 +19,10 @@
19#include "xfs_bmap.h" 19#include "xfs_bmap.h"
20#include "xfs_error.h" 20#include "xfs_error.h"
21#include "xfs_trace.h" 21#include "xfs_trace.h"
22#include "xfs_attr_sf.h"
23#include "xfs_da_format.h" 22#include "xfs_da_format.h"
24#include "xfs_da_btree.h" 23#include "xfs_da_btree.h"
25#include "xfs_dir2_priv.h" 24#include "xfs_dir2_priv.h"
26#include "xfs_attr_leaf.h" 25#include "xfs_attr_leaf.h"
27#include "xfs_shared.h"
28 26
29kmem_zone_t *xfs_ifork_zone; 27kmem_zone_t *xfs_ifork_zone;
30 28
diff --git a/fs/xfs/libxfs/xfs_log_rlimit.c b/fs/xfs/libxfs/xfs_log_rlimit.c
index 1b542ec11d5d..7f55eb3f3653 100644
--- a/fs/xfs/libxfs/xfs_log_rlimit.c
+++ b/fs/xfs/libxfs/xfs_log_rlimit.c
@@ -12,9 +12,7 @@
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_da_format.h" 13#include "xfs_da_format.h"
14#include "xfs_trans_space.h" 14#include "xfs_trans_space.h"
15#include "xfs_inode.h"
16#include "xfs_da_btree.h" 15#include "xfs_da_btree.h"
17#include "xfs_attr_leaf.h"
18#include "xfs_bmap_btree.h" 16#include "xfs_bmap_btree.h"
19 17
20/* 18/*
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 542aa1475b5f..51bb9bdb0e84 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -9,7 +9,6 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_sb.h"
13#include "xfs_mount.h" 12#include "xfs_mount.h"
14#include "xfs_defer.h" 13#include "xfs_defer.h"
15#include "xfs_btree.h" 14#include "xfs_btree.h"
@@ -19,7 +18,6 @@
19#include "xfs_errortag.h" 18#include "xfs_errortag.h"
20#include "xfs_error.h" 19#include "xfs_error.h"
21#include "xfs_trace.h" 20#include "xfs_trace.h"
22#include "xfs_cksum.h"
23#include "xfs_trans.h" 21#include "xfs_trans.h"
24#include "xfs_bit.h" 22#include "xfs_bit.h"
25#include "xfs_refcount.h" 23#include "xfs_refcount.h"
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 5d9de9b21726..38529dbacd55 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -12,12 +12,10 @@
12#include "xfs_sb.h" 12#include "xfs_sb.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_btree.h" 14#include "xfs_btree.h"
15#include "xfs_bmap.h"
16#include "xfs_refcount_btree.h" 15#include "xfs_refcount_btree.h"
17#include "xfs_alloc.h" 16#include "xfs_alloc.h"
18#include "xfs_error.h" 17#include "xfs_error.h"
19#include "xfs_trace.h" 18#include "xfs_trace.h"
20#include "xfs_cksum.h"
21#include "xfs_trans.h" 19#include "xfs_trans.h"
22#include "xfs_bit.h" 20#include "xfs_bit.h"
23#include "xfs_rmap.h" 21#include "xfs_rmap.h"
@@ -203,7 +201,7 @@ STATIC xfs_failaddr_t
203xfs_refcountbt_verify( 201xfs_refcountbt_verify(
204 struct xfs_buf *bp) 202 struct xfs_buf *bp)
205{ 203{
206 struct xfs_mount *mp = bp->b_target->bt_mount; 204 struct xfs_mount *mp = bp->b_mount;
207 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 205 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
208 struct xfs_perag *pag = bp->b_pag; 206 struct xfs_perag *pag = bp->b_pag;
209 xfs_failaddr_t fa; 207 xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 8ed885507dd8..e6aeb390b2fb 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -10,24 +10,17 @@
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h" 12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h" 13#include "xfs_mount.h"
15#include "xfs_defer.h" 14#include "xfs_defer.h"
16#include "xfs_da_format.h"
17#include "xfs_da_btree.h"
18#include "xfs_btree.h" 15#include "xfs_btree.h"
19#include "xfs_trans.h" 16#include "xfs_trans.h"
20#include "xfs_alloc.h" 17#include "xfs_alloc.h"
21#include "xfs_rmap.h" 18#include "xfs_rmap.h"
22#include "xfs_rmap_btree.h" 19#include "xfs_rmap_btree.h"
23#include "xfs_trans_space.h"
24#include "xfs_trace.h" 20#include "xfs_trace.h"
25#include "xfs_errortag.h" 21#include "xfs_errortag.h"
26#include "xfs_error.h" 22#include "xfs_error.h"
27#include "xfs_extent_busy.h"
28#include "xfs_bmap.h"
29#include "xfs_inode.h" 23#include "xfs_inode.h"
30#include "xfs_ialloc.h"
31 24
32/* 25/*
33 * Lookup the first record less than or equal to [bno, len, owner, offset] 26 * Lookup the first record less than or equal to [bno, len, owner, offset]
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 5d1f8884c888..fc78efa52c94 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -9,18 +9,14 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h" 12#include "xfs_sb.h"
14#include "xfs_mount.h" 13#include "xfs_mount.h"
15#include "xfs_defer.h"
16#include "xfs_inode.h"
17#include "xfs_trans.h" 14#include "xfs_trans.h"
18#include "xfs_alloc.h" 15#include "xfs_alloc.h"
19#include "xfs_btree.h" 16#include "xfs_btree.h"
20#include "xfs_rmap.h" 17#include "xfs_rmap.h"
21#include "xfs_rmap_btree.h" 18#include "xfs_rmap_btree.h"
22#include "xfs_trace.h" 19#include "xfs_trace.h"
23#include "xfs_cksum.h"
24#include "xfs_error.h" 20#include "xfs_error.h"
25#include "xfs_extent_busy.h" 21#include "xfs_extent_busy.h"
26#include "xfs_ag_resv.h" 22#include "xfs_ag_resv.h"
@@ -292,7 +288,7 @@ static xfs_failaddr_t
292xfs_rmapbt_verify( 288xfs_rmapbt_verify(
293 struct xfs_buf *bp) 289 struct xfs_buf *bp)
294{ 290{
295 struct xfs_mount *mp = bp->b_target->bt_mount; 291 struct xfs_mount *mp = bp->b_mount;
296 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 292 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
297 struct xfs_perag *pag = bp->b_pag; 293 struct xfs_perag *pag = bp->b_pag;
298 xfs_failaddr_t fa; 294 xfs_failaddr_t fa;
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index eaaff67e9626..8ea1efc97b41 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -13,15 +13,7 @@
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_inode.h" 14#include "xfs_inode.h"
15#include "xfs_bmap.h" 15#include "xfs_bmap.h"
16#include "xfs_bmap_util.h"
17#include "xfs_bmap_btree.h"
18#include "xfs_alloc.h"
19#include "xfs_error.h"
20#include "xfs_trans.h" 16#include "xfs_trans.h"
21#include "xfs_trans_space.h"
22#include "xfs_trace.h"
23#include "xfs_buf.h"
24#include "xfs_icache.h"
25#include "xfs_rtalloc.h" 17#include "xfs_rtalloc.h"
26 18
27 19
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index e76a3e5d28d7..a08dd8f40346 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -10,26 +10,19 @@
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h" 12#include "xfs_bit.h"
13#include "xfs_sb.h"
14#include "xfs_mount.h" 13#include "xfs_mount.h"
15#include "xfs_defer.h"
16#include "xfs_inode.h"
17#include "xfs_ialloc.h" 14#include "xfs_ialloc.h"
18#include "xfs_alloc.h" 15#include "xfs_alloc.h"
19#include "xfs_error.h" 16#include "xfs_error.h"
20#include "xfs_trace.h" 17#include "xfs_trace.h"
21#include "xfs_cksum.h"
22#include "xfs_trans.h" 18#include "xfs_trans.h"
23#include "xfs_buf_item.h" 19#include "xfs_buf_item.h"
24#include "xfs_bmap_btree.h" 20#include "xfs_bmap_btree.h"
25#include "xfs_alloc_btree.h" 21#include "xfs_alloc_btree.h"
26#include "xfs_ialloc_btree.h"
27#include "xfs_log.h" 22#include "xfs_log.h"
28#include "xfs_rmap_btree.h" 23#include "xfs_rmap_btree.h"
29#include "xfs_bmap.h"
30#include "xfs_refcount_btree.h" 24#include "xfs_refcount_btree.h"
31#include "xfs_da_format.h" 25#include "xfs_da_format.h"
32#include "xfs_da_btree.h"
33#include "xfs_health.h" 26#include "xfs_health.h"
34 27
35/* 28/*
@@ -686,7 +679,7 @@ xfs_sb_read_verify(
686 struct xfs_buf *bp) 679 struct xfs_buf *bp)
687{ 680{
688 struct xfs_sb sb; 681 struct xfs_sb sb;
689 struct xfs_mount *mp = bp->b_target->bt_mount; 682 struct xfs_mount *mp = bp->b_mount;
690 struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); 683 struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp);
691 int error; 684 int error;
692 685
@@ -752,7 +745,7 @@ xfs_sb_write_verify(
752 struct xfs_buf *bp) 745 struct xfs_buf *bp)
753{ 746{
754 struct xfs_sb sb; 747 struct xfs_sb sb;
755 struct xfs_mount *mp = bp->b_target->bt_mount; 748 struct xfs_mount *mp = bp->b_mount;
756 struct xfs_buf_log_item *bip = bp->b_log_item; 749 struct xfs_buf_log_item *bip = bp->b_log_item;
757 int error; 750 int error;
758 751
@@ -800,12 +793,14 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
800 * 793 *
801 * Mount initialization code establishing various mount 794 * Mount initialization code establishing various mount
802 * fields from the superblock associated with the given 795 * fields from the superblock associated with the given
803 * mount structure 796 * mount structure.
797 *
798 * Inode geometry are calculated in xfs_ialloc_setup_geometry.
804 */ 799 */
805void 800void
806xfs_sb_mount_common( 801xfs_sb_mount_common(
807 struct xfs_mount *mp, 802 struct xfs_mount *mp,
808 struct xfs_sb *sbp) 803 struct xfs_sb *sbp)
809{ 804{
810 mp->m_agfrotor = mp->m_agirotor = 0; 805 mp->m_agfrotor = mp->m_agirotor = 0;
811 mp->m_maxagi = mp->m_sb.sb_agcount; 806 mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -813,7 +808,6 @@ xfs_sb_mount_common(
813 mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; 808 mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT;
814 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; 809 mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
815 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1; 810 mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
816 mp->m_agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
817 mp->m_blockmask = sbp->sb_blocksize - 1; 811 mp->m_blockmask = sbp->sb_blocksize - 1;
818 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG; 812 mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
819 mp->m_blockwmask = mp->m_blockwsize - 1; 813 mp->m_blockwmask = mp->m_blockwsize - 1;
@@ -823,11 +817,6 @@ xfs_sb_mount_common(
823 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2; 817 mp->m_alloc_mnr[0] = mp->m_alloc_mxr[0] / 2;
824 mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2; 818 mp->m_alloc_mnr[1] = mp->m_alloc_mxr[1] / 2;
825 819
826 mp->m_inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
827 mp->m_inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
828 mp->m_inobt_mnr[0] = mp->m_inobt_mxr[0] / 2;
829 mp->m_inobt_mnr[1] = mp->m_inobt_mxr[1] / 2;
830
831 mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1); 820 mp->m_bmap_dmxr[0] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 1);
832 mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0); 821 mp->m_bmap_dmxr[1] = xfs_bmbt_maxrecs(mp, sbp->sb_blocksize, 0);
833 mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2; 822 mp->m_bmap_dmnr[0] = mp->m_bmap_dmxr[0] / 2;
@@ -844,14 +833,6 @@ xfs_sb_mount_common(
844 mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2; 833 mp->m_refc_mnr[1] = mp->m_refc_mxr[1] / 2;
845 834
846 mp->m_bsize = XFS_FSB_TO_BB(mp, 1); 835 mp->m_bsize = XFS_FSB_TO_BB(mp, 1);
847 mp->m_ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
848 sbp->sb_inopblock);
849 mp->m_ialloc_blks = mp->m_ialloc_inos >> sbp->sb_inopblog;
850
851 if (sbp->sb_spino_align)
852 mp->m_ialloc_min_blks = sbp->sb_spino_align;
853 else
854 mp->m_ialloc_min_blks = mp->m_ialloc_blks;
855 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); 836 mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
856 mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); 837 mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp);
857} 838}
@@ -939,7 +920,7 @@ xfs_log_sb(
939 struct xfs_trans *tp) 920 struct xfs_trans *tp)
940{ 921{
941 struct xfs_mount *mp = tp->t_mountp; 922 struct xfs_mount *mp = tp->t_mountp;
942 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); 923 struct xfs_buf *bp = xfs_trans_getsb(tp, mp);
943 924
944 mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); 925 mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
945 mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); 926 mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
@@ -1005,7 +986,7 @@ xfs_update_secondary_sbs(
1005 986
1006 bp = xfs_buf_get(mp->m_ddev_targp, 987 bp = xfs_buf_get(mp->m_ddev_targp,
1007 XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), 988 XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
1008 XFS_FSS_TO_BB(mp, 1), 0); 989 XFS_FSS_TO_BB(mp, 1));
1009 /* 990 /*
1010 * If we get an error reading or writing alternate superblocks, 991 * If we get an error reading or writing alternate superblocks,
1011 * continue. xfs_repair chooses the "best" superblock based 992 * continue. xfs_repair chooses the "best" superblock based
@@ -1069,7 +1050,7 @@ xfs_sync_sb_buf(
1069 if (error) 1050 if (error)
1070 return error; 1051 return error;
1071 1052
1072 bp = xfs_trans_getsb(tp, mp, 0); 1053 bp = xfs_trans_getsb(tp, mp);
1073 xfs_log_sb(tp); 1054 xfs_log_sb(tp);
1074 xfs_trans_bhold(tp, bp); 1055 xfs_trans_bhold(tp, bp);
1075 xfs_trans_set_sync(tp); 1056 xfs_trans_set_sync(tp);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 4e909791aeac..e0641b7337b3 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -65,7 +65,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
65#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ 65#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
66#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ 66#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
67#define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ 67#define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */
68#define XFS_TRANS_NOFS 0x80 /* pass KM_NOFS to kmem_alloc */
69/* 68/*
70 * LOWMODE is used by the allocator to activate the lowspace algorithm - when 69 * LOWMODE is used by the allocator to activate the lowspace algorithm - when
71 * free space is running low the extent allocator may choose to allocate an 70 * free space is running low the extent allocator may choose to allocate an
@@ -136,4 +135,52 @@ void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp,
136 struct xfs_inode *ip, struct xfs_ifork *ifp); 135 struct xfs_inode *ip, struct xfs_ifork *ifp);
137xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip); 136xfs_failaddr_t xfs_symlink_shortform_verify(struct xfs_inode *ip);
138 137
138/* Computed inode geometry for the filesystem. */
139struct xfs_ino_geometry {
140 /* Maximum inode count in this filesystem. */
141 uint64_t maxicount;
142
143 /* Actual inode cluster buffer size, in bytes. */
144 unsigned int inode_cluster_size;
145
146 /*
147 * Desired inode cluster buffer size, in bytes. This value is not
148 * rounded up to at least one filesystem block, which is necessary for
149 * the sole purpose of validating sb_spino_align. Runtime code must
150 * only ever use inode_cluster_size.
151 */
152 unsigned int inode_cluster_size_raw;
153
154 /* Inode cluster sizes, adjusted to be at least 1 fsb. */
155 unsigned int inodes_per_cluster;
156 unsigned int blocks_per_cluster;
157
158 /* Inode cluster alignment. */
159 unsigned int cluster_align;
160 unsigned int cluster_align_inodes;
161 unsigned int inoalign_mask; /* mask sb_inoalignmt if used */
162
163 unsigned int inobt_mxr[2]; /* max inobt btree records */
164 unsigned int inobt_mnr[2]; /* min inobt btree records */
165 unsigned int inobt_maxlevels; /* max inobt btree levels. */
166
167 /* Size of inode allocations under normal operation. */
168 unsigned int ialloc_inos;
169 unsigned int ialloc_blks;
170
171 /* Minimum inode blocks for a sparse allocation. */
172 unsigned int ialloc_min_blks;
173
174 /* stripe unit inode alignment */
175 unsigned int ialloc_align;
176
177 unsigned int agino_log; /* #bits for agino in inum */
178};
179
180/* Keep iterating the data structure. */
181#define XFS_ITER_CONTINUE (0)
182
183/* Stop iterating the data structure. */
184#define XFS_ITER_ABORT (1)
185
139#endif /* __XFS_SHARED_H__ */ 186#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index a0ccc253c43d..3b8260ca7d1b 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -11,12 +11,8 @@
11#include "xfs_shared.h" 11#include "xfs_shared.h"
12#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_bmap_btree.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_error.h" 15#include "xfs_error.h"
17#include "xfs_trace.h"
18#include "xfs_symlink.h"
19#include "xfs_cksum.h"
20#include "xfs_trans.h" 16#include "xfs_trans.h"
21#include "xfs_buf_item.h" 17#include "xfs_buf_item.h"
22#include "xfs_log.h" 18#include "xfs_log.h"
@@ -90,7 +86,7 @@ static xfs_failaddr_t
90xfs_symlink_verify( 86xfs_symlink_verify(
91 struct xfs_buf *bp) 87 struct xfs_buf *bp)
92{ 88{
93 struct xfs_mount *mp = bp->b_target->bt_mount; 89 struct xfs_mount *mp = bp->b_mount;
94 struct xfs_dsymlink_hdr *dsl = bp->b_addr; 90 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
95 91
96 if (!xfs_sb_version_hascrc(&mp->m_sb)) 92 if (!xfs_sb_version_hascrc(&mp->m_sb))
@@ -116,7 +112,7 @@ static void
116xfs_symlink_read_verify( 112xfs_symlink_read_verify(
117 struct xfs_buf *bp) 113 struct xfs_buf *bp)
118{ 114{
119 struct xfs_mount *mp = bp->b_target->bt_mount; 115 struct xfs_mount *mp = bp->b_mount;
120 xfs_failaddr_t fa; 116 xfs_failaddr_t fa;
121 117
122 /* no verification of non-crc buffers */ 118 /* no verification of non-crc buffers */
@@ -136,7 +132,7 @@ static void
136xfs_symlink_write_verify( 132xfs_symlink_write_verify(
137 struct xfs_buf *bp) 133 struct xfs_buf *bp)
138{ 134{
139 struct xfs_mount *mp = bp->b_target->bt_mount; 135 struct xfs_mount *mp = bp->b_mount;
140 struct xfs_buf_log_item *bip = bp->b_log_item; 136 struct xfs_buf_log_item *bip = bp->b_log_item;
141 xfs_failaddr_t fa; 137 xfs_failaddr_t fa;
142 138
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 83f4ee2afc49..d12bbd526e7c 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -15,12 +15,10 @@
15#include "xfs_da_btree.h" 15#include "xfs_da_btree.h"
16#include "xfs_inode.h" 16#include "xfs_inode.h"
17#include "xfs_bmap_btree.h" 17#include "xfs_bmap_btree.h"
18#include "xfs_ialloc.h"
19#include "xfs_quota.h" 18#include "xfs_quota.h"
20#include "xfs_trans.h" 19#include "xfs_trans.h"
21#include "xfs_qm.h" 20#include "xfs_qm.h"
22#include "xfs_trans_space.h" 21#include "xfs_trans_space.h"
23#include "xfs_trace.h"
24 22
25#define _ALLOC true 23#define _ALLOC true
26#define _FREE false 24#define _FREE false
@@ -136,9 +134,10 @@ STATIC uint
136xfs_calc_inobt_res( 134xfs_calc_inobt_res(
137 struct xfs_mount *mp) 135 struct xfs_mount *mp)
138{ 136{
139 return xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + 137 return xfs_calc_buf_res(M_IGEO(mp)->inobt_maxlevels,
140 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), 138 XFS_FSB_TO_B(mp, 1)) +
141 XFS_FSB_TO_B(mp, 1)); 139 xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1),
140 XFS_FSB_TO_B(mp, 1));
142} 141}
143 142
144/* 143/*
@@ -167,7 +166,7 @@ xfs_calc_finobt_res(
167 * includes: 166 * includes:
168 * 167 *
169 * the allocation btrees: 2 trees * (max depth - 1) * block size 168 * the allocation btrees: 2 trees * (max depth - 1) * block size
170 * the inode chunk: m_ialloc_blks * N 169 * the inode chunk: m_ino_geo.ialloc_blks * N
171 * 170 *
172 * The size N of the inode chunk reservation depends on whether it is for 171 * The size N of the inode chunk reservation depends on whether it is for
173 * allocation or free and which type of create transaction is in use. An inode 172 * allocation or free and which type of create transaction is in use. An inode
@@ -193,7 +192,7 @@ xfs_calc_inode_chunk_res(
193 size = XFS_FSB_TO_B(mp, 1); 192 size = XFS_FSB_TO_B(mp, 1);
194 } 193 }
195 194
196 res += xfs_calc_buf_res(mp->m_ialloc_blks, size); 195 res += xfs_calc_buf_res(M_IGEO(mp)->ialloc_blks, size);
197 return res; 196 return res;
198} 197}
199 198
@@ -307,7 +306,7 @@ xfs_calc_iunlink_remove_reservation(
307 struct xfs_mount *mp) 306 struct xfs_mount *mp)
308{ 307{
309 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 308 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
310 2 * max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 309 2 * M_IGEO(mp)->inode_cluster_size;
311} 310}
312 311
313/* 312/*
@@ -345,7 +344,7 @@ STATIC uint
345xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) 344xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
346{ 345{
347 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + 346 return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
348 max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); 347 M_IGEO(mp)->inode_cluster_size;
349} 348}
350 349
351/* 350/*
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index a62fb950bef1..88221c7a04cc 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -56,9 +56,9 @@
56#define XFS_DIRREMOVE_SPACE_RES(mp) \ 56#define XFS_DIRREMOVE_SPACE_RES(mp) \
57 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK) 57 XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
58#define XFS_IALLOC_SPACE_RES(mp) \ 58#define XFS_IALLOC_SPACE_RES(mp) \
59 ((mp)->m_ialloc_blks + \ 59 (M_IGEO(mp)->ialloc_blks + \
60 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \ 60 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \
61 ((mp)->m_in_maxlevels - 1))) 61 (M_IGEO(mp)->inobt_maxlevels - 1)))
62 62
63/* 63/*
64 * Space reservation values for various transactions. 64 * Space reservation values for various transactions.
@@ -94,7 +94,8 @@
94#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \ 94#define XFS_SYMLINK_SPACE_RES(mp,nl,b) \
95 (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b)) 95 (XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
96#define XFS_IFREE_SPACE_RES(mp) \ 96#define XFS_IFREE_SPACE_RES(mp) \
97 (xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0) 97 (xfs_sb_version_hasfinobt(&mp->m_sb) ? \
98 M_IGEO(mp)->inobt_maxlevels : 0)
98 99
99 100
100#endif /* __XFS_TRANS_SPACE_H__ */ 101#endif /* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index d51acc95bc00..4f595546a639 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -7,19 +7,10 @@
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_fs.h" 8#include "xfs_fs.h"
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_shared.h" 10#include "xfs_shared.h"
12#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
13#include "xfs_bit.h" 12#include "xfs_bit.h"
14#include "xfs_sb.h"
15#include "xfs_mount.h" 13#include "xfs_mount.h"
16#include "xfs_defer.h"
17#include "xfs_inode.h"
18#include "xfs_btree.h"
19#include "xfs_rmap.h"
20#include "xfs_alloc_btree.h"
21#include "xfs_alloc.h"
22#include "xfs_ialloc.h"
23 14
24/* Find the size of the AG, in blocks. */ 15/* Find the size of the AG, in blocks. */
25xfs_agblock_t 16xfs_agblock_t
@@ -87,14 +78,14 @@ xfs_agino_range(
87 * Calculate the first inode, which will be in the first 78 * Calculate the first inode, which will be in the first
88 * cluster-aligned block after the AGFL. 79 * cluster-aligned block after the AGFL.
89 */ 80 */
90 bno = round_up(XFS_AGFL_BLOCK(mp) + 1, mp->m_cluster_align); 81 bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align);
91 *first = XFS_AGB_TO_AGINO(mp, bno); 82 *first = XFS_AGB_TO_AGINO(mp, bno);
92 83
93 /* 84 /*
94 * Calculate the last inode, which will be at the end of the 85 * Calculate the last inode, which will be at the end of the
95 * last (aligned) cluster that can be allocated in the AG. 86 * last (aligned) cluster that can be allocated in the AG.
96 */ 87 */
97 bno = round_down(eoag, mp->m_cluster_align); 88 bno = round_down(eoag, M_IGEO(mp)->cluster_align);
98 *last = XFS_AGB_TO_AGINO(mp, bno) - 1; 89 *last = XFS_AGB_TO_AGINO(mp, bno) - 1;
99} 90}
100 91
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index adaeabdefdd3..16b09b941441 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -9,20 +9,13 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h" 13#include "xfs_sb.h"
18#include "xfs_inode.h"
19#include "xfs_alloc.h" 14#include "xfs_alloc.h"
20#include "xfs_ialloc.h" 15#include "xfs_ialloc.h"
21#include "xfs_rmap.h" 16#include "xfs_rmap.h"
22#include "scrub/xfs_scrub.h"
23#include "scrub/scrub.h" 17#include "scrub/scrub.h"
24#include "scrub/common.h" 18#include "scrub/common.h"
25#include "scrub/trace.h"
26 19
27/* Superblock */ 20/* Superblock */
28 21
@@ -646,7 +639,7 @@ xchk_agfl_block(
646 xchk_agfl_block_xref(sc, agbno); 639 xchk_agfl_block_xref(sc, agbno);
647 640
648 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 641 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
649 return XFS_BTREE_QUERY_RANGE_ABORT; 642 return XFS_ITER_ABORT;
650 643
651 return 0; 644 return 0;
652} 645}
@@ -737,7 +730,7 @@ xchk_agfl(
737 /* Check the blocks in the AGFL. */ 730 /* Check the blocks in the AGFL. */
738 error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp), 731 error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
739 sc->sa.agfl_bp, xchk_agfl_block, &sai); 732 sc->sa.agfl_bp, xchk_agfl_block, &sai);
740 if (error == XFS_BTREE_QUERY_RANGE_ABORT) { 733 if (error == XFS_ITER_ABORT) {
741 error = 0; 734 error = 0;
742 goto out_free; 735 goto out_free;
743 } 736 }
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 64e31f87d490..7a1a38b636a9 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -9,22 +9,17 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 13#include "xfs_log_format.h"
16#include "xfs_trans.h" 14#include "xfs_trans.h"
17#include "xfs_sb.h" 15#include "xfs_sb.h"
18#include "xfs_inode.h"
19#include "xfs_alloc.h" 16#include "xfs_alloc.h"
20#include "xfs_alloc_btree.h" 17#include "xfs_alloc_btree.h"
21#include "xfs_ialloc.h" 18#include "xfs_ialloc.h"
22#include "xfs_ialloc_btree.h" 19#include "xfs_ialloc_btree.h"
23#include "xfs_rmap.h" 20#include "xfs_rmap.h"
24#include "xfs_rmap_btree.h" 21#include "xfs_rmap_btree.h"
25#include "xfs_refcount.h"
26#include "xfs_refcount_btree.h" 22#include "xfs_refcount_btree.h"
27#include "scrub/xfs_scrub.h"
28#include "scrub/scrub.h" 23#include "scrub/scrub.h"
29#include "scrub/common.h" 24#include "scrub/common.h"
30#include "scrub/trace.h" 25#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index 44883e9112ad..a43d1813c4ff 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -9,19 +9,12 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_alloc.h" 13#include "xfs_alloc.h"
19#include "xfs_rmap.h" 14#include "xfs_rmap.h"
20#include "scrub/xfs_scrub.h"
21#include "scrub/scrub.h" 15#include "scrub/scrub.h"
22#include "scrub/common.h" 16#include "scrub/common.h"
23#include "scrub/btree.h" 17#include "scrub/btree.h"
24#include "scrub/trace.h"
25 18
26/* 19/*
27 * Set us up to scrub free space btrees. 20 * Set us up to scrub free space btrees.
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index dce74ec57038..1afc58bf71dd 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -9,26 +9,62 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 12#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 13#include "xfs_inode.h"
19#include "xfs_da_format.h" 14#include "xfs_da_format.h"
20#include "xfs_da_btree.h" 15#include "xfs_da_btree.h"
21#include "xfs_dir2.h"
22#include "xfs_attr.h" 16#include "xfs_attr.h"
23#include "xfs_attr_leaf.h" 17#include "xfs_attr_leaf.h"
24#include "scrub/xfs_scrub.h"
25#include "scrub/scrub.h" 18#include "scrub/scrub.h"
26#include "scrub/common.h" 19#include "scrub/common.h"
27#include "scrub/dabtree.h" 20#include "scrub/dabtree.h"
28#include "scrub/trace.h" 21#include "scrub/attr.h"
29 22
30#include <linux/posix_acl_xattr.h> 23/*
31#include <linux/xattr.h> 24 * Allocate enough memory to hold an attr value and attr block bitmaps,
25 * reallocating the buffer if necessary. Buffer contents are not preserved
26 * across a reallocation.
27 */
28int
29xchk_setup_xattr_buf(
30 struct xfs_scrub *sc,
31 size_t value_size,
32 xfs_km_flags_t flags)
33{
34 size_t sz;
35 struct xchk_xattr_buf *ab = sc->buf;
36
37 /*
38 * We need enough space to read an xattr value from the file or enough
39 * space to hold three copies of the xattr free space bitmap. We don't
40 * need the buffer space for both purposes at the same time.
41 */
42 sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
43 sz = max_t(size_t, sz, value_size);
44
45 /*
46 * If there's already a buffer, figure out if we need to reallocate it
47 * to accommodate a larger size.
48 */
49 if (ab) {
50 if (sz <= ab->sz)
51 return 0;
52 kmem_free(ab);
53 sc->buf = NULL;
54 }
55
56 /*
57 * Don't zero the buffer upon allocation to avoid runtime overhead.
58 * All users must be careful never to read uninitialized contents.
59 */
60 ab = kmem_alloc_large(sizeof(*ab) + sz, flags);
61 if (!ab)
62 return -ENOMEM;
63
64 ab->sz = sz;
65 sc->buf = ab;
66 return 0;
67}
32 68
33/* Set us up to scrub an inode's extended attributes. */ 69/* Set us up to scrub an inode's extended attributes. */
34int 70int
@@ -36,19 +72,18 @@ xchk_setup_xattr(
36 struct xfs_scrub *sc, 72 struct xfs_scrub *sc,
37 struct xfs_inode *ip) 73 struct xfs_inode *ip)
38{ 74{
39 size_t sz; 75 int error;
40 76
41 /* 77 /*
42 * Allocate the buffer without the inode lock held. We need enough 78 * We failed to get memory while checking attrs, so this time try to
43 * space to read every xattr value in the file or enough space to 79 * get all the memory we're ever going to need. Allocate the buffer
44 * hold three copies of the xattr free space bitmap. (Not both at 80 * without the inode lock held, which means we can sleep.
45 * the same time.)
46 */ 81 */
47 sz = max_t(size_t, XATTR_SIZE_MAX, 3 * sizeof(long) * 82 if (sc->flags & XCHK_TRY_HARDER) {
48 BITS_TO_LONGS(sc->mp->m_attr_geo->blksize)); 83 error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP);
49 sc->buf = kmem_zalloc_large(sz, KM_SLEEP); 84 if (error)
50 if (!sc->buf) 85 return error;
51 return -ENOMEM; 86 }
52 87
53 return xchk_setup_inode_contents(sc, ip, 0); 88 return xchk_setup_inode_contents(sc, ip, 0);
54} 89}
@@ -83,7 +118,7 @@ xchk_xattr_listent(
83 sx = container_of(context, struct xchk_xattr, context); 118 sx = container_of(context, struct xchk_xattr, context);
84 119
85 if (xchk_should_terminate(sx->sc, &error)) { 120 if (xchk_should_terminate(sx->sc, &error)) {
86 context->seen_enough = 1; 121 context->seen_enough = error;
87 return; 122 return;
88 } 123 }
89 124
@@ -99,6 +134,19 @@ xchk_xattr_listent(
99 return; 134 return;
100 } 135 }
101 136
137 /*
138 * Try to allocate enough memory to extrat the attr value. If that
139 * doesn't work, we overload the seen_enough variable to convey
140 * the error message back to the main scrub function.
141 */
142 error = xchk_setup_xattr_buf(sx->sc, valuelen, KM_MAYFAIL);
143 if (error == -ENOMEM)
144 error = -EDEADLOCK;
145 if (error) {
146 context->seen_enough = error;
147 return;
148 }
149
102 args.flags = ATTR_KERNOTIME; 150 args.flags = ATTR_KERNOTIME;
103 if (flags & XFS_ATTR_ROOT) 151 if (flags & XFS_ATTR_ROOT)
104 args.flags |= ATTR_ROOT; 152 args.flags |= ATTR_ROOT;
@@ -111,8 +159,8 @@ xchk_xattr_listent(
111 args.namelen = namelen; 159 args.namelen = namelen;
112 args.hashval = xfs_da_hashname(args.name, args.namelen); 160 args.hashval = xfs_da_hashname(args.name, args.namelen);
113 args.trans = context->tp; 161 args.trans = context->tp;
114 args.value = sx->sc->buf; 162 args.value = xchk_xattr_valuebuf(sx->sc);
115 args.valuelen = XATTR_SIZE_MAX; 163 args.valuelen = valuelen;
116 164
117 error = xfs_attr_get_ilocked(context->dp, &args); 165 error = xfs_attr_get_ilocked(context->dp, &args);
118 if (error == -EEXIST) 166 if (error == -EEXIST)
@@ -125,7 +173,7 @@ xchk_xattr_listent(
125 args.blkno); 173 args.blkno);
126fail_xref: 174fail_xref:
127 if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 175 if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
128 context->seen_enough = 1; 176 context->seen_enough = XFS_ITER_ABORT;
129 return; 177 return;
130} 178}
131 179
@@ -170,13 +218,12 @@ xchk_xattr_check_freemap(
170 unsigned long *map, 218 unsigned long *map,
171 struct xfs_attr3_icleaf_hdr *leafhdr) 219 struct xfs_attr3_icleaf_hdr *leafhdr)
172{ 220{
173 unsigned long *freemap; 221 unsigned long *freemap = xchk_xattr_freemap(sc);
174 unsigned long *dstmap; 222 unsigned long *dstmap = xchk_xattr_dstmap(sc);
175 unsigned int mapsize = sc->mp->m_attr_geo->blksize; 223 unsigned int mapsize = sc->mp->m_attr_geo->blksize;
176 int i; 224 int i;
177 225
178 /* Construct bitmap of freemap contents. */ 226 /* Construct bitmap of freemap contents. */
179 freemap = (unsigned long *)sc->buf + BITS_TO_LONGS(mapsize);
180 bitmap_zero(freemap, mapsize); 227 bitmap_zero(freemap, mapsize);
181 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { 228 for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
182 if (!xchk_xattr_set_map(sc, freemap, 229 if (!xchk_xattr_set_map(sc, freemap,
@@ -186,7 +233,6 @@ xchk_xattr_check_freemap(
186 } 233 }
187 234
188 /* Look for bits that are set in freemap and are marked in use. */ 235 /* Look for bits that are set in freemap and are marked in use. */
189 dstmap = freemap + BITS_TO_LONGS(mapsize);
190 return bitmap_and(dstmap, freemap, map, mapsize) == 0; 236 return bitmap_and(dstmap, freemap, map, mapsize) == 0;
191} 237}
192 238
@@ -201,13 +247,13 @@ xchk_xattr_entry(
201 char *buf_end, 247 char *buf_end,
202 struct xfs_attr_leafblock *leaf, 248 struct xfs_attr_leafblock *leaf,
203 struct xfs_attr3_icleaf_hdr *leafhdr, 249 struct xfs_attr3_icleaf_hdr *leafhdr,
204 unsigned long *usedmap,
205 struct xfs_attr_leaf_entry *ent, 250 struct xfs_attr_leaf_entry *ent,
206 int idx, 251 int idx,
207 unsigned int *usedbytes, 252 unsigned int *usedbytes,
208 __u32 *last_hashval) 253 __u32 *last_hashval)
209{ 254{
210 struct xfs_mount *mp = ds->state->mp; 255 struct xfs_mount *mp = ds->state->mp;
256 unsigned long *usedmap = xchk_xattr_usedmap(ds->sc);
211 char *name_end; 257 char *name_end;
212 struct xfs_attr_leaf_name_local *lentry; 258 struct xfs_attr_leaf_name_local *lentry;
213 struct xfs_attr_leaf_name_remote *rentry; 259 struct xfs_attr_leaf_name_remote *rentry;
@@ -267,16 +313,26 @@ xchk_xattr_block(
267 struct xfs_attr_leafblock *leaf = bp->b_addr; 313 struct xfs_attr_leafblock *leaf = bp->b_addr;
268 struct xfs_attr_leaf_entry *ent; 314 struct xfs_attr_leaf_entry *ent;
269 struct xfs_attr_leaf_entry *entries; 315 struct xfs_attr_leaf_entry *entries;
270 unsigned long *usedmap = ds->sc->buf; 316 unsigned long *usedmap;
271 char *buf_end; 317 char *buf_end;
272 size_t off; 318 size_t off;
273 __u32 last_hashval = 0; 319 __u32 last_hashval = 0;
274 unsigned int usedbytes = 0; 320 unsigned int usedbytes = 0;
275 unsigned int hdrsize; 321 unsigned int hdrsize;
276 int i; 322 int i;
323 int error;
277 324
278 if (*last_checked == blk->blkno) 325 if (*last_checked == blk->blkno)
279 return 0; 326 return 0;
327
328 /* Allocate memory for block usage checking. */
329 error = xchk_setup_xattr_buf(ds->sc, 0, KM_MAYFAIL);
330 if (error == -ENOMEM)
331 return -EDEADLOCK;
332 if (error)
333 return error;
334 usedmap = xchk_xattr_usedmap(ds->sc);
335
280 *last_checked = blk->blkno; 336 *last_checked = blk->blkno;
281 bitmap_zero(usedmap, mp->m_attr_geo->blksize); 337 bitmap_zero(usedmap, mp->m_attr_geo->blksize);
282 338
@@ -324,7 +380,7 @@ xchk_xattr_block(
324 380
325 /* Check the entry and nameval. */ 381 /* Check the entry and nameval. */
326 xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr, 382 xchk_xattr_entry(ds, level, buf_end, leaf, &leafhdr,
327 usedmap, ent, i, &usedbytes, &last_hashval); 383 ent, i, &usedbytes, &last_hashval);
328 384
329 if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 385 if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
330 goto out; 386 goto out;
@@ -464,6 +520,10 @@ xchk_xattr(
464 error = xfs_attr_list_int_ilocked(&sx.context); 520 error = xfs_attr_list_int_ilocked(&sx.context);
465 if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error)) 521 if (!xchk_fblock_process_error(sc, XFS_ATTR_FORK, 0, &error))
466 goto out; 522 goto out;
523
524 /* Did our listent function try to return any errors? */
525 if (sx.context.seen_enough < 0)
526 error = sx.context.seen_enough;
467out: 527out:
468 return error; 528 return error;
469} 529}
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
new file mode 100644
index 000000000000..13a1d2e8424d
--- /dev/null
+++ b/fs/xfs/scrub/attr.h
@@ -0,0 +1,71 @@
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * Copyright (C) 2019 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#ifndef __XFS_SCRUB_ATTR_H__
7#define __XFS_SCRUB_ATTR_H__
8
9/*
10 * Temporary storage for online scrub and repair of extended attributes.
11 */
12struct xchk_xattr_buf {
13 /* Size of @buf, in bytes. */
14 size_t sz;
15
16 /*
17 * Memory buffer -- either used for extracting attr values while
18 * walking the attributes; or for computing attr block bitmaps when
19 * checking the attribute tree.
20 *
21 * Each bitmap contains enough bits to track every byte in an attr
22 * block (rounded up to the size of an unsigned long). The attr block
23 * used space bitmap starts at the beginning of the buffer; the free
24 * space bitmap follows immediately after; and we have a third buffer
25 * for storing intermediate bitmap results.
26 */
27 uint8_t buf[0];
28};
29
30/* A place to store attribute values. */
31static inline uint8_t *
32xchk_xattr_valuebuf(
33 struct xfs_scrub *sc)
34{
35 struct xchk_xattr_buf *ab = sc->buf;
36
37 return ab->buf;
38}
39
40/* A bitmap of space usage computed by walking an attr leaf block. */
41static inline unsigned long *
42xchk_xattr_usedmap(
43 struct xfs_scrub *sc)
44{
45 struct xchk_xattr_buf *ab = sc->buf;
46
47 return (unsigned long *)ab->buf;
48}
49
50/* A bitmap of free space computed by walking attr leaf block free info. */
51static inline unsigned long *
52xchk_xattr_freemap(
53 struct xfs_scrub *sc)
54{
55 return xchk_xattr_usedmap(sc) +
56 BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
57}
58
59/* A bitmap used to hold temporary results. */
60static inline unsigned long *
61xchk_xattr_dstmap(
62 struct xfs_scrub *sc)
63{
64 return xchk_xattr_freemap(sc) +
65 BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
66}
67
68int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size,
69 xfs_km_flags_t flags);
70
71#endif /* __XFS_SCRUB_ATTR_H__ */
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index fdadc9e1dc49..3d47d111be5a 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -10,11 +10,6 @@
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_btree.h" 12#include "xfs_btree.h"
13#include "scrub/xfs_scrub.h"
14#include "scrub/scrub.h"
15#include "scrub/common.h"
16#include "scrub/trace.h"
17#include "scrub/repair.h"
18#include "scrub/bitmap.h" 13#include "scrub/bitmap.h"
19 14
20/* 15/*
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index a703cd58a90e..1bd29fdc2ab5 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -9,27 +9,19 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h" 13#include "xfs_bit.h"
15#include "xfs_log_format.h" 14#include "xfs_log_format.h"
16#include "xfs_trans.h" 15#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 16#include "xfs_inode.h"
19#include "xfs_inode_fork.h"
20#include "xfs_alloc.h" 17#include "xfs_alloc.h"
21#include "xfs_rtalloc.h"
22#include "xfs_bmap.h" 18#include "xfs_bmap.h"
23#include "xfs_bmap_util.h"
24#include "xfs_bmap_btree.h" 19#include "xfs_bmap_btree.h"
25#include "xfs_rmap.h" 20#include "xfs_rmap.h"
26#include "xfs_rmap_btree.h" 21#include "xfs_rmap_btree.h"
27#include "xfs_refcount.h"
28#include "scrub/xfs_scrub.h"
29#include "scrub/scrub.h" 22#include "scrub/scrub.h"
30#include "scrub/common.h" 23#include "scrub/common.h"
31#include "scrub/btree.h" 24#include "scrub/btree.h"
32#include "scrub/trace.h"
33 25
34/* Set us up with an inode's bmap. */ 26/* Set us up with an inode's bmap. */
35int 27int
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 117910db51b8..f52a7b8256f9 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -9,14 +9,7 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h"
19#include "xfs_alloc.h"
20#include "scrub/scrub.h" 13#include "scrub/scrub.h"
21#include "scrub/common.h" 14#include "scrub/common.h"
22#include "scrub/btree.h" 15#include "scrub/btree.h"
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 973aa59975e3..18876056e5e0 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -9,22 +9,16 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 13#include "xfs_log_format.h"
16#include "xfs_trans.h" 14#include "xfs_trans.h"
17#include "xfs_sb.h" 15#include "xfs_sb.h"
18#include "xfs_inode.h" 16#include "xfs_inode.h"
19#include "xfs_icache.h" 17#include "xfs_icache.h"
20#include "xfs_itable.h"
21#include "xfs_alloc.h" 18#include "xfs_alloc.h"
22#include "xfs_alloc_btree.h" 19#include "xfs_alloc_btree.h"
23#include "xfs_bmap.h"
24#include "xfs_bmap_btree.h"
25#include "xfs_ialloc.h" 20#include "xfs_ialloc.h"
26#include "xfs_ialloc_btree.h" 21#include "xfs_ialloc_btree.h"
27#include "xfs_refcount.h"
28#include "xfs_refcount_btree.h" 22#include "xfs_refcount_btree.h"
29#include "xfs_rmap.h" 23#include "xfs_rmap.h"
30#include "xfs_rmap_btree.h" 24#include "xfs_rmap_btree.h"
@@ -32,11 +26,9 @@
32#include "xfs_trans_priv.h" 26#include "xfs_trans_priv.h"
33#include "xfs_attr.h" 27#include "xfs_attr.h"
34#include "xfs_reflink.h" 28#include "xfs_reflink.h"
35#include "scrub/xfs_scrub.h"
36#include "scrub/scrub.h" 29#include "scrub/scrub.h"
37#include "scrub/common.h" 30#include "scrub/common.h"
38#include "scrub/trace.h" 31#include "scrub/trace.h"
39#include "scrub/btree.h"
40#include "scrub/repair.h" 32#include "scrub/repair.h"
41#include "scrub/health.h" 33#include "scrub/health.h"
42 34
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 90527b094878..94c4f1de1922 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -9,20 +9,12 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 12#include "xfs_log_format.h"
16#include "xfs_trans.h" 13#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 14#include "xfs_inode.h"
19#include "xfs_inode_fork.h"
20#include "xfs_da_format.h"
21#include "xfs_da_btree.h"
22#include "xfs_dir2.h" 15#include "xfs_dir2.h"
23#include "xfs_dir2_priv.h" 16#include "xfs_dir2_priv.h"
24#include "xfs_attr_leaf.h" 17#include "xfs_attr_leaf.h"
25#include "scrub/xfs_scrub.h"
26#include "scrub/scrub.h" 18#include "scrub/scrub.h"
27#include "scrub/common.h" 19#include "scrub/common.h"
28#include "scrub/trace.h" 20#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index a38a22785a1a..1e2e11721eb9 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -9,24 +9,14 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 12#include "xfs_log_format.h"
16#include "xfs_trans.h" 13#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 14#include "xfs_inode.h"
19#include "xfs_icache.h" 15#include "xfs_icache.h"
20#include "xfs_itable.h"
21#include "xfs_da_format.h"
22#include "xfs_da_btree.h"
23#include "xfs_dir2.h" 16#include "xfs_dir2.h"
24#include "xfs_dir2_priv.h" 17#include "xfs_dir2_priv.h"
25#include "xfs_ialloc.h"
26#include "scrub/xfs_scrub.h"
27#include "scrub/scrub.h" 18#include "scrub/scrub.h"
28#include "scrub/common.h" 19#include "scrub/common.h"
29#include "scrub/trace.h"
30#include "scrub/dabtree.h" 20#include "scrub/dabtree.h"
31 21
32/* Set us up to scrub directories. */ 22/* Set us up to scrub directories. */
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 07c11e3e6437..fc3f510c9034 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -9,22 +9,10 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h" 12#include "xfs_sb.h"
18#include "xfs_inode.h"
19#include "xfs_alloc.h" 13#include "xfs_alloc.h"
20#include "xfs_ialloc.h" 14#include "xfs_ialloc.h"
21#include "xfs_rmap.h"
22#include "xfs_error.h"
23#include "xfs_errortag.h"
24#include "xfs_icache.h"
25#include "xfs_health.h" 15#include "xfs_health.h"
26#include "xfs_bmap.h"
27#include "scrub/xfs_scrub.h"
28#include "scrub/scrub.h" 16#include "scrub/scrub.h"
29#include "scrub/common.h" 17#include "scrub/common.h"
30#include "scrub/trace.h" 18#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index 23cf8e2f25db..b2f602811e9d 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -7,18 +7,10 @@
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h" 8#include "xfs_shared.h"
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 10#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h" 11#include "xfs_sb.h"
18#include "xfs_inode.h"
19#include "xfs_health.h" 12#include "xfs_health.h"
20#include "scrub/scrub.h" 13#include "scrub/scrub.h"
21#include "scrub/health.h"
22 14
23/* 15/*
24 * Scrub and In-Core Filesystem Health Assessments 16 * Scrub and In-Core Filesystem Health Assessments
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 9b47117180cb..681758704fda 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -9,21 +9,14 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 13#include "xfs_log_format.h"
16#include "xfs_trans.h" 14#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 15#include "xfs_inode.h"
19#include "xfs_alloc.h"
20#include "xfs_ialloc.h" 16#include "xfs_ialloc.h"
21#include "xfs_ialloc_btree.h" 17#include "xfs_ialloc_btree.h"
22#include "xfs_icache.h" 18#include "xfs_icache.h"
23#include "xfs_rmap.h" 19#include "xfs_rmap.h"
24#include "xfs_log.h"
25#include "xfs_trans_priv.h"
26#include "scrub/xfs_scrub.h"
27#include "scrub/scrub.h" 20#include "scrub/scrub.h"
28#include "scrub/common.h" 21#include "scrub/common.h"
29#include "scrub/btree.h" 22#include "scrub/btree.h"
@@ -230,7 +223,7 @@ xchk_iallocbt_check_cluster(
230 int error = 0; 223 int error = 0;
231 224
232 nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK, 225 nr_inodes = min_t(unsigned int, XFS_INODES_PER_CHUNK,
233 mp->m_inodes_per_cluster); 226 M_IGEO(mp)->inodes_per_cluster);
234 227
235 /* Map this inode cluster */ 228 /* Map this inode cluster */
236 agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base); 229 agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino + cluster_base);
@@ -251,7 +244,7 @@ xchk_iallocbt_check_cluster(
251 */ 244 */
252 ir_holemask = (irec->ir_holemask & cluster_mask); 245 ir_holemask = (irec->ir_holemask & cluster_mask);
253 imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno); 246 imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
254 imap.im_len = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); 247 imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
255 imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) << 248 imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) <<
256 mp->m_sb.sb_inodelog; 249 mp->m_sb.sb_inodelog;
257 250
@@ -276,12 +269,12 @@ xchk_iallocbt_check_cluster(
276 /* If any part of this is a hole, skip it. */ 269 /* If any part of this is a hole, skip it. */
277 if (ir_holemask) { 270 if (ir_holemask) {
278 xchk_xref_is_not_owned_by(bs->sc, agbno, 271 xchk_xref_is_not_owned_by(bs->sc, agbno,
279 mp->m_blocks_per_cluster, 272 M_IGEO(mp)->blocks_per_cluster,
280 &XFS_RMAP_OINFO_INODES); 273 &XFS_RMAP_OINFO_INODES);
281 return 0; 274 return 0;
282 } 275 }
283 276
284 xchk_xref_is_owned_by(bs->sc, agbno, mp->m_blocks_per_cluster, 277 xchk_xref_is_owned_by(bs->sc, agbno, M_IGEO(mp)->blocks_per_cluster,
285 &XFS_RMAP_OINFO_INODES); 278 &XFS_RMAP_OINFO_INODES);
286 279
287 /* Grab the inode cluster buffer. */ 280 /* Grab the inode cluster buffer. */
@@ -333,7 +326,7 @@ xchk_iallocbt_check_clusters(
333 */ 326 */
334 for (cluster_base = 0; 327 for (cluster_base = 0;
335 cluster_base < XFS_INODES_PER_CHUNK; 328 cluster_base < XFS_INODES_PER_CHUNK;
336 cluster_base += bs->sc->mp->m_inodes_per_cluster) { 329 cluster_base += M_IGEO(bs->sc->mp)->inodes_per_cluster) {
337 error = xchk_iallocbt_check_cluster(bs, irec, cluster_base); 330 error = xchk_iallocbt_check_cluster(bs, irec, cluster_base);
338 if (error) 331 if (error)
339 break; 332 break;
@@ -355,6 +348,7 @@ xchk_iallocbt_rec_alignment(
355{ 348{
356 struct xfs_mount *mp = bs->sc->mp; 349 struct xfs_mount *mp = bs->sc->mp;
357 struct xchk_iallocbt *iabt = bs->private; 350 struct xchk_iallocbt *iabt = bs->private;
351 struct xfs_ino_geometry *igeo = M_IGEO(mp);
358 352
359 /* 353 /*
360 * finobt records have different positioning requirements than inobt 354 * finobt records have different positioning requirements than inobt
@@ -372,7 +366,7 @@ xchk_iallocbt_rec_alignment(
372 unsigned int imask; 366 unsigned int imask;
373 367
374 imask = min_t(unsigned int, XFS_INODES_PER_CHUNK, 368 imask = min_t(unsigned int, XFS_INODES_PER_CHUNK,
375 mp->m_cluster_align_inodes) - 1; 369 igeo->cluster_align_inodes) - 1;
376 if (irec->ir_startino & imask) 370 if (irec->ir_startino & imask)
377 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 371 xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
378 return; 372 return;
@@ -400,17 +394,17 @@ xchk_iallocbt_rec_alignment(
400 } 394 }
401 395
402 /* inobt records must be aligned to cluster and inoalignmnt size. */ 396 /* inobt records must be aligned to cluster and inoalignmnt size. */
403 if (irec->ir_startino & (mp->m_cluster_align_inodes - 1)) { 397 if (irec->ir_startino & (igeo->cluster_align_inodes - 1)) {
404 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 398 xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
405 return; 399 return;
406 } 400 }
407 401
408 if (irec->ir_startino & (mp->m_inodes_per_cluster - 1)) { 402 if (irec->ir_startino & (igeo->inodes_per_cluster - 1)) {
409 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 403 xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
410 return; 404 return;
411 } 405 }
412 406
413 if (mp->m_inodes_per_cluster <= XFS_INODES_PER_CHUNK) 407 if (igeo->inodes_per_cluster <= XFS_INODES_PER_CHUNK)
414 return; 408 return;
415 409
416 /* 410 /*
@@ -419,7 +413,7 @@ xchk_iallocbt_rec_alignment(
419 * after this one. 413 * after this one.
420 */ 414 */
421 iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK; 415 iabt->next_startino = irec->ir_startino + XFS_INODES_PER_CHUNK;
422 iabt->next_cluster_ino = irec->ir_startino + mp->m_inodes_per_cluster; 416 iabt->next_cluster_ino = irec->ir_startino + igeo->inodes_per_cluster;
423} 417}
424 418
425/* Scrub an inobt/finobt record. */ 419/* Scrub an inobt/finobt record. */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index e213efc194a1..6d483ab29e63 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -9,27 +9,17 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 13#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 14#include "xfs_inode.h"
19#include "xfs_icache.h"
20#include "xfs_inode_buf.h"
21#include "xfs_inode_fork.h"
22#include "xfs_ialloc.h" 15#include "xfs_ialloc.h"
23#include "xfs_da_format.h" 16#include "xfs_da_format.h"
24#include "xfs_reflink.h" 17#include "xfs_reflink.h"
25#include "xfs_rmap.h" 18#include "xfs_rmap.h"
26#include "xfs_bmap.h"
27#include "xfs_bmap_util.h" 19#include "xfs_bmap_util.h"
28#include "scrub/xfs_scrub.h"
29#include "scrub/scrub.h" 20#include "scrub/scrub.h"
30#include "scrub/common.h" 21#include "scrub/common.h"
31#include "scrub/btree.h" 22#include "scrub/btree.h"
32#include "scrub/trace.h"
33 23
34/* 24/*
35 * Grab total control of the inode metadata. It doesn't matter here if 25 * Grab total control of the inode metadata. It doesn't matter here if
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index d5d197f1b80f..c962bd534690 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -9,21 +9,13 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 12#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 13#include "xfs_inode.h"
19#include "xfs_icache.h" 14#include "xfs_icache.h"
20#include "xfs_dir2.h" 15#include "xfs_dir2.h"
21#include "xfs_dir2_priv.h" 16#include "xfs_dir2_priv.h"
22#include "xfs_ialloc.h"
23#include "scrub/xfs_scrub.h"
24#include "scrub/scrub.h" 17#include "scrub/scrub.h"
25#include "scrub/common.h" 18#include "scrub/common.h"
26#include "scrub/trace.h"
27 19
28/* Set us up to scrub parents. */ 20/* Set us up to scrub parents. */
29int 21int
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 5dfe2b5924db..0a33b4421c32 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -9,24 +9,13 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 12#include "xfs_log_format.h"
16#include "xfs_trans.h" 13#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 14#include "xfs_inode.h"
19#include "xfs_inode_fork.h"
20#include "xfs_alloc.h"
21#include "xfs_bmap.h"
22#include "xfs_quota.h" 15#include "xfs_quota.h"
23#include "xfs_qm.h" 16#include "xfs_qm.h"
24#include "xfs_dquot.h"
25#include "xfs_dquot_item.h"
26#include "scrub/xfs_scrub.h"
27#include "scrub/scrub.h" 17#include "scrub/scrub.h"
28#include "scrub/common.h" 18#include "scrub/common.h"
29#include "scrub/trace.h"
30 19
31/* Convert a scrub type code to a DQ flag, or return 0 if error. */ 20/* Convert a scrub type code to a DQ flag, or return 0 if error. */
32static inline uint 21static inline uint
@@ -144,7 +133,7 @@ xchk_quota_item(
144 if (bsoft > bhard) 133 if (bsoft > bhard)
145 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 134 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
146 135
147 if (ihard > mp->m_maxicount) 136 if (ihard > M_IGEO(mp)->maxicount)
148 xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset); 137 xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
149 if (isoft > ihard) 138 if (isoft > ihard)
150 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); 139 xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 708b4158eb90..93b3793bc5b3 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -7,22 +7,12 @@
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h" 8#include "xfs_shared.h"
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 10#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_alloc.h"
19#include "xfs_rmap.h" 11#include "xfs_rmap.h"
20#include "xfs_refcount.h" 12#include "xfs_refcount.h"
21#include "scrub/xfs_scrub.h"
22#include "scrub/scrub.h" 13#include "scrub/scrub.h"
23#include "scrub/common.h" 14#include "scrub/common.h"
24#include "scrub/btree.h" 15#include "scrub/btree.h"
25#include "scrub/trace.h"
26 16
27/* 17/*
28 * Set us up to scrub reference count btrees. 18 * Set us up to scrub reference count btrees.
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index eb358f0f5e0a..4cfeec57fb05 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -9,29 +9,21 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 13#include "xfs_log_format.h"
16#include "xfs_trans.h" 14#include "xfs_trans.h"
17#include "xfs_sb.h" 15#include "xfs_sb.h"
18#include "xfs_inode.h" 16#include "xfs_inode.h"
19#include "xfs_icache.h"
20#include "xfs_alloc.h" 17#include "xfs_alloc.h"
21#include "xfs_alloc_btree.h" 18#include "xfs_alloc_btree.h"
22#include "xfs_ialloc.h" 19#include "xfs_ialloc.h"
23#include "xfs_ialloc_btree.h" 20#include "xfs_ialloc_btree.h"
24#include "xfs_rmap.h" 21#include "xfs_rmap.h"
25#include "xfs_rmap_btree.h" 22#include "xfs_rmap_btree.h"
26#include "xfs_refcount.h"
27#include "xfs_refcount_btree.h" 23#include "xfs_refcount_btree.h"
28#include "xfs_extent_busy.h" 24#include "xfs_extent_busy.h"
29#include "xfs_ag_resv.h" 25#include "xfs_ag_resv.h"
30#include "xfs_trans_space.h"
31#include "xfs_quota.h" 26#include "xfs_quota.h"
32#include "xfs_attr.h"
33#include "xfs_reflink.h"
34#include "scrub/xfs_scrub.h"
35#include "scrub/scrub.h" 27#include "scrub/scrub.h"
36#include "scrub/common.h" 28#include "scrub/common.h"
37#include "scrub/trace.h" 29#include "scrub/trace.h"
@@ -357,7 +349,7 @@ xrep_init_btblock(
357 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb), 349 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
358 XFS_FSB_TO_BB(mp, 1), 0); 350 XFS_FSB_TO_BB(mp, 1), 0);
359 xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); 351 xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
360 xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno, 0); 352 xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno);
361 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF); 353 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
362 xfs_trans_log_buf(tp, bp, 0, bp->b_length); 354 xfs_trans_log_buf(tp, bp, 0, bp->b_length);
363 bp->b_ops = ops; 355 bp->b_ops = ops;
@@ -672,7 +664,7 @@ xrep_findroot_agfl_walk(
672{ 664{
673 xfs_agblock_t *agbno = priv; 665 xfs_agblock_t *agbno = priv;
674 666
675 return (*agbno == bno) ? XFS_BTREE_QUERY_RANGE_ABORT : 0; 667 return (*agbno == bno) ? XFS_ITER_ABORT : 0;
676} 668}
677 669
678/* Does this block match the btree information passed in? */ 670/* Does this block match the btree information passed in? */
@@ -702,7 +694,7 @@ xrep_findroot_block(
702 if (owner == XFS_RMAP_OWN_AG) { 694 if (owner == XFS_RMAP_OWN_AG) {
703 error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp, 695 error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
704 xrep_findroot_agfl_walk, &agbno); 696 xrep_findroot_agfl_walk, &agbno);
705 if (error == XFS_BTREE_QUERY_RANGE_ABORT) 697 if (error == XFS_ITER_ABORT)
706 return 0; 698 return 0;
707 if (error) 699 if (error)
708 return error; 700 return error;
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index 92a140c5b55e..8d4cefd761c1 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -9,21 +9,12 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h" 12#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_alloc.h"
19#include "xfs_ialloc.h"
20#include "xfs_rmap.h" 13#include "xfs_rmap.h"
21#include "xfs_refcount.h" 14#include "xfs_refcount.h"
22#include "scrub/xfs_scrub.h"
23#include "scrub/scrub.h" 15#include "scrub/scrub.h"
24#include "scrub/common.h" 16#include "scrub/common.h"
25#include "scrub/btree.h" 17#include "scrub/btree.h"
26#include "scrub/trace.h"
27 18
28/* 19/*
29 * Set us up to scrub reverse mapping btrees. 20 * Set us up to scrub reverse mapping btrees.
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index dbe115b075f7..c642bc206c41 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -9,19 +9,12 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 12#include "xfs_log_format.h"
16#include "xfs_trans.h" 13#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_alloc.h"
19#include "xfs_rtalloc.h" 14#include "xfs_rtalloc.h"
20#include "xfs_inode.h" 15#include "xfs_inode.h"
21#include "scrub/xfs_scrub.h"
22#include "scrub/scrub.h" 16#include "scrub/scrub.h"
23#include "scrub/common.h" 17#include "scrub/common.h"
24#include "scrub/trace.h"
25 18
26/* Set us up with the realtime metadata locked. */ 19/* Set us up with the realtime metadata locked. */
27int 20int
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index f630389ee176..15c8c5f3f688 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -9,36 +9,16 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 12#include "xfs_log_format.h"
16#include "xfs_trans.h" 13#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 14#include "xfs_inode.h"
19#include "xfs_icache.h"
20#include "xfs_itable.h"
21#include "xfs_alloc.h"
22#include "xfs_alloc_btree.h"
23#include "xfs_bmap.h"
24#include "xfs_bmap_btree.h"
25#include "xfs_ialloc.h"
26#include "xfs_ialloc_btree.h"
27#include "xfs_refcount.h"
28#include "xfs_refcount_btree.h"
29#include "xfs_rmap.h"
30#include "xfs_rmap_btree.h"
31#include "xfs_quota.h" 15#include "xfs_quota.h"
32#include "xfs_qm.h" 16#include "xfs_qm.h"
33#include "xfs_errortag.h" 17#include "xfs_errortag.h"
34#include "xfs_error.h" 18#include "xfs_error.h"
35#include "xfs_log.h"
36#include "xfs_trans_priv.h"
37#include "scrub/xfs_scrub.h"
38#include "scrub/scrub.h" 19#include "scrub/scrub.h"
39#include "scrub/common.h" 20#include "scrub/common.h"
40#include "scrub/trace.h" 21#include "scrub/trace.h"
41#include "scrub/btree.h"
42#include "scrub/repair.h" 22#include "scrub/repair.h"
43#include "scrub/health.h" 23#include "scrub/health.h"
44 24
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index f7ebaa946999..99c0b1234c3c 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -9,19 +9,11 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 11#include "xfs_mount.h"
12#include "xfs_defer.h"
13#include "xfs_btree.h"
14#include "xfs_bit.h"
15#include "xfs_log_format.h" 12#include "xfs_log_format.h"
16#include "xfs_trans.h"
17#include "xfs_sb.h"
18#include "xfs_inode.h" 13#include "xfs_inode.h"
19#include "xfs_inode_fork.h"
20#include "xfs_symlink.h" 14#include "xfs_symlink.h"
21#include "scrub/xfs_scrub.h"
22#include "scrub/scrub.h" 15#include "scrub/scrub.h"
23#include "scrub/common.h" 16#include "scrub/common.h"
24#include "scrub/trace.h"
25 17
26/* Set us up to scrub a symbolic link. */ 18/* Set us up to scrub a symbolic link. */
27int 19int
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 96feaf8dcdec..9eaab2eb5ed3 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -10,15 +10,9 @@
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_defer.h"
14#include "xfs_da_format.h"
15#include "xfs_inode.h" 13#include "xfs_inode.h"
16#include "xfs_btree.h" 14#include "xfs_btree.h"
17#include "xfs_trans.h"
18#include "xfs_bit.h"
19#include "scrub/xfs_scrub.h"
20#include "scrub/scrub.h" 15#include "scrub/scrub.h"
21#include "scrub/common.h"
22 16
23/* Figure out which block the btree cursor was pointing to. */ 17/* Figure out which block the btree cursor was pointing to. */
24static inline xfs_fsblock_t 18static inline xfs_fsblock_t
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8039e35147dd..cbda40d40326 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -4,16 +4,14 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_shared.h"
7#include "xfs_format.h" 8#include "xfs_format.h"
8#include "xfs_log_format.h" 9#include "xfs_log_format.h"
9#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
10#include "xfs_mount.h" 11#include "xfs_mount.h"
11#include "xfs_inode.h" 12#include "xfs_inode.h"
12#include "xfs_acl.h"
13#include "xfs_attr.h" 13#include "xfs_attr.h"
14#include "xfs_trace.h" 14#include "xfs_trace.h"
15#include <linux/slab.h>
16#include <linux/xattr.h>
17#include <linux/posix_acl_xattr.h> 15#include <linux/posix_acl_xattr.h>
18 16
19 17
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 11f703d4a605..761248ee2778 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -12,16 +12,11 @@
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_inode.h" 13#include "xfs_inode.h"
14#include "xfs_trans.h" 14#include "xfs_trans.h"
15#include "xfs_inode_item.h"
16#include "xfs_alloc.h"
17#include "xfs_error.h"
18#include "xfs_iomap.h" 15#include "xfs_iomap.h"
19#include "xfs_trace.h" 16#include "xfs_trace.h"
20#include "xfs_bmap.h" 17#include "xfs_bmap.h"
21#include "xfs_bmap_util.h" 18#include "xfs_bmap_util.h"
22#include "xfs_bmap_btree.h"
23#include "xfs_reflink.h" 19#include "xfs_reflink.h"
24#include <linux/writeback.h>
25 20
26/* 21/*
27 * structure owned by writepages passed to individual writepage calls 22 * structure owned by writepages passed to individual writepage calls
@@ -138,8 +133,7 @@ xfs_setfilesize_trans_alloc(
138 struct xfs_trans *tp; 133 struct xfs_trans *tp;
139 int error; 134 int error;
140 135
141 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 136 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
142 XFS_TRANS_NOFS, &tp);
143 if (error) 137 if (error)
144 return error; 138 return error;
145 139
@@ -240,9 +234,17 @@ xfs_end_ioend(
240 struct xfs_inode *ip = XFS_I(ioend->io_inode); 234 struct xfs_inode *ip = XFS_I(ioend->io_inode);
241 xfs_off_t offset = ioend->io_offset; 235 xfs_off_t offset = ioend->io_offset;
242 size_t size = ioend->io_size; 236 size_t size = ioend->io_size;
237 unsigned int nofs_flag;
243 int error; 238 int error;
244 239
245 /* 240 /*
241 * We can allocate memory here while doing writeback on behalf of
242 * memory reclaim. To avoid memory allocation deadlocks set the
243 * task-wide nofs context for the following operations.
244 */
245 nofs_flag = memalloc_nofs_save();
246
247 /*
246 * Just clean up the in-memory strutures if the fs has been shut down. 248 * Just clean up the in-memory strutures if the fs has been shut down.
247 */ 249 */
248 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 250 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -282,6 +284,8 @@ done:
282 list_del_init(&ioend->io_list); 284 list_del_init(&ioend->io_list);
283 xfs_destroy_ioend(ioend, error); 285 xfs_destroy_ioend(ioend, error);
284 } 286 }
287
288 memalloc_nofs_restore(nofs_flag);
285} 289}
286 290
287/* 291/*
@@ -290,13 +294,9 @@ done:
290static bool 294static bool
291xfs_ioend_can_merge( 295xfs_ioend_can_merge(
292 struct xfs_ioend *ioend, 296 struct xfs_ioend *ioend,
293 int ioend_error,
294 struct xfs_ioend *next) 297 struct xfs_ioend *next)
295{ 298{
296 int next_error; 299 if (ioend->io_bio->bi_status != next->io_bio->bi_status)
297
298 next_error = blk_status_to_errno(next->io_bio->bi_status);
299 if (ioend_error != next_error)
300 return false; 300 return false;
301 if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK)) 301 if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK))
302 return false; 302 return false;
@@ -305,11 +305,28 @@ xfs_ioend_can_merge(
305 return false; 305 return false;
306 if (ioend->io_offset + ioend->io_size != next->io_offset) 306 if (ioend->io_offset + ioend->io_size != next->io_offset)
307 return false; 307 return false;
308 if (xfs_ioend_is_append(ioend) != xfs_ioend_is_append(next))
309 return false;
310 return true; 308 return true;
311} 309}
312 310
311/*
312 * If the to be merged ioend has a preallocated transaction for file
313 * size updates we need to ensure the ioend it is merged into also
314 * has one. If it already has one we can simply cancel the transaction
315 * as it is guaranteed to be clean.
316 */
317static void
318xfs_ioend_merge_append_transactions(
319 struct xfs_ioend *ioend,
320 struct xfs_ioend *next)
321{
322 if (!ioend->io_append_trans) {
323 ioend->io_append_trans = next->io_append_trans;
324 next->io_append_trans = NULL;
325 } else {
326 xfs_setfilesize_ioend(next, -ECANCELED);
327 }
328}
329
313/* Try to merge adjacent completions. */ 330/* Try to merge adjacent completions. */
314STATIC void 331STATIC void
315xfs_ioend_try_merge( 332xfs_ioend_try_merge(
@@ -317,25 +334,16 @@ xfs_ioend_try_merge(
317 struct list_head *more_ioends) 334 struct list_head *more_ioends)
318{ 335{
319 struct xfs_ioend *next_ioend; 336 struct xfs_ioend *next_ioend;
320 int ioend_error;
321 int error;
322
323 if (list_empty(more_ioends))
324 return;
325
326 ioend_error = blk_status_to_errno(ioend->io_bio->bi_status);
327 337
328 while (!list_empty(more_ioends)) { 338 while (!list_empty(more_ioends)) {
329 next_ioend = list_first_entry(more_ioends, struct xfs_ioend, 339 next_ioend = list_first_entry(more_ioends, struct xfs_ioend,
330 io_list); 340 io_list);
331 if (!xfs_ioend_can_merge(ioend, ioend_error, next_ioend)) 341 if (!xfs_ioend_can_merge(ioend, next_ioend))
332 break; 342 break;
333 list_move_tail(&next_ioend->io_list, &ioend->io_list); 343 list_move_tail(&next_ioend->io_list, &ioend->io_list);
334 ioend->io_size += next_ioend->io_size; 344 ioend->io_size += next_ioend->io_size;
335 if (ioend->io_append_trans) { 345 if (next_ioend->io_append_trans)
336 error = xfs_setfilesize_ioend(next_ioend, 1); 346 xfs_ioend_merge_append_transactions(ioend, next_ioend);
337 ASSERT(error == 1);
338 }
339 } 347 }
340} 348}
341 349
@@ -626,7 +634,7 @@ allocate_blocks:
626 * reference to the ioend to ensure that the ioend completion is only done once 634 * reference to the ioend to ensure that the ioend completion is only done once
627 * all bios have been submitted and the ioend is really done. 635 * all bios have been submitted and the ioend is really done.
628 * 636 *
629 * If @fail is non-zero, it means that we have a situation where some part of 637 * If @status is non-zero, it means that we have a situation where some part of
630 * the submission process has failed after we have marked paged for writeback 638 * the submission process has failed after we have marked paged for writeback
631 * and unlocked them. In this situation, we need to fail the bio and ioend 639 * and unlocked them. In this situation, we need to fail the bio and ioend
632 * rather than submit it to IO. This typically only happens on a filesystem 640 * rather than submit it to IO. This typically only happens on a filesystem
@@ -638,21 +646,19 @@ xfs_submit_ioend(
638 struct xfs_ioend *ioend, 646 struct xfs_ioend *ioend,
639 int status) 647 int status)
640{ 648{
649 unsigned int nofs_flag;
650
651 /*
652 * We can allocate memory here while doing writeback on behalf of
653 * memory reclaim. To avoid memory allocation deadlocks set the
654 * task-wide nofs context for the following operations.
655 */
656 nofs_flag = memalloc_nofs_save();
657
641 /* Convert CoW extents to regular */ 658 /* Convert CoW extents to regular */
642 if (!status && ioend->io_fork == XFS_COW_FORK) { 659 if (!status && ioend->io_fork == XFS_COW_FORK) {
643 /*
644 * Yuk. This can do memory allocation, but is not a
645 * transactional operation so everything is done in GFP_KERNEL
646 * context. That can deadlock, because we hold pages in
647 * writeback state and GFP_KERNEL allocations can block on them.
648 * Hence we must operate in nofs conditions here.
649 */
650 unsigned nofs_flag;
651
652 nofs_flag = memalloc_nofs_save();
653 status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), 660 status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
654 ioend->io_offset, ioend->io_size); 661 ioend->io_offset, ioend->io_size);
655 memalloc_nofs_restore(nofs_flag);
656 } 662 }
657 663
658 /* Reserve log space if we might write beyond the on-disk inode size. */ 664 /* Reserve log space if we might write beyond the on-disk inode size. */
@@ -663,9 +669,10 @@ xfs_submit_ioend(
663 !ioend->io_append_trans) 669 !ioend->io_append_trans)
664 status = xfs_setfilesize_trans_alloc(ioend); 670 status = xfs_setfilesize_trans_alloc(ioend);
665 671
672 memalloc_nofs_restore(nofs_flag);
673
666 ioend->io_bio->bi_private = ioend; 674 ioend->io_bio->bi_private = ioend;
667 ioend->io_bio->bi_end_io = xfs_end_bio; 675 ioend->io_bio->bi_end_io = xfs_end_bio;
668 ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
669 676
670 /* 677 /*
671 * If we are failing the IO now, just mark the ioend with an 678 * If we are failing the IO now, just mark the ioend with an
@@ -679,7 +686,6 @@ xfs_submit_ioend(
679 return status; 686 return status;
680 } 687 }
681 688
682 ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint;
683 submit_bio(ioend->io_bio); 689 submit_bio(ioend->io_bio);
684 return 0; 690 return 0;
685} 691}
@@ -691,7 +697,8 @@ xfs_alloc_ioend(
691 xfs_exntst_t state, 697 xfs_exntst_t state,
692 xfs_off_t offset, 698 xfs_off_t offset,
693 struct block_device *bdev, 699 struct block_device *bdev,
694 sector_t sector) 700 sector_t sector,
701 struct writeback_control *wbc)
695{ 702{
696 struct xfs_ioend *ioend; 703 struct xfs_ioend *ioend;
697 struct bio *bio; 704 struct bio *bio;
@@ -699,6 +706,9 @@ xfs_alloc_ioend(
699 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset); 706 bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
700 bio_set_dev(bio, bdev); 707 bio_set_dev(bio, bdev);
701 bio->bi_iter.bi_sector = sector; 708 bio->bi_iter.bi_sector = sector;
709 bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
710 bio->bi_write_hint = inode->i_write_hint;
711 wbc_init_bio(wbc, bio);
702 712
703 ioend = container_of(bio, struct xfs_ioend, io_inline_bio); 713 ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
704 INIT_LIST_HEAD(&ioend->io_list); 714 INIT_LIST_HEAD(&ioend->io_list);
@@ -719,24 +729,22 @@ xfs_alloc_ioend(
719 * so that the bi_private linkage is set up in the right direction for the 729 * so that the bi_private linkage is set up in the right direction for the
720 * traversal in xfs_destroy_ioend(). 730 * traversal in xfs_destroy_ioend().
721 */ 731 */
722static void 732static struct bio *
723xfs_chain_bio( 733xfs_chain_bio(
724 struct xfs_ioend *ioend, 734 struct bio *prev)
725 struct writeback_control *wbc,
726 struct block_device *bdev,
727 sector_t sector)
728{ 735{
729 struct bio *new; 736 struct bio *new;
730 737
731 new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); 738 new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
732 bio_set_dev(new, bdev); 739 bio_copy_dev(new, prev);/* also copies over blkcg information */
733 new->bi_iter.bi_sector = sector; 740 new->bi_iter.bi_sector = bio_end_sector(prev);
734 bio_chain(ioend->io_bio, new); 741 new->bi_opf = prev->bi_opf;
735 bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ 742 new->bi_write_hint = prev->bi_write_hint;
736 ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 743
737 ioend->io_bio->bi_write_hint = ioend->io_inode->i_write_hint; 744 bio_chain(prev, new);
738 submit_bio(ioend->io_bio); 745 bio_get(prev); /* for xfs_destroy_ioend */
739 ioend->io_bio = new; 746 submit_bio(prev);
747 return new;
740} 748}
741 749
742/* 750/*
@@ -772,7 +780,7 @@ xfs_add_to_ioend(
772 if (wpc->ioend) 780 if (wpc->ioend)
773 list_add(&wpc->ioend->io_list, iolist); 781 list_add(&wpc->ioend->io_list, iolist);
774 wpc->ioend = xfs_alloc_ioend(inode, wpc->fork, 782 wpc->ioend = xfs_alloc_ioend(inode, wpc->fork,
775 wpc->imap.br_state, offset, bdev, sector); 783 wpc->imap.br_state, offset, bdev, sector, wbc);
776 } 784 }
777 785
778 merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, 786 merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
@@ -783,11 +791,12 @@ xfs_add_to_ioend(
783 791
784 if (!merged) { 792 if (!merged) {
785 if (bio_full(wpc->ioend->io_bio, len)) 793 if (bio_full(wpc->ioend->io_bio, len))
786 xfs_chain_bio(wpc->ioend, wbc, bdev, sector); 794 wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio);
787 bio_add_page(wpc->ioend->io_bio, page, len, poff); 795 bio_add_page(wpc->ioend->io_bio, page, len, poff);
788 } 796 }
789 797
790 wpc->ioend->io_size += len; 798 wpc->ioend->io_size += len;
799 wbc_account_io(wbc, page, len);
791} 800}
792 801
793STATIC void 802STATIC void
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f62b03186c62..45a1ea240cbb 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -28,7 +28,6 @@ extern const struct address_space_operations xfs_dax_aops;
28 28
29int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size); 29int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
30 30
31extern void xfs_count_page_state(struct page *, int *, int *);
32extern struct block_device *xfs_find_bdev_for_inode(struct inode *); 31extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
33extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *); 32extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *);
34 33
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index 228821b2ebe0..dc93c51c17de 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -15,18 +15,13 @@
15#include "xfs_da_format.h" 15#include "xfs_da_format.h"
16#include "xfs_da_btree.h" 16#include "xfs_da_btree.h"
17#include "xfs_inode.h" 17#include "xfs_inode.h"
18#include "xfs_alloc.h"
19#include "xfs_attr_remote.h" 18#include "xfs_attr_remote.h"
20#include "xfs_trans.h" 19#include "xfs_trans.h"
21#include "xfs_inode_item.h"
22#include "xfs_bmap.h" 20#include "xfs_bmap.h"
23#include "xfs_attr.h" 21#include "xfs_attr.h"
24#include "xfs_attr_leaf.h" 22#include "xfs_attr_leaf.h"
25#include "xfs_error.h"
26#include "xfs_quota.h" 23#include "xfs_quota.h"
27#include "xfs_trace.h"
28#include "xfs_dir2.h" 24#include "xfs_dir2.h"
29#include "xfs_defer.h"
30 25
31/* 26/*
32 * Look at all the extents for this logical region, 27 * Look at all the extents for this logical region,
@@ -121,7 +116,7 @@ xfs_attr3_leaf_inactive(
121 int size; 116 int size;
122 int tmp; 117 int tmp;
123 int i; 118 int i;
124 struct xfs_mount *mp = bp->b_target->bt_mount; 119 struct xfs_mount *mp = bp->b_mount;
125 120
126 leaf = bp->b_addr; 121 leaf = bp->b_addr;
127 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); 122 xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 3d213a7394c5..58fc820a70c6 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -6,25 +6,20 @@
6 */ 6 */
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_fs.h" 8#include "xfs_fs.h"
9#include "xfs_shared.h"
9#include "xfs_format.h" 10#include "xfs_format.h"
10#include "xfs_log_format.h" 11#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_da_format.h" 14#include "xfs_da_format.h"
15#include "xfs_da_btree.h"
16#include "xfs_inode.h" 15#include "xfs_inode.h"
17#include "xfs_trans.h" 16#include "xfs_trans.h"
18#include "xfs_inode_item.h"
19#include "xfs_bmap.h" 17#include "xfs_bmap.h"
20#include "xfs_attr.h" 18#include "xfs_attr.h"
21#include "xfs_attr_sf.h" 19#include "xfs_attr_sf.h"
22#include "xfs_attr_remote.h"
23#include "xfs_attr_leaf.h" 20#include "xfs_attr_leaf.h"
24#include "xfs_error.h" 21#include "xfs_error.h"
25#include "xfs_trace.h" 22#include "xfs_trace.h"
26#include "xfs_buf_item.h"
27#include "xfs_cksum.h"
28#include "xfs_dir2.h" 23#include "xfs_dir2.h"
29 24
30STATIC int 25STATIC int
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
new file mode 100644
index 000000000000..e2148f2d5d6b
--- /dev/null
+++ b/fs/xfs/xfs_bio_io.c
@@ -0,0 +1,61 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2019 Christoph Hellwig.
4 */
5#include "xfs.h"
6
7static inline unsigned int bio_max_vecs(unsigned int count)
8{
9 return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES);
10}
11
12int
13xfs_rw_bdev(
14 struct block_device *bdev,
15 sector_t sector,
16 unsigned int count,
17 char *data,
18 unsigned int op)
19
20{
21 unsigned int is_vmalloc = is_vmalloc_addr(data);
22 unsigned int left = count;
23 int error;
24 struct bio *bio;
25
26 if (is_vmalloc && op == REQ_OP_WRITE)
27 flush_kernel_vmap_range(data, count);
28
29 bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
30 bio_set_dev(bio, bdev);
31 bio->bi_iter.bi_sector = sector;
32 bio->bi_opf = op | REQ_META | REQ_SYNC;
33
34 do {
35 struct page *page = kmem_to_page(data);
36 unsigned int off = offset_in_page(data);
37 unsigned int len = min_t(unsigned, left, PAGE_SIZE - off);
38
39 while (bio_add_page(bio, page, len, off) != len) {
40 struct bio *prev = bio;
41
42 bio = bio_alloc(GFP_KERNEL, bio_max_vecs(left));
43 bio_copy_dev(bio, prev);
44 bio->bi_iter.bi_sector = bio_end_sector(prev);
45 bio->bi_opf = prev->bi_opf;
46 bio_chain(prev, bio);
47
48 submit_bio(prev);
49 }
50
51 data += len;
52 left -= len;
53 } while (left > 0);
54
55 error = submit_bio_wait(bio);
56 bio_put(bio);
57
58 if (is_vmalloc && op == REQ_OP_READ)
59 invalidate_kernel_vmap_range(data, count);
60 return error;
61}
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index ce45f066995e..9fa4a7ee8cfc 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -9,17 +9,16 @@
9#include "xfs_log_format.h" 9#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_bit.h" 11#include "xfs_bit.h"
12#include "xfs_shared.h"
12#include "xfs_mount.h" 13#include "xfs_mount.h"
13#include "xfs_defer.h" 14#include "xfs_defer.h"
14#include "xfs_inode.h" 15#include "xfs_inode.h"
15#include "xfs_trans.h" 16#include "xfs_trans.h"
16#include "xfs_trans_priv.h" 17#include "xfs_trans_priv.h"
17#include "xfs_buf_item.h"
18#include "xfs_bmap_item.h" 18#include "xfs_bmap_item.h"
19#include "xfs_log.h" 19#include "xfs_log.h"
20#include "xfs_bmap.h" 20#include "xfs_bmap.h"
21#include "xfs_icache.h" 21#include "xfs_icache.h"
22#include "xfs_trace.h"
23#include "xfs_bmap_btree.h" 22#include "xfs_bmap_btree.h"
24#include "xfs_trans_space.h" 23#include "xfs_trans_space.h"
25 24
@@ -96,15 +95,6 @@ xfs_bui_item_format(
96} 95}
97 96
98/* 97/*
99 * Pinning has no meaning for an bui item, so just return.
100 */
101STATIC void
102xfs_bui_item_pin(
103 struct xfs_log_item *lip)
104{
105}
106
107/*
108 * The unpin operation is the last place an BUI is manipulated in the log. It is 98 * The unpin operation is the last place an BUI is manipulated in the log. It is
109 * either inserted in the AIL or aborted in the event of a log I/O error. In 99 * either inserted in the AIL or aborted in the event of a log I/O error. In
110 * either case, the BUI transaction has been successfully committed to make it 100 * either case, the BUI transaction has been successfully committed to make it
@@ -123,71 +113,22 @@ xfs_bui_item_unpin(
123} 113}
124 114
125/* 115/*
126 * BUI items have no locking or pushing. However, since BUIs are pulled from
127 * the AIL when their corresponding BUDs are committed to disk, their situation
128 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
129 * will eventually flush the log. This should help in getting the BUI out of
130 * the AIL.
131 */
132STATIC uint
133xfs_bui_item_push(
134 struct xfs_log_item *lip,
135 struct list_head *buffer_list)
136{
137 return XFS_ITEM_PINNED;
138}
139
140/*
141 * The BUI has been either committed or aborted if the transaction has been 116 * The BUI has been either committed or aborted if the transaction has been
142 * cancelled. If the transaction was cancelled, an BUD isn't going to be 117 * cancelled. If the transaction was cancelled, an BUD isn't going to be
143 * constructed and thus we free the BUI here directly. 118 * constructed and thus we free the BUI here directly.
144 */ 119 */
145STATIC void 120STATIC void
146xfs_bui_item_unlock( 121xfs_bui_item_release(
147 struct xfs_log_item *lip) 122 struct xfs_log_item *lip)
148{ 123{
149 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 124 xfs_bui_release(BUI_ITEM(lip));
150 xfs_bui_release(BUI_ITEM(lip));
151}
152
153/*
154 * The BUI is logged only once and cannot be moved in the log, so simply return
155 * the lsn at which it's been logged.
156 */
157STATIC xfs_lsn_t
158xfs_bui_item_committed(
159 struct xfs_log_item *lip,
160 xfs_lsn_t lsn)
161{
162 return lsn;
163} 125}
164 126
165/*
166 * The BUI dependency tracking op doesn't do squat. It can't because
167 * it doesn't know where the free extent is coming from. The dependency
168 * tracking has to be handled by the "enclosing" metadata object. For
169 * example, for inodes, the inode is locked throughout the extent freeing
170 * so the dependency should be recorded there.
171 */
172STATIC void
173xfs_bui_item_committing(
174 struct xfs_log_item *lip,
175 xfs_lsn_t lsn)
176{
177}
178
179/*
180 * This is the ops vector shared by all bui log items.
181 */
182static const struct xfs_item_ops xfs_bui_item_ops = { 127static const struct xfs_item_ops xfs_bui_item_ops = {
183 .iop_size = xfs_bui_item_size, 128 .iop_size = xfs_bui_item_size,
184 .iop_format = xfs_bui_item_format, 129 .iop_format = xfs_bui_item_format,
185 .iop_pin = xfs_bui_item_pin,
186 .iop_unpin = xfs_bui_item_unpin, 130 .iop_unpin = xfs_bui_item_unpin,
187 .iop_unlock = xfs_bui_item_unlock, 131 .iop_release = xfs_bui_item_release,
188 .iop_committed = xfs_bui_item_committed,
189 .iop_push = xfs_bui_item_push,
190 .iop_committing = xfs_bui_item_committing,
191}; 132};
192 133
193/* 134/*
@@ -249,126 +190,241 @@ xfs_bud_item_format(
249} 190}
250 191
251/* 192/*
252 * Pinning has no meaning for an bud item, so just return. 193 * The BUD is either committed or aborted if the transaction is cancelled. If
194 * the transaction is cancelled, drop our reference to the BUI and free the
195 * BUD.
253 */ 196 */
254STATIC void 197STATIC void
255xfs_bud_item_pin( 198xfs_bud_item_release(
256 struct xfs_log_item *lip) 199 struct xfs_log_item *lip)
257{ 200{
201 struct xfs_bud_log_item *budp = BUD_ITEM(lip);
202
203 xfs_bui_release(budp->bud_buip);
204 kmem_zone_free(xfs_bud_zone, budp);
258} 205}
259 206
260/* 207static const struct xfs_item_ops xfs_bud_item_ops = {
261 * Since pinning has no meaning for an bud item, unpinning does 208 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
262 * not either. 209 .iop_size = xfs_bud_item_size,
263 */ 210 .iop_format = xfs_bud_item_format,
264STATIC void 211 .iop_release = xfs_bud_item_release,
265xfs_bud_item_unpin( 212};
266 struct xfs_log_item *lip, 213
267 int remove) 214static struct xfs_bud_log_item *
215xfs_trans_get_bud(
216 struct xfs_trans *tp,
217 struct xfs_bui_log_item *buip)
268{ 218{
219 struct xfs_bud_log_item *budp;
220
221 budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
222 xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD,
223 &xfs_bud_item_ops);
224 budp->bud_buip = buip;
225 budp->bud_format.bud_bui_id = buip->bui_format.bui_id;
226
227 xfs_trans_add_item(tp, &budp->bud_item);
228 return budp;
269} 229}
270 230
271/* 231/*
272 * There isn't much you can do to push on an bud item. It is simply stuck 232 * Finish an bmap update and log it to the BUD. Note that the
273 * waiting for the log to be flushed to disk. 233 * transaction is marked dirty regardless of whether the bmap update
234 * succeeds or fails to support the BUI/BUD lifecycle rules.
274 */ 235 */
275STATIC uint 236static int
276xfs_bud_item_push( 237xfs_trans_log_finish_bmap_update(
277 struct xfs_log_item *lip, 238 struct xfs_trans *tp,
278 struct list_head *buffer_list) 239 struct xfs_bud_log_item *budp,
240 enum xfs_bmap_intent_type type,
241 struct xfs_inode *ip,
242 int whichfork,
243 xfs_fileoff_t startoff,
244 xfs_fsblock_t startblock,
245 xfs_filblks_t *blockcount,
246 xfs_exntst_t state)
279{ 247{
280 return XFS_ITEM_PINNED; 248 int error;
249
250 error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
251 startblock, blockcount, state);
252
253 /*
254 * Mark the transaction dirty, even on error. This ensures the
255 * transaction is aborted, which:
256 *
257 * 1.) releases the BUI and frees the BUD
258 * 2.) shuts down the filesystem
259 */
260 tp->t_flags |= XFS_TRANS_DIRTY;
261 set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
262
263 return error;
281} 264}
282 265
283/* 266/* Sort bmap intents by inode. */
284 * The BUD is either committed or aborted if the transaction is cancelled. If 267static int
285 * the transaction is cancelled, drop our reference to the BUI and free the 268xfs_bmap_update_diff_items(
286 * BUD. 269 void *priv,
287 */ 270 struct list_head *a,
288STATIC void 271 struct list_head *b)
289xfs_bud_item_unlock(
290 struct xfs_log_item *lip)
291{ 272{
292 struct xfs_bud_log_item *budp = BUD_ITEM(lip); 273 struct xfs_bmap_intent *ba;
274 struct xfs_bmap_intent *bb;
293 275
294 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 276 ba = container_of(a, struct xfs_bmap_intent, bi_list);
295 xfs_bui_release(budp->bud_buip); 277 bb = container_of(b, struct xfs_bmap_intent, bi_list);
296 kmem_zone_free(xfs_bud_zone, budp); 278 return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
297 }
298} 279}
299 280
300/* 281/* Get an BUI. */
301 * When the bud item is committed to disk, all we need to do is delete our 282STATIC void *
302 * reference to our partner bui item and then free ourselves. Since we're 283xfs_bmap_update_create_intent(
303 * freeing ourselves we must return -1 to keep the transaction code from 284 struct xfs_trans *tp,
304 * further referencing this item. 285 unsigned int count)
305 */
306STATIC xfs_lsn_t
307xfs_bud_item_committed(
308 struct xfs_log_item *lip,
309 xfs_lsn_t lsn)
310{ 286{
311 struct xfs_bud_log_item *budp = BUD_ITEM(lip); 287 struct xfs_bui_log_item *buip;
288
289 ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
290 ASSERT(tp != NULL);
291
292 buip = xfs_bui_init(tp->t_mountp);
293 ASSERT(buip != NULL);
312 294
313 /* 295 /*
314 * Drop the BUI reference regardless of whether the BUD has been 296 * Get a log_item_desc to point at the new item.
315 * aborted. Once the BUD transaction is constructed, it is the sole
316 * responsibility of the BUD to release the BUI (even if the BUI is
317 * aborted due to log I/O error).
318 */ 297 */
319 xfs_bui_release(budp->bud_buip); 298 xfs_trans_add_item(tp, &buip->bui_item);
320 kmem_zone_free(xfs_bud_zone, budp); 299 return buip;
300}
321 301
322 return (xfs_lsn_t)-1; 302/* Set the map extent flags for this mapping. */
303static void
304xfs_trans_set_bmap_flags(
305 struct xfs_map_extent *bmap,
306 enum xfs_bmap_intent_type type,
307 int whichfork,
308 xfs_exntst_t state)
309{
310 bmap->me_flags = 0;
311 switch (type) {
312 case XFS_BMAP_MAP:
313 case XFS_BMAP_UNMAP:
314 bmap->me_flags = type;
315 break;
316 default:
317 ASSERT(0);
318 }
319 if (state == XFS_EXT_UNWRITTEN)
320 bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
321 if (whichfork == XFS_ATTR_FORK)
322 bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
323} 323}
324 324
325/* 325/* Log bmap updates in the intent item. */
326 * The BUD dependency tracking op doesn't do squat. It can't because
327 * it doesn't know where the free extent is coming from. The dependency
328 * tracking has to be handled by the "enclosing" metadata object. For
329 * example, for inodes, the inode is locked throughout the extent freeing
330 * so the dependency should be recorded there.
331 */
332STATIC void 326STATIC void
333xfs_bud_item_committing( 327xfs_bmap_update_log_item(
334 struct xfs_log_item *lip, 328 struct xfs_trans *tp,
335 xfs_lsn_t lsn) 329 void *intent,
330 struct list_head *item)
336{ 331{
332 struct xfs_bui_log_item *buip = intent;
333 struct xfs_bmap_intent *bmap;
334 uint next_extent;
335 struct xfs_map_extent *map;
336
337 bmap = container_of(item, struct xfs_bmap_intent, bi_list);
338
339 tp->t_flags |= XFS_TRANS_DIRTY;
340 set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
341
342 /*
343 * atomic_inc_return gives us the value after the increment;
344 * we want to use it as an array index so we need to subtract 1 from
345 * it.
346 */
347 next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
348 ASSERT(next_extent < buip->bui_format.bui_nextents);
349 map = &buip->bui_format.bui_extents[next_extent];
350 map->me_owner = bmap->bi_owner->i_ino;
351 map->me_startblock = bmap->bi_bmap.br_startblock;
352 map->me_startoff = bmap->bi_bmap.br_startoff;
353 map->me_len = bmap->bi_bmap.br_blockcount;
354 xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
355 bmap->bi_bmap.br_state);
337} 356}
338 357
339/* 358/* Get an BUD so we can process all the deferred rmap updates. */
340 * This is the ops vector shared by all bud log items. 359STATIC void *
341 */ 360xfs_bmap_update_create_done(
342static const struct xfs_item_ops xfs_bud_item_ops = { 361 struct xfs_trans *tp,
343 .iop_size = xfs_bud_item_size, 362 void *intent,
344 .iop_format = xfs_bud_item_format, 363 unsigned int count)
345 .iop_pin = xfs_bud_item_pin, 364{
346 .iop_unpin = xfs_bud_item_unpin, 365 return xfs_trans_get_bud(tp, intent);
347 .iop_unlock = xfs_bud_item_unlock, 366}
348 .iop_committed = xfs_bud_item_committed,
349 .iop_push = xfs_bud_item_push,
350 .iop_committing = xfs_bud_item_committing,
351};
352 367
353/* 368/* Process a deferred rmap update. */
354 * Allocate and initialize an bud item with the given number of extents. 369STATIC int
355 */ 370xfs_bmap_update_finish_item(
356struct xfs_bud_log_item * 371 struct xfs_trans *tp,
357xfs_bud_init( 372 struct list_head *item,
358 struct xfs_mount *mp, 373 void *done_item,
359 struct xfs_bui_log_item *buip) 374 void **state)
375{
376 struct xfs_bmap_intent *bmap;
377 xfs_filblks_t count;
378 int error;
379
380 bmap = container_of(item, struct xfs_bmap_intent, bi_list);
381 count = bmap->bi_bmap.br_blockcount;
382 error = xfs_trans_log_finish_bmap_update(tp, done_item,
383 bmap->bi_type,
384 bmap->bi_owner, bmap->bi_whichfork,
385 bmap->bi_bmap.br_startoff,
386 bmap->bi_bmap.br_startblock,
387 &count,
388 bmap->bi_bmap.br_state);
389 if (!error && count > 0) {
390 ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
391 bmap->bi_bmap.br_blockcount = count;
392 return -EAGAIN;
393 }
394 kmem_free(bmap);
395 return error;
396}
360 397
398/* Abort all pending BUIs. */
399STATIC void
400xfs_bmap_update_abort_intent(
401 void *intent)
361{ 402{
362 struct xfs_bud_log_item *budp; 403 xfs_bui_release(intent);
404}
363 405
364 budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP); 406/* Cancel a deferred rmap update. */
365 xfs_log_item_init(mp, &budp->bud_item, XFS_LI_BUD, &xfs_bud_item_ops); 407STATIC void
366 budp->bud_buip = buip; 408xfs_bmap_update_cancel_item(
367 budp->bud_format.bud_bui_id = buip->bui_format.bui_id; 409 struct list_head *item)
410{
411 struct xfs_bmap_intent *bmap;
368 412
369 return budp; 413 bmap = container_of(item, struct xfs_bmap_intent, bi_list);
414 kmem_free(bmap);
370} 415}
371 416
417const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
418 .max_items = XFS_BUI_MAX_FAST_EXTENTS,
419 .diff_items = xfs_bmap_update_diff_items,
420 .create_intent = xfs_bmap_update_create_intent,
421 .abort_intent = xfs_bmap_update_abort_intent,
422 .log_item = xfs_bmap_update_log_item,
423 .create_done = xfs_bmap_update_create_done,
424 .finish_item = xfs_bmap_update_finish_item,
425 .cancel_item = xfs_bmap_update_cancel_item,
426};
427
372/* 428/*
373 * Process a bmap update intent item that was recovered from the log. 429 * Process a bmap update intent item that was recovered from the log.
374 * We need to update some inode's bmbt. 430 * We need to update some inode's bmbt.
diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h
index 89e043a88bb8..ad479cc73de8 100644
--- a/fs/xfs/xfs_bmap_item.h
+++ b/fs/xfs/xfs_bmap_item.h
@@ -75,8 +75,6 @@ extern struct kmem_zone *xfs_bui_zone;
75extern struct kmem_zone *xfs_bud_zone; 75extern struct kmem_zone *xfs_bud_zone;
76 76
77struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *); 77struct xfs_bui_log_item *xfs_bui_init(struct xfs_mount *);
78struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *,
79 struct xfs_bui_log_item *);
80void xfs_bui_item_free(struct xfs_bui_log_item *); 78void xfs_bui_item_free(struct xfs_bui_log_item *);
81void xfs_bui_release(struct xfs_bui_log_item *); 79void xfs_bui_release(struct xfs_bui_log_item *);
82int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip); 80int xfs_bui_recover(struct xfs_trans *parent_tp, struct xfs_bui_log_item *buip);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 06d07f1e310b..98c6a7a71427 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -12,12 +12,10 @@
12#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
13#include "xfs_bit.h" 13#include "xfs_bit.h"
14#include "xfs_mount.h" 14#include "xfs_mount.h"
15#include "xfs_da_format.h"
16#include "xfs_defer.h" 15#include "xfs_defer.h"
17#include "xfs_inode.h" 16#include "xfs_inode.h"
18#include "xfs_btree.h" 17#include "xfs_btree.h"
19#include "xfs_trans.h" 18#include "xfs_trans.h"
20#include "xfs_extfree_item.h"
21#include "xfs_alloc.h" 19#include "xfs_alloc.h"
22#include "xfs_bmap.h" 20#include "xfs_bmap.h"
23#include "xfs_bmap_util.h" 21#include "xfs_bmap_util.h"
@@ -28,11 +26,8 @@
28#include "xfs_trans_space.h" 26#include "xfs_trans_space.h"
29#include "xfs_trace.h" 27#include "xfs_trace.h"
30#include "xfs_icache.h" 28#include "xfs_icache.h"
31#include "xfs_log.h"
32#include "xfs_rmap_btree.h"
33#include "xfs_iomap.h" 29#include "xfs_iomap.h"
34#include "xfs_reflink.h" 30#include "xfs_reflink.h"
35#include "xfs_refcount.h"
36 31
37/* Kernel only BMAP related definitions and functions */ 32/* Kernel only BMAP related definitions and functions */
38 33
@@ -276,7 +271,7 @@ xfs_bmap_count_tree(
276 struct xfs_btree_block *block, *nextblock; 271 struct xfs_btree_block *block, *nextblock;
277 int numrecs; 272 int numrecs;
278 273
279 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF, 274 error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF,
280 &xfs_bmbt_buf_ops); 275 &xfs_bmbt_buf_ops);
281 if (error) 276 if (error)
282 return error; 277 return error;
@@ -287,7 +282,7 @@ xfs_bmap_count_tree(
287 /* Not at node above leaves, count this level of nodes */ 282 /* Not at node above leaves, count this level of nodes */
288 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib); 283 nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
289 while (nextbno != NULLFSBLOCK) { 284 while (nextbno != NULLFSBLOCK) {
290 error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp, 285 error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp,
291 XFS_BMAP_BTREE_REF, 286 XFS_BMAP_BTREE_REF,
292 &xfs_bmbt_buf_ops); 287 &xfs_bmbt_buf_ops);
293 if (error) 288 if (error)
@@ -321,7 +316,7 @@ xfs_bmap_count_tree(
321 if (nextbno == NULLFSBLOCK) 316 if (nextbno == NULLFSBLOCK)
322 break; 317 break;
323 bno = nextbno; 318 bno = nextbno;
324 error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, 319 error = xfs_btree_read_bufl(mp, tp, bno, &bp,
325 XFS_BMAP_BTREE_REF, 320 XFS_BMAP_BTREE_REF,
326 &xfs_bmbt_buf_ops); 321 &xfs_bmbt_buf_ops);
327 if (error) 322 if (error)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 548344e25128..ca0849043f54 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -4,24 +4,9 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include <linux/stddef.h>
8#include <linux/errno.h>
9#include <linux/gfp.h>
10#include <linux/pagemap.h>
11#include <linux/init.h>
12#include <linux/vmalloc.h>
13#include <linux/bio.h>
14#include <linux/sysctl.h>
15#include <linux/proc_fs.h>
16#include <linux/workqueue.h>
17#include <linux/percpu.h>
18#include <linux/blkdev.h>
19#include <linux/hash.h>
20#include <linux/kthread.h>
21#include <linux/migrate.h>
22#include <linux/backing-dev.h> 7#include <linux/backing-dev.h>
23#include <linux/freezer.h>
24 8
9#include "xfs_shared.h"
25#include "xfs_format.h" 10#include "xfs_format.h"
26#include "xfs_log_format.h" 11#include "xfs_log_format.h"
27#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
@@ -213,7 +198,7 @@ xfs_buf_free_maps(
213 } 198 }
214} 199}
215 200
216struct xfs_buf * 201static struct xfs_buf *
217_xfs_buf_alloc( 202_xfs_buf_alloc(
218 struct xfs_buftarg *target, 203 struct xfs_buftarg *target,
219 struct xfs_buf_map *map, 204 struct xfs_buf_map *map,
@@ -243,6 +228,7 @@ _xfs_buf_alloc(
243 sema_init(&bp->b_sema, 0); /* held, no waiters */ 228 sema_init(&bp->b_sema, 0); /* held, no waiters */
244 spin_lock_init(&bp->b_lock); 229 spin_lock_init(&bp->b_lock);
245 bp->b_target = target; 230 bp->b_target = target;
231 bp->b_mount = target->bt_mount;
246 bp->b_flags = flags; 232 bp->b_flags = flags;
247 233
248 /* 234 /*
@@ -263,12 +249,11 @@ _xfs_buf_alloc(
263 bp->b_maps[i].bm_len = map[i].bm_len; 249 bp->b_maps[i].bm_len = map[i].bm_len;
264 bp->b_length += map[i].bm_len; 250 bp->b_length += map[i].bm_len;
265 } 251 }
266 bp->b_io_length = bp->b_length;
267 252
268 atomic_set(&bp->b_pin_count, 0); 253 atomic_set(&bp->b_pin_count, 0);
269 init_waitqueue_head(&bp->b_waiters); 254 init_waitqueue_head(&bp->b_waiters);
270 255
271 XFS_STATS_INC(target->bt_mount, xb_create); 256 XFS_STATS_INC(bp->b_mount, xb_create);
272 trace_xfs_buf_init(bp, _RET_IP_); 257 trace_xfs_buf_init(bp, _RET_IP_);
273 258
274 return bp; 259 return bp;
@@ -425,12 +410,12 @@ retry:
425 current->comm, current->pid, 410 current->comm, current->pid,
426 __func__, gfp_mask); 411 __func__, gfp_mask);
427 412
428 XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries); 413 XFS_STATS_INC(bp->b_mount, xb_page_retries);
429 congestion_wait(BLK_RW_ASYNC, HZ/50); 414 congestion_wait(BLK_RW_ASYNC, HZ/50);
430 goto retry; 415 goto retry;
431 } 416 }
432 417
433 XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found); 418 XFS_STATS_INC(bp->b_mount, xb_page_found);
434 419
435 nbytes = min_t(size_t, size, PAGE_SIZE - offset); 420 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
436 size -= nbytes; 421 size -= nbytes;
@@ -909,83 +894,6 @@ xfs_buf_read_uncached(
909 return 0; 894 return 0;
910} 895}
911 896
912/*
913 * Return a buffer allocated as an empty buffer and associated to external
914 * memory via xfs_buf_associate_memory() back to it's empty state.
915 */
916void
917xfs_buf_set_empty(
918 struct xfs_buf *bp,
919 size_t numblks)
920{
921 if (bp->b_pages)
922 _xfs_buf_free_pages(bp);
923
924 bp->b_pages = NULL;
925 bp->b_page_count = 0;
926 bp->b_addr = NULL;
927 bp->b_length = numblks;
928 bp->b_io_length = numblks;
929
930 ASSERT(bp->b_map_count == 1);
931 bp->b_bn = XFS_BUF_DADDR_NULL;
932 bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
933 bp->b_maps[0].bm_len = bp->b_length;
934}
935
936static inline struct page *
937mem_to_page(
938 void *addr)
939{
940 if ((!is_vmalloc_addr(addr))) {
941 return virt_to_page(addr);
942 } else {
943 return vmalloc_to_page(addr);
944 }
945}
946
947int
948xfs_buf_associate_memory(
949 xfs_buf_t *bp,
950 void *mem,
951 size_t len)
952{
953 int rval;
954 int i = 0;
955 unsigned long pageaddr;
956 unsigned long offset;
957 size_t buflen;
958 int page_count;
959
960 pageaddr = (unsigned long)mem & PAGE_MASK;
961 offset = (unsigned long)mem - pageaddr;
962 buflen = PAGE_ALIGN(len + offset);
963 page_count = buflen >> PAGE_SHIFT;
964
965 /* Free any previous set of page pointers */
966 if (bp->b_pages)
967 _xfs_buf_free_pages(bp);
968
969 bp->b_pages = NULL;
970 bp->b_addr = mem;
971
972 rval = _xfs_buf_get_pages(bp, page_count);
973 if (rval)
974 return rval;
975
976 bp->b_offset = offset;
977
978 for (i = 0; i < bp->b_page_count; i++) {
979 bp->b_pages[i] = mem_to_page((void *)pageaddr);
980 pageaddr += PAGE_SIZE;
981 }
982
983 bp->b_io_length = BTOBB(len);
984 bp->b_length = BTOBB(buflen);
985
986 return 0;
987}
988
989xfs_buf_t * 897xfs_buf_t *
990xfs_buf_get_uncached( 898xfs_buf_get_uncached(
991 struct xfs_buftarg *target, 899 struct xfs_buftarg *target,
@@ -1180,7 +1088,7 @@ xfs_buf_lock(
1180 trace_xfs_buf_lock(bp, _RET_IP_); 1088 trace_xfs_buf_lock(bp, _RET_IP_);
1181 1089
1182 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 1090 if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
1183 xfs_log_force(bp->b_target->bt_mount, 0); 1091 xfs_log_force(bp->b_mount, 0);
1184 down(&bp->b_sema); 1092 down(&bp->b_sema);
1185 1093
1186 trace_xfs_buf_lock_done(bp, _RET_IP_); 1094 trace_xfs_buf_lock_done(bp, _RET_IP_);
@@ -1269,7 +1177,7 @@ xfs_buf_ioend_async(
1269 struct xfs_buf *bp) 1177 struct xfs_buf *bp)
1270{ 1178{
1271 INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work); 1179 INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
1272 queue_work(bp->b_ioend_wq, &bp->b_ioend_work); 1180 queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
1273} 1181}
1274 1182
1275void 1183void
@@ -1288,7 +1196,7 @@ xfs_buf_ioerror_alert(
1288 struct xfs_buf *bp, 1196 struct xfs_buf *bp,
1289 const char *func) 1197 const char *func)
1290{ 1198{
1291 xfs_alert(bp->b_target->bt_mount, 1199 xfs_alert(bp->b_mount,
1292"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d", 1200"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
1293 func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length, 1201 func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
1294 -bp->b_error); 1202 -bp->b_error);
@@ -1307,10 +1215,8 @@ xfs_bwrite(
1307 XBF_WRITE_FAIL | XBF_DONE); 1215 XBF_WRITE_FAIL | XBF_DONE);
1308 1216
1309 error = xfs_buf_submit(bp); 1217 error = xfs_buf_submit(bp);
1310 if (error) { 1218 if (error)
1311 xfs_force_shutdown(bp->b_target->bt_mount, 1219 xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
1312 SHUTDOWN_META_IO_ERROR);
1313 }
1314 return error; 1220 return error;
1315} 1221}
1316 1222
@@ -1436,21 +1342,8 @@ _xfs_buf_ioapply(
1436 */ 1342 */
1437 bp->b_error = 0; 1343 bp->b_error = 0;
1438 1344
1439 /*
1440 * Initialize the I/O completion workqueue if we haven't yet or the
1441 * submitter has not opted to specify a custom one.
1442 */
1443 if (!bp->b_ioend_wq)
1444 bp->b_ioend_wq = bp->b_target->bt_mount->m_buf_workqueue;
1445
1446 if (bp->b_flags & XBF_WRITE) { 1345 if (bp->b_flags & XBF_WRITE) {
1447 op = REQ_OP_WRITE; 1346 op = REQ_OP_WRITE;
1448 if (bp->b_flags & XBF_SYNCIO)
1449 op_flags = REQ_SYNC;
1450 if (bp->b_flags & XBF_FUA)
1451 op_flags |= REQ_FUA;
1452 if (bp->b_flags & XBF_FLUSH)
1453 op_flags |= REQ_PREFLUSH;
1454 1347
1455 /* 1348 /*
1456 * Run the write verifier callback function if it exists. If 1349 * Run the write verifier callback function if it exists. If
@@ -1460,12 +1353,12 @@ _xfs_buf_ioapply(
1460 if (bp->b_ops) { 1353 if (bp->b_ops) {
1461 bp->b_ops->verify_write(bp); 1354 bp->b_ops->verify_write(bp);
1462 if (bp->b_error) { 1355 if (bp->b_error) {
1463 xfs_force_shutdown(bp->b_target->bt_mount, 1356 xfs_force_shutdown(bp->b_mount,
1464 SHUTDOWN_CORRUPT_INCORE); 1357 SHUTDOWN_CORRUPT_INCORE);
1465 return; 1358 return;
1466 } 1359 }
1467 } else if (bp->b_bn != XFS_BUF_DADDR_NULL) { 1360 } else if (bp->b_bn != XFS_BUF_DADDR_NULL) {
1468 struct xfs_mount *mp = bp->b_target->bt_mount; 1361 struct xfs_mount *mp = bp->b_mount;
1469 1362
1470 /* 1363 /*
1471 * non-crc filesystems don't attach verifiers during 1364 * non-crc filesystems don't attach verifiers during
@@ -1497,7 +1390,7 @@ _xfs_buf_ioapply(
1497 * subsequent call. 1390 * subsequent call.
1498 */ 1391 */
1499 offset = bp->b_offset; 1392 offset = bp->b_offset;
1500 size = BBTOB(bp->b_io_length); 1393 size = BBTOB(bp->b_length);
1501 blk_start_plug(&plug); 1394 blk_start_plug(&plug);
1502 for (i = 0; i < bp->b_map_count; i++) { 1395 for (i = 0; i < bp->b_map_count; i++) {
1503 xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags); 1396 xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
@@ -1543,7 +1436,7 @@ __xfs_buf_submit(
1543 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); 1436 ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
1544 1437
1545 /* on shutdown we stale and complete the buffer immediately */ 1438 /* on shutdown we stale and complete the buffer immediately */
1546 if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { 1439 if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
1547 xfs_buf_ioerror(bp, -EIO); 1440 xfs_buf_ioerror(bp, -EIO);
1548 bp->b_flags &= ~XBF_DONE; 1441 bp->b_flags &= ~XBF_DONE;
1549 xfs_buf_stale(bp); 1442 xfs_buf_stale(bp);
@@ -1613,16 +1506,11 @@ xfs_buf_offset(
1613 return page_address(page) + (offset & (PAGE_SIZE-1)); 1506 return page_address(page) + (offset & (PAGE_SIZE-1));
1614} 1507}
1615 1508
1616/*
1617 * Move data into or out of a buffer.
1618 */
1619void 1509void
1620xfs_buf_iomove( 1510xfs_buf_zero(
1621 xfs_buf_t *bp, /* buffer to process */ 1511 struct xfs_buf *bp,
1622 size_t boff, /* starting buffer offset */ 1512 size_t boff,
1623 size_t bsize, /* length to copy */ 1513 size_t bsize)
1624 void *data, /* data address */
1625 xfs_buf_rw_t mode) /* read/write/zero flag */
1626{ 1514{
1627 size_t bend; 1515 size_t bend;
1628 1516
@@ -1635,23 +1523,13 @@ xfs_buf_iomove(
1635 page_offset = (boff + bp->b_offset) & ~PAGE_MASK; 1523 page_offset = (boff + bp->b_offset) & ~PAGE_MASK;
1636 page = bp->b_pages[page_index]; 1524 page = bp->b_pages[page_index];
1637 csize = min_t(size_t, PAGE_SIZE - page_offset, 1525 csize = min_t(size_t, PAGE_SIZE - page_offset,
1638 BBTOB(bp->b_io_length) - boff); 1526 BBTOB(bp->b_length) - boff);
1639 1527
1640 ASSERT((csize + page_offset) <= PAGE_SIZE); 1528 ASSERT((csize + page_offset) <= PAGE_SIZE);
1641 1529
1642 switch (mode) { 1530 memset(page_address(page) + page_offset, 0, csize);
1643 case XBRW_ZERO:
1644 memset(page_address(page) + page_offset, 0, csize);
1645 break;
1646 case XBRW_READ:
1647 memcpy(data, page_address(page) + page_offset, csize);
1648 break;
1649 case XBRW_WRITE:
1650 memcpy(page_address(page) + page_offset, data, csize);
1651 }
1652 1531
1653 boff += csize; 1532 boff += csize;
1654 data += csize;
1655 } 1533 }
1656} 1534}
1657 1535
@@ -2198,8 +2076,7 @@ void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
2198 * This allows userspace to disrupt buffer caching for debug/testing 2076 * This allows userspace to disrupt buffer caching for debug/testing
2199 * purposes. 2077 * purposes.
2200 */ 2078 */
2201 if (XFS_TEST_ERROR(false, bp->b_target->bt_mount, 2079 if (XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_LRU_REF))
2202 XFS_ERRTAG_BUF_LRU_REF))
2203 lru_ref = 0; 2080 lru_ref = 0;
2204 2081
2205 atomic_set(&bp->b_lru_ref, lru_ref); 2082 atomic_set(&bp->b_lru_ref, lru_ref);
@@ -2215,7 +2092,7 @@ xfs_verify_magic(
2215 struct xfs_buf *bp, 2092 struct xfs_buf *bp,
2216 __be32 dmagic) 2093 __be32 dmagic)
2217{ 2094{
2218 struct xfs_mount *mp = bp->b_target->bt_mount; 2095 struct xfs_mount *mp = bp->b_mount;
2219 int idx; 2096 int idx;
2220 2097
2221 idx = xfs_sb_version_hascrc(&mp->m_sb); 2098 idx = xfs_sb_version_hascrc(&mp->m_sb);
@@ -2233,7 +2110,7 @@ xfs_verify_magic16(
2233 struct xfs_buf *bp, 2110 struct xfs_buf *bp,
2234 __be16 dmagic) 2111 __be16 dmagic)
2235{ 2112{
2236 struct xfs_mount *mp = bp->b_target->bt_mount; 2113 struct xfs_mount *mp = bp->b_mount;
2237 int idx; 2114 int idx;
2238 2115
2239 idx = xfs_sb_version_hascrc(&mp->m_sb); 2116 idx = xfs_sb_version_hascrc(&mp->m_sb);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index d0b96e071cec..c6e57a3f409e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -21,12 +21,6 @@
21 21
22#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL)) 22#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
23 23
24typedef enum {
25 XBRW_READ = 1, /* transfer into target memory */
26 XBRW_WRITE = 2, /* transfer from target memory */
27 XBRW_ZERO = 3, /* Zero target memory */
28} xfs_buf_rw_t;
29
30#define XBF_READ (1 << 0) /* buffer intended for reading from device */ 24#define XBF_READ (1 << 0) /* buffer intended for reading from device */
31#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ 25#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
32#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */ 26#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
@@ -34,12 +28,7 @@ typedef enum {
34#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ 28#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
35#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ 29#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
36#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */ 30#define XBF_STALE (1 << 6) /* buffer has been staled, do not find it */
37#define XBF_WRITE_FAIL (1 << 24)/* async writes have failed on this buffer */ 31#define XBF_WRITE_FAIL (1 << 7) /* async writes have failed on this buffer */
38
39/* I/O hints for the BIO layer */
40#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
41#define XBF_FUA (1 << 11)/* force cache write through mode */
42#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
43 32
44/* flags used only as arguments to access routines */ 33/* flags used only as arguments to access routines */
45#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ 34#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
@@ -49,7 +38,6 @@ typedef enum {
49#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ 38#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
50#define _XBF_KMEM (1 << 21)/* backed by heap memory */ 39#define _XBF_KMEM (1 << 21)/* backed by heap memory */
51#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ 40#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
52#define _XBF_COMPOUND (1 << 23)/* compound buffer */
53 41
54typedef unsigned int xfs_buf_flags_t; 42typedef unsigned int xfs_buf_flags_t;
55 43
@@ -62,15 +50,11 @@ typedef unsigned int xfs_buf_flags_t;
62 { XBF_DONE, "DONE" }, \ 50 { XBF_DONE, "DONE" }, \
63 { XBF_STALE, "STALE" }, \ 51 { XBF_STALE, "STALE" }, \
64 { XBF_WRITE_FAIL, "WRITE_FAIL" }, \ 52 { XBF_WRITE_FAIL, "WRITE_FAIL" }, \
65 { XBF_SYNCIO, "SYNCIO" }, \
66 { XBF_FUA, "FUA" }, \
67 { XBF_FLUSH, "FLUSH" }, \
68 { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\ 53 { XBF_TRYLOCK, "TRYLOCK" }, /* should never be set */\
69 { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\ 54 { XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\
70 { _XBF_PAGES, "PAGES" }, \ 55 { _XBF_PAGES, "PAGES" }, \
71 { _XBF_KMEM, "KMEM" }, \ 56 { _XBF_KMEM, "KMEM" }, \
72 { _XBF_DELWRI_Q, "DELWRI_Q" }, \ 57 { _XBF_DELWRI_Q, "DELWRI_Q" }
73 { _XBF_COMPOUND, "COMPOUND" }
74 58
75 59
76/* 60/*
@@ -161,13 +145,13 @@ typedef struct xfs_buf {
161 wait_queue_head_t b_waiters; /* unpin waiters */ 145 wait_queue_head_t b_waiters; /* unpin waiters */
162 struct list_head b_list; 146 struct list_head b_list;
163 struct xfs_perag *b_pag; /* contains rbtree root */ 147 struct xfs_perag *b_pag; /* contains rbtree root */
148 struct xfs_mount *b_mount;
164 xfs_buftarg_t *b_target; /* buffer target (device) */ 149 xfs_buftarg_t *b_target; /* buffer target (device) */
165 void *b_addr; /* virtual address of buffer */ 150 void *b_addr; /* virtual address of buffer */
166 struct work_struct b_ioend_work; 151 struct work_struct b_ioend_work;
167 struct workqueue_struct *b_ioend_wq; /* I/O completion wq */
168 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 152 xfs_buf_iodone_t b_iodone; /* I/O completion function */
169 struct completion b_iowait; /* queue for I/O waiters */ 153 struct completion b_iowait; /* queue for I/O waiters */
170 void *b_log_item; 154 struct xfs_buf_log_item *b_log_item;
171 struct list_head b_li_list; /* Log items list head */ 155 struct list_head b_li_list; /* Log items list head */
172 struct xfs_trans *b_transp; 156 struct xfs_trans *b_transp;
173 struct page **b_pages; /* array of page pointers */ 157 struct page **b_pages; /* array of page pointers */
@@ -175,7 +159,6 @@ typedef struct xfs_buf {
175 struct xfs_buf_map *b_maps; /* compound buffer map */ 159 struct xfs_buf_map *b_maps; /* compound buffer map */
176 struct xfs_buf_map __b_map; /* inline compound buffer map */ 160 struct xfs_buf_map __b_map; /* inline compound buffer map */
177 int b_map_count; 161 int b_map_count;
178 int b_io_length; /* IO size in BBs */
179 atomic_t b_pin_count; /* pin count */ 162 atomic_t b_pin_count; /* pin count */
180 atomic_t b_io_remaining; /* #outstanding I/O requests */ 163 atomic_t b_io_remaining; /* #outstanding I/O requests */
181 unsigned int b_page_count; /* size of page array */ 164 unsigned int b_page_count; /* size of page array */
@@ -209,21 +192,6 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
209 xfs_daddr_t blkno, size_t numblks, 192 xfs_daddr_t blkno, size_t numblks,
210 xfs_buf_flags_t flags); 193 xfs_buf_flags_t flags);
211 194
212struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target,
213 struct xfs_buf_map *map, int nmaps,
214 xfs_buf_flags_t flags);
215
216static inline struct xfs_buf *
217xfs_buf_alloc(
218 struct xfs_buftarg *target,
219 xfs_daddr_t blkno,
220 size_t numblks,
221 xfs_buf_flags_t flags)
222{
223 DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
224 return _xfs_buf_alloc(target, &map, 1, flags);
225}
226
227struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target, 195struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
228 struct xfs_buf_map *map, int nmaps, 196 struct xfs_buf_map *map, int nmaps,
229 xfs_buf_flags_t flags); 197 xfs_buf_flags_t flags);
@@ -239,11 +207,10 @@ static inline struct xfs_buf *
239xfs_buf_get( 207xfs_buf_get(
240 struct xfs_buftarg *target, 208 struct xfs_buftarg *target,
241 xfs_daddr_t blkno, 209 xfs_daddr_t blkno,
242 size_t numblks, 210 size_t numblks)
243 xfs_buf_flags_t flags)
244{ 211{
245 DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); 212 DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
246 return xfs_buf_get_map(target, &map, 1, flags); 213 return xfs_buf_get_map(target, &map, 1, 0);
247} 214}
248 215
249static inline struct xfs_buf * 216static inline struct xfs_buf *
@@ -269,9 +236,6 @@ xfs_buf_readahead(
269 return xfs_buf_readahead_map(target, &map, 1, ops); 236 return xfs_buf_readahead_map(target, &map, 1, ops);
270} 237}
271 238
272void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
273int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
274
275struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, 239struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
276 int flags); 240 int flags);
277int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr, 241int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
@@ -305,10 +269,7 @@ static inline int xfs_buf_submit(struct xfs_buf *bp)
305 return __xfs_buf_submit(bp, wait); 269 return __xfs_buf_submit(bp, wait);
306} 270}
307 271
308extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, 272void xfs_buf_zero(struct xfs_buf *bp, size_t boff, size_t bsize);
309 xfs_buf_rw_t);
310#define xfs_buf_zero(bp, off, len) \
311 xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
312 273
313/* Buffer Utility Routines */ 274/* Buffer Utility Routines */
314extern void *xfs_buf_offset(struct xfs_buf *, size_t); 275extern void *xfs_buf_offset(struct xfs_buf *, size_t);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 65b32acfa0f6..7dcaec54a20b 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -5,19 +5,17 @@
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h"
8#include "xfs_format.h" 9#include "xfs_format.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
11#include "xfs_bit.h" 12#include "xfs_bit.h"
12#include "xfs_sb.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_trans.h" 14#include "xfs_trans.h"
15#include "xfs_buf_item.h" 15#include "xfs_buf_item.h"
16#include "xfs_trans_priv.h" 16#include "xfs_trans_priv.h"
17#include "xfs_error.h"
18#include "xfs_trace.h" 17#include "xfs_trace.h"
19#include "xfs_log.h" 18#include "xfs_log.h"
20#include "xfs_inode.h"
21 19
22 20
23kmem_zone_t *xfs_buf_item_zone; 21kmem_zone_t *xfs_buf_item_zone;
@@ -520,7 +518,7 @@ xfs_buf_item_push(
520 /* has a previous flush failed due to IO errors? */ 518 /* has a previous flush failed due to IO errors? */
521 if ((bp->b_flags & XBF_WRITE_FAIL) && 519 if ((bp->b_flags & XBF_WRITE_FAIL) &&
522 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) { 520 ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS: Failing async write")) {
523 xfs_warn(bp->b_target->bt_mount, 521 xfs_warn(bp->b_mount,
524"Failing async write on buffer block 0x%llx. Retrying async write.", 522"Failing async write on buffer block 0x%llx. Retrying async write.",
525 (long long)bp->b_bn); 523 (long long)bp->b_bn);
526 } 524 }
@@ -594,7 +592,7 @@ xfs_buf_item_put(
594 * free the item. 592 * free the item.
595 */ 593 */
596STATIC void 594STATIC void
597xfs_buf_item_unlock( 595xfs_buf_item_release(
598 struct xfs_log_item *lip) 596 struct xfs_log_item *lip)
599{ 597{
600 struct xfs_buf_log_item *bip = BUF_ITEM(lip); 598 struct xfs_buf_log_item *bip = BUF_ITEM(lip);
@@ -609,7 +607,7 @@ xfs_buf_item_unlock(
609 &lip->li_flags); 607 &lip->li_flags);
610#endif 608#endif
611 609
612 trace_xfs_buf_item_unlock(bip); 610 trace_xfs_buf_item_release(bip);
613 611
614 /* 612 /*
615 * The bli dirty state should match whether the blf has logged segments 613 * The bli dirty state should match whether the blf has logged segments
@@ -639,6 +637,14 @@ xfs_buf_item_unlock(
639 xfs_buf_relse(bp); 637 xfs_buf_relse(bp);
640} 638}
641 639
640STATIC void
641xfs_buf_item_committing(
642 struct xfs_log_item *lip,
643 xfs_lsn_t commit_lsn)
644{
645 return xfs_buf_item_release(lip);
646}
647
642/* 648/*
643 * This is called to find out where the oldest active copy of the 649 * This is called to find out where the oldest active copy of the
644 * buf log item in the on disk log resides now that the last log 650 * buf log item in the on disk log resides now that the last log
@@ -671,25 +677,15 @@ xfs_buf_item_committed(
671 return lsn; 677 return lsn;
672} 678}
673 679
674STATIC void
675xfs_buf_item_committing(
676 struct xfs_log_item *lip,
677 xfs_lsn_t commit_lsn)
678{
679}
680
681/*
682 * This is the ops vector shared by all buf log items.
683 */
684static const struct xfs_item_ops xfs_buf_item_ops = { 680static const struct xfs_item_ops xfs_buf_item_ops = {
685 .iop_size = xfs_buf_item_size, 681 .iop_size = xfs_buf_item_size,
686 .iop_format = xfs_buf_item_format, 682 .iop_format = xfs_buf_item_format,
687 .iop_pin = xfs_buf_item_pin, 683 .iop_pin = xfs_buf_item_pin,
688 .iop_unpin = xfs_buf_item_unpin, 684 .iop_unpin = xfs_buf_item_unpin,
689 .iop_unlock = xfs_buf_item_unlock, 685 .iop_release = xfs_buf_item_release,
686 .iop_committing = xfs_buf_item_committing,
690 .iop_committed = xfs_buf_item_committed, 687 .iop_committed = xfs_buf_item_committed,
691 .iop_push = xfs_buf_item_push, 688 .iop_push = xfs_buf_item_push,
692 .iop_committing = xfs_buf_item_committing
693}; 689};
694 690
695STATIC int 691STATIC int
@@ -743,7 +739,7 @@ xfs_buf_item_init(
743 * this buffer. If we do already have one, there is 739 * this buffer. If we do already have one, there is
744 * nothing to do here so return. 740 * nothing to do here so return.
745 */ 741 */
746 ASSERT(bp->b_target->bt_mount == mp); 742 ASSERT(bp->b_mount == mp);
747 if (bip) { 743 if (bip) {
748 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 744 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
749 ASSERT(!bp->b_transp); 745 ASSERT(!bp->b_transp);
@@ -980,9 +976,9 @@ xfs_buf_item_relse(
980 */ 976 */
981void 977void
982xfs_buf_attach_iodone( 978xfs_buf_attach_iodone(
983 xfs_buf_t *bp, 979 struct xfs_buf *bp,
984 void (*cb)(xfs_buf_t *, xfs_log_item_t *), 980 void (*cb)(struct xfs_buf *, struct xfs_log_item *),
985 xfs_log_item_t *lip) 981 struct xfs_log_item *lip)
986{ 982{
987 ASSERT(xfs_buf_islocked(bp)); 983 ASSERT(xfs_buf_islocked(bp));
988 984
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 90f65f891fab..4a054b11011a 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -39,7 +39,7 @@ struct xfs_buf_log_item;
39 * locked, and which 128 byte chunks of the buffer are dirty. 39 * locked, and which 128 byte chunks of the buffer are dirty.
40 */ 40 */
41struct xfs_buf_log_item { 41struct xfs_buf_log_item {
42 xfs_log_item_t bli_item; /* common item structure */ 42 struct xfs_log_item bli_item; /* common item structure */
43 struct xfs_buf *bli_buf; /* real buffer pointer */ 43 struct xfs_buf *bli_buf; /* real buffer pointer */
44 unsigned int bli_flags; /* misc flags */ 44 unsigned int bli_flags; /* misc flags */
45 unsigned int bli_recur; /* lock recursion count */ 45 unsigned int bli_recur; /* lock recursion count */
@@ -55,8 +55,8 @@ bool xfs_buf_item_put(struct xfs_buf_log_item *);
55void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint); 55void xfs_buf_item_log(struct xfs_buf_log_item *, uint, uint);
56bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *); 56bool xfs_buf_item_dirty_format(struct xfs_buf_log_item *);
57void xfs_buf_attach_iodone(struct xfs_buf *, 57void xfs_buf_attach_iodone(struct xfs_buf *,
58 void(*)(struct xfs_buf *, xfs_log_item_t *), 58 void(*)(struct xfs_buf *, struct xfs_log_item *),
59 xfs_log_item_t *); 59 struct xfs_log_item *);
60void xfs_buf_iodone_callbacks(struct xfs_buf *); 60void xfs_buf_iodone_callbacks(struct xfs_buf *);
61void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); 61void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
62bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *, 62bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 5142e64e2345..283df898dd9f 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -6,17 +6,14 @@
6 */ 6 */
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_fs.h" 8#include "xfs_fs.h"
9#include "xfs_shared.h"
9#include "xfs_format.h" 10#include "xfs_format.h"
10#include "xfs_log_format.h" 11#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_da_format.h"
15#include "xfs_da_btree.h"
16#include "xfs_inode.h" 14#include "xfs_inode.h"
17#include "xfs_dir2.h" 15#include "xfs_dir2.h"
18#include "xfs_dir2_priv.h" 16#include "xfs_dir2_priv.h"
19#include "xfs_error.h"
20#include "xfs_trace.h" 17#include "xfs_trace.h"
21#include "xfs_bmap.h" 18#include "xfs_bmap.h"
22#include "xfs_trans.h" 19#include "xfs_trans.h"
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index d0df0ed50f4b..8ec7aab89044 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -4,19 +4,17 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_shared.h"
7#include "xfs_format.h" 8#include "xfs_format.h"
8#include "xfs_log_format.h" 9#include "xfs_log_format.h"
9#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
10#include "xfs_sb.h" 11#include "xfs_sb.h"
11#include "xfs_mount.h" 12#include "xfs_mount.h"
12#include "xfs_quota.h"
13#include "xfs_inode.h"
14#include "xfs_btree.h" 13#include "xfs_btree.h"
15#include "xfs_alloc_btree.h" 14#include "xfs_alloc_btree.h"
16#include "xfs_alloc.h" 15#include "xfs_alloc.h"
17#include "xfs_error.h" 16#include "xfs_error.h"
18#include "xfs_extent_busy.h" 17#include "xfs_extent_busy.h"
19#include "xfs_discard.h"
20#include "xfs_trace.h" 18#include "xfs_trace.h"
21#include "xfs_log.h" 19#include "xfs_log.h"
22 20
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index a1af984e4913..fb1ad4483081 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -14,16 +14,12 @@
14#include "xfs_defer.h" 14#include "xfs_defer.h"
15#include "xfs_inode.h" 15#include "xfs_inode.h"
16#include "xfs_bmap.h" 16#include "xfs_bmap.h"
17#include "xfs_bmap_util.h"
18#include "xfs_alloc.h"
19#include "xfs_quota.h" 17#include "xfs_quota.h"
20#include "xfs_error.h"
21#include "xfs_trans.h" 18#include "xfs_trans.h"
22#include "xfs_buf_item.h" 19#include "xfs_buf_item.h"
23#include "xfs_trans_space.h" 20#include "xfs_trans_space.h"
24#include "xfs_trans_priv.h" 21#include "xfs_trans_priv.h"
25#include "xfs_qm.h" 22#include "xfs_qm.h"
26#include "xfs_cksum.h"
27#include "xfs_trace.h" 23#include "xfs_trace.h"
28#include "xfs_log.h" 24#include "xfs_log.h"
29#include "xfs_bmap_btree.h" 25#include "xfs_bmap_btree.h"
@@ -1243,7 +1239,7 @@ xfs_qm_exit(void)
1243/* 1239/*
1244 * Iterate every dquot of a particular type. The caller must ensure that the 1240 * Iterate every dquot of a particular type. The caller must ensure that the
1245 * particular quota type is active. iter_fn can return negative error codes, 1241 * particular quota type is active. iter_fn can return negative error codes,
1246 * or XFS_BTREE_QUERY_RANGE_ABORT to indicate that it wants to stop iterating. 1242 * or XFS_ITER_ABORT to indicate that it wants to stop iterating.
1247 */ 1243 */
1248int 1244int
1249xfs_qm_dqiterate( 1245xfs_qm_dqiterate(
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 64bd8640f6e8..4fe85709d55d 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -34,7 +34,6 @@ typedef struct xfs_dquot {
34 uint dq_flags; /* various flags (XFS_DQ_*) */ 34 uint dq_flags; /* various flags (XFS_DQ_*) */
35 struct list_head q_lru; /* global free list of dquots */ 35 struct list_head q_lru; /* global free list of dquots */
36 struct xfs_mount*q_mount; /* filesystem this relates to */ 36 struct xfs_mount*q_mount; /* filesystem this relates to */
37 struct xfs_trans*q_transp; /* trans this belongs to currently */
38 uint q_nrefs; /* # active refs from inodes */ 37 uint q_nrefs; /* # active refs from inodes */
39 xfs_daddr_t q_blkno; /* blkno of dquot buffer */ 38 xfs_daddr_t q_blkno; /* blkno of dquot buffer */
40 int q_bufoffset; /* off of dq in buffer (# dquots) */ 39 int q_bufoffset; /* off of dq in buffer (# dquots) */
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 7dedd17c4813..282ec5af293e 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -5,13 +5,13 @@
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h"
8#include "xfs_format.h" 9#include "xfs_format.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
11#include "xfs_mount.h" 12#include "xfs_mount.h"
12#include "xfs_inode.h" 13#include "xfs_inode.h"
13#include "xfs_quota.h" 14#include "xfs_quota.h"
14#include "xfs_error.h"
15#include "xfs_trans.h" 15#include "xfs_trans.h"
16#include "xfs_buf_item.h" 16#include "xfs_buf_item.h"
17#include "xfs_trans_priv.h" 17#include "xfs_trans_priv.h"
@@ -94,18 +94,6 @@ xfs_qm_dquot_logitem_unpin(
94 wake_up(&dqp->q_pinwait); 94 wake_up(&dqp->q_pinwait);
95} 95}
96 96
97STATIC xfs_lsn_t
98xfs_qm_dquot_logitem_committed(
99 struct xfs_log_item *lip,
100 xfs_lsn_t lsn)
101{
102 /*
103 * We always re-log the entire dquot when it becomes dirty,
104 * so, the latest copy _is_ the only one that matters.
105 */
106 return lsn;
107}
108
109/* 97/*
110 * This is called to wait for the given dquot to be unpinned. 98 * This is called to wait for the given dquot to be unpinned.
111 * Most of these pin/unpin routines are plagiarized from inode code. 99 * Most of these pin/unpin routines are plagiarized from inode code.
@@ -209,14 +197,8 @@ out_unlock:
209 return rval; 197 return rval;
210} 198}
211 199
212/*
213 * Unlock the dquot associated with the log item.
214 * Clear the fields of the dquot and dquot log item that
215 * are specific to the current transaction. If the
216 * hold flags is set, do not unlock the dquot.
217 */
218STATIC void 200STATIC void
219xfs_qm_dquot_logitem_unlock( 201xfs_qm_dquot_logitem_release(
220 struct xfs_log_item *lip) 202 struct xfs_log_item *lip)
221{ 203{
222 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; 204 struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
@@ -224,11 +206,6 @@ xfs_qm_dquot_logitem_unlock(
224 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 206 ASSERT(XFS_DQ_IS_LOCKED(dqp));
225 207
226 /* 208 /*
227 * Clear the transaction pointer in the dquot
228 */
229 dqp->q_transp = NULL;
230
231 /*
232 * dquots are never 'held' from getting unlocked at the end of 209 * dquots are never 'held' from getting unlocked at the end of
233 * a transaction. Their locking and unlocking is hidden inside the 210 * a transaction. Their locking and unlocking is hidden inside the
234 * transaction layer, within trans_commit. Hence, no LI_HOLD flag 211 * transaction layer, within trans_commit. Hence, no LI_HOLD flag
@@ -237,30 +214,22 @@ xfs_qm_dquot_logitem_unlock(
237 xfs_dqunlock(dqp); 214 xfs_dqunlock(dqp);
238} 215}
239 216
240/*
241 * this needs to stamp an lsn into the dquot, I think.
242 * rpc's that look at user dquot's would then have to
243 * push on the dependency recorded in the dquot
244 */
245STATIC void 217STATIC void
246xfs_qm_dquot_logitem_committing( 218xfs_qm_dquot_logitem_committing(
247 struct xfs_log_item *lip, 219 struct xfs_log_item *lip,
248 xfs_lsn_t lsn) 220 xfs_lsn_t commit_lsn)
249{ 221{
222 return xfs_qm_dquot_logitem_release(lip);
250} 223}
251 224
252/*
253 * This is the ops vector for dquots
254 */
255static const struct xfs_item_ops xfs_dquot_item_ops = { 225static const struct xfs_item_ops xfs_dquot_item_ops = {
256 .iop_size = xfs_qm_dquot_logitem_size, 226 .iop_size = xfs_qm_dquot_logitem_size,
257 .iop_format = xfs_qm_dquot_logitem_format, 227 .iop_format = xfs_qm_dquot_logitem_format,
258 .iop_pin = xfs_qm_dquot_logitem_pin, 228 .iop_pin = xfs_qm_dquot_logitem_pin,
259 .iop_unpin = xfs_qm_dquot_logitem_unpin, 229 .iop_unpin = xfs_qm_dquot_logitem_unpin,
260 .iop_unlock = xfs_qm_dquot_logitem_unlock, 230 .iop_release = xfs_qm_dquot_logitem_release,
261 .iop_committed = xfs_qm_dquot_logitem_committed, 231 .iop_committing = xfs_qm_dquot_logitem_committing,
262 .iop_push = xfs_qm_dquot_logitem_push, 232 .iop_push = xfs_qm_dquot_logitem_push,
263 .iop_committing = xfs_qm_dquot_logitem_committing,
264 .iop_error = xfs_dquot_item_error 233 .iop_error = xfs_dquot_item_error
265}; 234};
266 235
@@ -320,26 +289,6 @@ xfs_qm_qoff_logitem_format(
320} 289}
321 290
322/* 291/*
323 * Pinning has no meaning for an quotaoff item, so just return.
324 */
325STATIC void
326xfs_qm_qoff_logitem_pin(
327 struct xfs_log_item *lip)
328{
329}
330
331/*
332 * Since pinning has no meaning for an quotaoff item, unpinning does
333 * not either.
334 */
335STATIC void
336xfs_qm_qoff_logitem_unpin(
337 struct xfs_log_item *lip,
338 int remove)
339{
340}
341
342/*
343 * There isn't much you can do to push a quotaoff item. It is simply 292 * There isn't much you can do to push a quotaoff item. It is simply
344 * stuck waiting for the log to be flushed to disk. 293 * stuck waiting for the log to be flushed to disk.
345 */ 294 */
@@ -351,28 +300,6 @@ xfs_qm_qoff_logitem_push(
351 return XFS_ITEM_LOCKED; 300 return XFS_ITEM_LOCKED;
352} 301}
353 302
354/*
355 * Quotaoff items have no locking or pushing, so return failure
356 * so that the caller doesn't bother with us.
357 */
358STATIC void
359xfs_qm_qoff_logitem_unlock(
360 struct xfs_log_item *lip)
361{
362}
363
364/*
365 * The quotaoff-start-item is logged only once and cannot be moved in the log,
366 * so simply return the lsn at which it's been logged.
367 */
368STATIC xfs_lsn_t
369xfs_qm_qoff_logitem_committed(
370 struct xfs_log_item *lip,
371 xfs_lsn_t lsn)
372{
373 return lsn;
374}
375
376STATIC xfs_lsn_t 303STATIC xfs_lsn_t
377xfs_qm_qoffend_logitem_committed( 304xfs_qm_qoffend_logitem_committed(
378 struct xfs_log_item *lip, 305 struct xfs_log_item *lip,
@@ -396,50 +323,17 @@ xfs_qm_qoffend_logitem_committed(
396 return (xfs_lsn_t)-1; 323 return (xfs_lsn_t)-1;
397} 324}
398 325
399/*
400 * XXX rcc - don't know quite what to do with this. I think we can
401 * just ignore it. The only time that isn't the case is if we allow
402 * the client to somehow see that quotas have been turned off in which
403 * we can't allow that to get back until the quotaoff hits the disk.
404 * So how would that happen? Also, do we need different routines for
405 * quotaoff start and quotaoff end? I suspect the answer is yes but
406 * to be sure, I need to look at the recovery code and see how quota off
407 * recovery is handled (do we roll forward or back or do something else).
408 * If we roll forwards or backwards, then we need two separate routines,
409 * one that does nothing and one that stamps in the lsn that matters
410 * (truly makes the quotaoff irrevocable). If we do something else,
411 * then maybe we don't need two.
412 */
413STATIC void
414xfs_qm_qoff_logitem_committing(
415 struct xfs_log_item *lip,
416 xfs_lsn_t commit_lsn)
417{
418}
419
420static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = { 326static const struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
421 .iop_size = xfs_qm_qoff_logitem_size, 327 .iop_size = xfs_qm_qoff_logitem_size,
422 .iop_format = xfs_qm_qoff_logitem_format, 328 .iop_format = xfs_qm_qoff_logitem_format,
423 .iop_pin = xfs_qm_qoff_logitem_pin,
424 .iop_unpin = xfs_qm_qoff_logitem_unpin,
425 .iop_unlock = xfs_qm_qoff_logitem_unlock,
426 .iop_committed = xfs_qm_qoffend_logitem_committed, 329 .iop_committed = xfs_qm_qoffend_logitem_committed,
427 .iop_push = xfs_qm_qoff_logitem_push, 330 .iop_push = xfs_qm_qoff_logitem_push,
428 .iop_committing = xfs_qm_qoff_logitem_committing
429}; 331};
430 332
431/*
432 * This is the ops vector shared by all quotaoff-start log items.
433 */
434static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = { 333static const struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
435 .iop_size = xfs_qm_qoff_logitem_size, 334 .iop_size = xfs_qm_qoff_logitem_size,
436 .iop_format = xfs_qm_qoff_logitem_format, 335 .iop_format = xfs_qm_qoff_logitem_format,
437 .iop_pin = xfs_qm_qoff_logitem_pin,
438 .iop_unpin = xfs_qm_qoff_logitem_unpin,
439 .iop_unlock = xfs_qm_qoff_logitem_unlock,
440 .iop_committed = xfs_qm_qoff_logitem_committed,
441 .iop_push = xfs_qm_qoff_logitem_push, 336 .iop_push = xfs_qm_qoff_logitem_push,
442 .iop_committing = xfs_qm_qoff_logitem_committing
443}; 337};
444 338
445/* 339/*
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
index db9df710a308..1aed34ccdabc 100644
--- a/fs/xfs/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
@@ -12,13 +12,13 @@ struct xfs_mount;
12struct xfs_qoff_logitem; 12struct xfs_qoff_logitem;
13 13
14typedef struct xfs_dq_logitem { 14typedef struct xfs_dq_logitem {
15 xfs_log_item_t qli_item; /* common portion */ 15 struct xfs_log_item qli_item; /* common portion */
16 struct xfs_dquot *qli_dquot; /* dquot ptr */ 16 struct xfs_dquot *qli_dquot; /* dquot ptr */
17 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */ 17 xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
18} xfs_dq_logitem_t; 18} xfs_dq_logitem_t;
19 19
20typedef struct xfs_qoff_logitem { 20typedef struct xfs_qoff_logitem {
21 xfs_log_item_t qql_item; /* common portion */ 21 struct xfs_log_item qql_item; /* common portion */
22 struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ 22 struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
23 unsigned int qql_flags; 23 unsigned int qql_flags;
24} xfs_qoff_logitem_t; 24} xfs_qoff_logitem_t;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index a1e177f66404..544c9482a0ef 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -4,6 +4,7 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_shared.h"
7#include "xfs_format.h" 8#include "xfs_format.h"
8#include "xfs_fs.h" 9#include "xfs_fs.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
@@ -353,7 +354,7 @@ xfs_buf_verifier_error(
353 size_t bufsz, 354 size_t bufsz,
354 xfs_failaddr_t failaddr) 355 xfs_failaddr_t failaddr)
355{ 356{
356 struct xfs_mount *mp = bp->b_target->bt_mount; 357 struct xfs_mount *mp = bp->b_mount;
357 xfs_failaddr_t fa; 358 xfs_failaddr_t fa;
358 int sz; 359 int sz;
359 360
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index f2284ceb129f..f1372f9046e3 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -4,18 +4,16 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_shared.h"
7#include "xfs_format.h" 8#include "xfs_format.h"
8#include "xfs_log_format.h" 9#include "xfs_log_format.h"
9#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
10#include "xfs_mount.h" 11#include "xfs_mount.h"
11#include "xfs_da_format.h"
12#include "xfs_da_btree.h"
13#include "xfs_dir2.h" 12#include "xfs_dir2.h"
14#include "xfs_export.h" 13#include "xfs_export.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_trans.h" 15#include "xfs_trans.h"
17#include "xfs_inode_item.h" 16#include "xfs_inode_item.h"
18#include "xfs_trace.h"
19#include "xfs_icache.h" 17#include "xfs_icache.h"
20#include "xfs_log.h" 18#include "xfs_log.h"
21#include "xfs_pnfs.h" 19#include "xfs_pnfs.h"
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 74ddf66f4cfe..86f6512d6864 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -9,14 +9,18 @@
9#include "xfs_log_format.h" 9#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
11#include "xfs_bit.h" 11#include "xfs_bit.h"
12#include "xfs_shared.h"
12#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_defer.h"
13#include "xfs_trans.h" 15#include "xfs_trans.h"
14#include "xfs_trans_priv.h" 16#include "xfs_trans_priv.h"
15#include "xfs_buf_item.h"
16#include "xfs_extfree_item.h" 17#include "xfs_extfree_item.h"
17#include "xfs_log.h" 18#include "xfs_log.h"
18#include "xfs_btree.h" 19#include "xfs_btree.h"
19#include "xfs_rmap.h" 20#include "xfs_rmap.h"
21#include "xfs_alloc.h"
22#include "xfs_bmap.h"
23#include "xfs_trace.h"
20 24
21 25
22kmem_zone_t *xfs_efi_zone; 26kmem_zone_t *xfs_efi_zone;
@@ -107,15 +111,6 @@ xfs_efi_item_format(
107 111
108 112
109/* 113/*
110 * Pinning has no meaning for an efi item, so just return.
111 */
112STATIC void
113xfs_efi_item_pin(
114 struct xfs_log_item *lip)
115{
116}
117
118/*
119 * The unpin operation is the last place an EFI is manipulated in the log. It is 114 * The unpin operation is the last place an EFI is manipulated in the log. It is
120 * either inserted in the AIL or aborted in the event of a log I/O error. In 115 * either inserted in the AIL or aborted in the event of a log I/O error. In
121 * either case, the EFI transaction has been successfully committed to make it 116 * either case, the EFI transaction has been successfully committed to make it
@@ -133,71 +128,22 @@ xfs_efi_item_unpin(
133} 128}
134 129
135/* 130/*
136 * Efi items have no locking or pushing. However, since EFIs are pulled from
137 * the AIL when their corresponding EFDs are committed to disk, their situation
138 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
139 * will eventually flush the log. This should help in getting the EFI out of
140 * the AIL.
141 */
142STATIC uint
143xfs_efi_item_push(
144 struct xfs_log_item *lip,
145 struct list_head *buffer_list)
146{
147 return XFS_ITEM_PINNED;
148}
149
150/*
151 * The EFI has been either committed or aborted if the transaction has been 131 * The EFI has been either committed or aborted if the transaction has been
152 * cancelled. If the transaction was cancelled, an EFD isn't going to be 132 * cancelled. If the transaction was cancelled, an EFD isn't going to be
153 * constructed and thus we free the EFI here directly. 133 * constructed and thus we free the EFI here directly.
154 */ 134 */
155STATIC void 135STATIC void
156xfs_efi_item_unlock( 136xfs_efi_item_release(
157 struct xfs_log_item *lip) 137 struct xfs_log_item *lip)
158{ 138{
159 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 139 xfs_efi_release(EFI_ITEM(lip));
160 xfs_efi_release(EFI_ITEM(lip));
161}
162
163/*
164 * The EFI is logged only once and cannot be moved in the log, so simply return
165 * the lsn at which it's been logged.
166 */
167STATIC xfs_lsn_t
168xfs_efi_item_committed(
169 struct xfs_log_item *lip,
170 xfs_lsn_t lsn)
171{
172 return lsn;
173}
174
175/*
176 * The EFI dependency tracking op doesn't do squat. It can't because
177 * it doesn't know where the free extent is coming from. The dependency
178 * tracking has to be handled by the "enclosing" metadata object. For
179 * example, for inodes, the inode is locked throughout the extent freeing
180 * so the dependency should be recorded there.
181 */
182STATIC void
183xfs_efi_item_committing(
184 struct xfs_log_item *lip,
185 xfs_lsn_t lsn)
186{
187} 140}
188 141
189/*
190 * This is the ops vector shared by all efi log items.
191 */
192static const struct xfs_item_ops xfs_efi_item_ops = { 142static const struct xfs_item_ops xfs_efi_item_ops = {
193 .iop_size = xfs_efi_item_size, 143 .iop_size = xfs_efi_item_size,
194 .iop_format = xfs_efi_item_format, 144 .iop_format = xfs_efi_item_format,
195 .iop_pin = xfs_efi_item_pin,
196 .iop_unpin = xfs_efi_item_unpin, 145 .iop_unpin = xfs_efi_item_unpin,
197 .iop_unlock = xfs_efi_item_unlock, 146 .iop_release = xfs_efi_item_release,
198 .iop_committed = xfs_efi_item_committed,
199 .iop_push = xfs_efi_item_push,
200 .iop_committing = xfs_efi_item_committing
201}; 147};
202 148
203 149
@@ -349,136 +295,298 @@ xfs_efd_item_format(
349} 295}
350 296
351/* 297/*
352 * Pinning has no meaning for an efd item, so just return. 298 * The EFD is either committed or aborted if the transaction is cancelled. If
299 * the transaction is cancelled, drop our reference to the EFI and free the EFD.
353 */ 300 */
354STATIC void 301STATIC void
355xfs_efd_item_pin( 302xfs_efd_item_release(
356 struct xfs_log_item *lip) 303 struct xfs_log_item *lip)
357{ 304{
305 struct xfs_efd_log_item *efdp = EFD_ITEM(lip);
306
307 xfs_efi_release(efdp->efd_efip);
308 xfs_efd_item_free(efdp);
358} 309}
359 310
311static const struct xfs_item_ops xfs_efd_item_ops = {
312 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
313 .iop_size = xfs_efd_item_size,
314 .iop_format = xfs_efd_item_format,
315 .iop_release = xfs_efd_item_release,
316};
317
360/* 318/*
361 * Since pinning has no meaning for an efd item, unpinning does 319 * Allocate an "extent free done" log item that will hold nextents worth of
362 * not either. 320 * extents. The caller must use all nextents extents, because we are not
321 * flexible about this at all.
363 */ 322 */
364STATIC void 323static struct xfs_efd_log_item *
365xfs_efd_item_unpin( 324xfs_trans_get_efd(
366 struct xfs_log_item *lip, 325 struct xfs_trans *tp,
367 int remove) 326 struct xfs_efi_log_item *efip,
327 unsigned int nextents)
368{ 328{
329 struct xfs_efd_log_item *efdp;
330
331 ASSERT(nextents > 0);
332
333 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
334 efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
335 (nextents - 1) * sizeof(struct xfs_extent),
336 KM_SLEEP);
337 } else {
338 efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
339 }
340
341 xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD,
342 &xfs_efd_item_ops);
343 efdp->efd_efip = efip;
344 efdp->efd_format.efd_nextents = nextents;
345 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
346
347 xfs_trans_add_item(tp, &efdp->efd_item);
348 return efdp;
369} 349}
370 350
371/* 351/*
372 * There isn't much you can do to push on an efd item. It is simply stuck 352 * Free an extent and log it to the EFD. Note that the transaction is marked
373 * waiting for the log to be flushed to disk. 353 * dirty regardless of whether the extent free succeeds or fails to support the
354 * EFI/EFD lifecycle rules.
374 */ 355 */
375STATIC uint 356static int
376xfs_efd_item_push( 357xfs_trans_free_extent(
377 struct xfs_log_item *lip, 358 struct xfs_trans *tp,
378 struct list_head *buffer_list) 359 struct xfs_efd_log_item *efdp,
360 xfs_fsblock_t start_block,
361 xfs_extlen_t ext_len,
362 const struct xfs_owner_info *oinfo,
363 bool skip_discard)
379{ 364{
380 return XFS_ITEM_PINNED; 365 struct xfs_mount *mp = tp->t_mountp;
366 struct xfs_extent *extp;
367 uint next_extent;
368 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
369 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
370 start_block);
371 int error;
372
373 trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
374
375 error = __xfs_free_extent(tp, start_block, ext_len,
376 oinfo, XFS_AG_RESV_NONE, skip_discard);
377 /*
378 * Mark the transaction dirty, even on error. This ensures the
379 * transaction is aborted, which:
380 *
381 * 1.) releases the EFI and frees the EFD
382 * 2.) shuts down the filesystem
383 */
384 tp->t_flags |= XFS_TRANS_DIRTY;
385 set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
386
387 next_extent = efdp->efd_next_extent;
388 ASSERT(next_extent < efdp->efd_format.efd_nextents);
389 extp = &(efdp->efd_format.efd_extents[next_extent]);
390 extp->ext_start = start_block;
391 extp->ext_len = ext_len;
392 efdp->efd_next_extent++;
393
394 return error;
381} 395}
382 396
383/* 397/* Sort bmap items by AG. */
384 * The EFD is either committed or aborted if the transaction is cancelled. If 398static int
385 * the transaction is cancelled, drop our reference to the EFI and free the EFD. 399xfs_extent_free_diff_items(
386 */ 400 void *priv,
387STATIC void 401 struct list_head *a,
388xfs_efd_item_unlock( 402 struct list_head *b)
389 struct xfs_log_item *lip)
390{ 403{
391 struct xfs_efd_log_item *efdp = EFD_ITEM(lip); 404 struct xfs_mount *mp = priv;
405 struct xfs_extent_free_item *ra;
406 struct xfs_extent_free_item *rb;
407
408 ra = container_of(a, struct xfs_extent_free_item, xefi_list);
409 rb = container_of(b, struct xfs_extent_free_item, xefi_list);
410 return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
411 XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
412}
392 413
393 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 414/* Get an EFI. */
394 xfs_efi_release(efdp->efd_efip); 415STATIC void *
395 xfs_efd_item_free(efdp); 416xfs_extent_free_create_intent(
396 } 417 struct xfs_trans *tp,
418 unsigned int count)
419{
420 struct xfs_efi_log_item *efip;
421
422 ASSERT(tp != NULL);
423 ASSERT(count > 0);
424
425 efip = xfs_efi_init(tp->t_mountp, count);
426 ASSERT(efip != NULL);
427
428 /*
429 * Get a log_item_desc to point at the new item.
430 */
431 xfs_trans_add_item(tp, &efip->efi_item);
432 return efip;
397} 433}
398 434
399/* 435/* Log a free extent to the intent item. */
400 * When the efd item is committed to disk, all we need to do is delete our 436STATIC void
401 * reference to our partner efi item and then free ourselves. Since we're 437xfs_extent_free_log_item(
402 * freeing ourselves we must return -1 to keep the transaction code from further 438 struct xfs_trans *tp,
403 * referencing this item. 439 void *intent,
404 */ 440 struct list_head *item)
405STATIC xfs_lsn_t
406xfs_efd_item_committed(
407 struct xfs_log_item *lip,
408 xfs_lsn_t lsn)
409{ 441{
410 struct xfs_efd_log_item *efdp = EFD_ITEM(lip); 442 struct xfs_efi_log_item *efip = intent;
443 struct xfs_extent_free_item *free;
444 uint next_extent;
445 struct xfs_extent *extp;
446
447 free = container_of(item, struct xfs_extent_free_item, xefi_list);
448
449 tp->t_flags |= XFS_TRANS_DIRTY;
450 set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
411 451
412 /* 452 /*
413 * Drop the EFI reference regardless of whether the EFD has been 453 * atomic_inc_return gives us the value after the increment;
414 * aborted. Once the EFD transaction is constructed, it is the sole 454 * we want to use it as an array index so we need to subtract 1 from
415 * responsibility of the EFD to release the EFI (even if the EFI is 455 * it.
416 * aborted due to log I/O error).
417 */ 456 */
418 xfs_efi_release(efdp->efd_efip); 457 next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
419 xfs_efd_item_free(efdp); 458 ASSERT(next_extent < efip->efi_format.efi_nextents);
459 extp = &efip->efi_format.efi_extents[next_extent];
460 extp->ext_start = free->xefi_startblock;
461 extp->ext_len = free->xefi_blockcount;
462}
420 463
421 return (xfs_lsn_t)-1; 464/* Get an EFD so we can process all the free extents. */
465STATIC void *
466xfs_extent_free_create_done(
467 struct xfs_trans *tp,
468 void *intent,
469 unsigned int count)
470{
471 return xfs_trans_get_efd(tp, intent, count);
422} 472}
423 473
424/* 474/* Process a free extent. */
425 * The EFD dependency tracking op doesn't do squat. It can't because 475STATIC int
426 * it doesn't know where the free extent is coming from. The dependency 476xfs_extent_free_finish_item(
427 * tracking has to be handled by the "enclosing" metadata object. For 477 struct xfs_trans *tp,
428 * example, for inodes, the inode is locked throughout the extent freeing 478 struct list_head *item,
429 * so the dependency should be recorded there. 479 void *done_item,
430 */ 480 void **state)
481{
482 struct xfs_extent_free_item *free;
483 int error;
484
485 free = container_of(item, struct xfs_extent_free_item, xefi_list);
486 error = xfs_trans_free_extent(tp, done_item,
487 free->xefi_startblock,
488 free->xefi_blockcount,
489 &free->xefi_oinfo, free->xefi_skip_discard);
490 kmem_free(free);
491 return error;
492}
493
494/* Abort all pending EFIs. */
431STATIC void 495STATIC void
432xfs_efd_item_committing( 496xfs_extent_free_abort_intent(
433 struct xfs_log_item *lip, 497 void *intent)
434 xfs_lsn_t lsn)
435{ 498{
499 xfs_efi_release(intent);
436} 500}
437 501
438/* 502/* Cancel a free extent. */
439 * This is the ops vector shared by all efd log items. 503STATIC void
440 */ 504xfs_extent_free_cancel_item(
441static const struct xfs_item_ops xfs_efd_item_ops = { 505 struct list_head *item)
442 .iop_size = xfs_efd_item_size, 506{
443 .iop_format = xfs_efd_item_format, 507 struct xfs_extent_free_item *free;
444 .iop_pin = xfs_efd_item_pin, 508
445 .iop_unpin = xfs_efd_item_unpin, 509 free = container_of(item, struct xfs_extent_free_item, xefi_list);
446 .iop_unlock = xfs_efd_item_unlock, 510 kmem_free(free);
447 .iop_committed = xfs_efd_item_committed, 511}
448 .iop_push = xfs_efd_item_push, 512
449 .iop_committing = xfs_efd_item_committing 513const struct xfs_defer_op_type xfs_extent_free_defer_type = {
514 .max_items = XFS_EFI_MAX_FAST_EXTENTS,
515 .diff_items = xfs_extent_free_diff_items,
516 .create_intent = xfs_extent_free_create_intent,
517 .abort_intent = xfs_extent_free_abort_intent,
518 .log_item = xfs_extent_free_log_item,
519 .create_done = xfs_extent_free_create_done,
520 .finish_item = xfs_extent_free_finish_item,
521 .cancel_item = xfs_extent_free_cancel_item,
450}; 522};
451 523
452/* 524/*
453 * Allocate and initialize an efd item with the given number of extents. 525 * AGFL blocks are accounted differently in the reserve pools and are not
526 * inserted into the busy extent list.
454 */ 527 */
455struct xfs_efd_log_item * 528STATIC int
456xfs_efd_init( 529xfs_agfl_free_finish_item(
457 struct xfs_mount *mp, 530 struct xfs_trans *tp,
458 struct xfs_efi_log_item *efip, 531 struct list_head *item,
459 uint nextents) 532 void *done_item,
460 533 void **state)
461{ 534{
462 struct xfs_efd_log_item *efdp; 535 struct xfs_mount *mp = tp->t_mountp;
463 uint size; 536 struct xfs_efd_log_item *efdp = done_item;
537 struct xfs_extent_free_item *free;
538 struct xfs_extent *extp;
539 struct xfs_buf *agbp;
540 int error;
541 xfs_agnumber_t agno;
542 xfs_agblock_t agbno;
543 uint next_extent;
544
545 free = container_of(item, struct xfs_extent_free_item, xefi_list);
546 ASSERT(free->xefi_blockcount == 1);
547 agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
548 agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
549
550 trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
551
552 error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
553 if (!error)
554 error = xfs_free_agfl_block(tp, agno, agbno, agbp,
555 &free->xefi_oinfo);
464 556
465 ASSERT(nextents > 0); 557 /*
466 if (nextents > XFS_EFD_MAX_FAST_EXTENTS) { 558 * Mark the transaction dirty, even on error. This ensures the
467 size = (uint)(sizeof(xfs_efd_log_item_t) + 559 * transaction is aborted, which:
468 ((nextents - 1) * sizeof(xfs_extent_t))); 560 *
469 efdp = kmem_zalloc(size, KM_SLEEP); 561 * 1.) releases the EFI and frees the EFD
470 } else { 562 * 2.) shuts down the filesystem
471 efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP); 563 */
472 } 564 tp->t_flags |= XFS_TRANS_DIRTY;
565 set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
473 566
474 xfs_log_item_init(mp, &efdp->efd_item, XFS_LI_EFD, &xfs_efd_item_ops); 567 next_extent = efdp->efd_next_extent;
475 efdp->efd_efip = efip; 568 ASSERT(next_extent < efdp->efd_format.efd_nextents);
476 efdp->efd_format.efd_nextents = nextents; 569 extp = &(efdp->efd_format.efd_extents[next_extent]);
477 efdp->efd_format.efd_efi_id = efip->efi_format.efi_id; 570 extp->ext_start = free->xefi_startblock;
571 extp->ext_len = free->xefi_blockcount;
572 efdp->efd_next_extent++;
478 573
479 return efdp; 574 kmem_free(free);
575 return error;
480} 576}
481 577
578/* sub-type with special handling for AGFL deferred frees */
579const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
580 .max_items = XFS_EFI_MAX_FAST_EXTENTS,
581 .diff_items = xfs_extent_free_diff_items,
582 .create_intent = xfs_extent_free_create_intent,
583 .abort_intent = xfs_extent_free_abort_intent,
584 .log_item = xfs_extent_free_log_item,
585 .create_done = xfs_extent_free_create_done,
586 .finish_item = xfs_agfl_free_finish_item,
587 .cancel_item = xfs_extent_free_cancel_item,
588};
589
482/* 590/*
483 * Process an extent free intent item that was recovered from 591 * Process an extent free intent item that was recovered from
484 * the log. We need to free the extents that it describes. 592 * the log. We need to free the extents that it describes.
diff --git a/fs/xfs/xfs_extfree_item.h b/fs/xfs/xfs_extfree_item.h
index 2a6a895ca73e..16aaab06d4ec 100644
--- a/fs/xfs/xfs_extfree_item.h
+++ b/fs/xfs/xfs_extfree_item.h
@@ -51,7 +51,7 @@ struct kmem_zone;
51 * AIL, so at this point both the EFI and EFD are freed. 51 * AIL, so at this point both the EFI and EFD are freed.
52 */ 52 */
53typedef struct xfs_efi_log_item { 53typedef struct xfs_efi_log_item {
54 xfs_log_item_t efi_item; 54 struct xfs_log_item efi_item;
55 atomic_t efi_refcount; 55 atomic_t efi_refcount;
56 atomic_t efi_next_extent; 56 atomic_t efi_next_extent;
57 unsigned long efi_flags; /* misc flags */ 57 unsigned long efi_flags; /* misc flags */
@@ -64,7 +64,7 @@ typedef struct xfs_efi_log_item {
64 * have been freed. 64 * have been freed.
65 */ 65 */
66typedef struct xfs_efd_log_item { 66typedef struct xfs_efd_log_item {
67 xfs_log_item_t efd_item; 67 struct xfs_log_item efd_item;
68 xfs_efi_log_item_t *efd_efip; 68 xfs_efi_log_item_t *efd_efip;
69 uint efd_next_extent; 69 uint efd_next_extent;
70 xfs_efd_log_format_t efd_format; 70 xfs_efd_log_format_t efd_format;
@@ -79,8 +79,6 @@ extern struct kmem_zone *xfs_efi_zone;
79extern struct kmem_zone *xfs_efd_zone; 79extern struct kmem_zone *xfs_efd_zone;
80 80
81xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint); 81xfs_efi_log_item_t *xfs_efi_init(struct xfs_mount *, uint);
82xfs_efd_log_item_t *xfs_efd_init(struct xfs_mount *, xfs_efi_log_item_t *,
83 uint);
84int xfs_efi_copy_format(xfs_log_iovec_t *buf, 82int xfs_efi_copy_format(xfs_log_iovec_t *buf,
85 xfs_efi_log_format_t *dst_efi_fmt); 83 xfs_efi_log_format_t *dst_efi_fmt);
86void xfs_efi_item_free(xfs_efi_log_item_t *); 84void xfs_efi_item_free(xfs_efi_log_item_t *);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 916a35cae5e9..e93bacbd49ae 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -10,14 +10,11 @@
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_da_format.h"
14#include "xfs_da_btree.h"
15#include "xfs_inode.h" 13#include "xfs_inode.h"
16#include "xfs_trans.h" 14#include "xfs_trans.h"
17#include "xfs_inode_item.h" 15#include "xfs_inode_item.h"
18#include "xfs_bmap.h" 16#include "xfs_bmap.h"
19#include "xfs_bmap_util.h" 17#include "xfs_bmap_util.h"
20#include "xfs_error.h"
21#include "xfs_dir2.h" 18#include "xfs_dir2.h"
22#include "xfs_dir2_priv.h" 19#include "xfs_dir2_priv.h"
23#include "xfs_ioctl.h" 20#include "xfs_ioctl.h"
@@ -28,9 +25,7 @@
28#include "xfs_iomap.h" 25#include "xfs_iomap.h"
29#include "xfs_reflink.h" 26#include "xfs_reflink.h"
30 27
31#include <linux/dcache.h>
32#include <linux/falloc.h> 28#include <linux/falloc.h>
33#include <linux/pagevec.h>
34#include <linux/backing-dev.h> 29#include <linux/backing-dev.h>
35#include <linux/mman.h> 30#include <linux/mman.h>
36 31
@@ -379,6 +374,7 @@ xfs_dio_write_end_io(
379 struct inode *inode = file_inode(iocb->ki_filp); 374 struct inode *inode = file_inode(iocb->ki_filp);
380 struct xfs_inode *ip = XFS_I(inode); 375 struct xfs_inode *ip = XFS_I(inode);
381 loff_t offset = iocb->ki_pos; 376 loff_t offset = iocb->ki_pos;
377 unsigned int nofs_flag;
382 int error = 0; 378 int error = 0;
383 379
384 trace_xfs_end_io_direct_write(ip, offset, size); 380 trace_xfs_end_io_direct_write(ip, offset, size);
@@ -395,10 +391,17 @@ xfs_dio_write_end_io(
395 */ 391 */
396 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size); 392 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);
397 393
394 /*
395 * We can allocate memory here while doing writeback on behalf of
396 * memory reclaim. To avoid memory allocation deadlocks set the
397 * task-wide nofs context for the following operations.
398 */
399 nofs_flag = memalloc_nofs_save();
400
398 if (flags & IOMAP_DIO_COW) { 401 if (flags & IOMAP_DIO_COW) {
399 error = xfs_reflink_end_cow(ip, offset, size); 402 error = xfs_reflink_end_cow(ip, offset, size);
400 if (error) 403 if (error)
401 return error; 404 goto out;
402 } 405 }
403 406
404 /* 407 /*
@@ -407,8 +410,10 @@ xfs_dio_write_end_io(
407 * earlier allows a racing dio read to find unwritten extents before 410 * earlier allows a racing dio read to find unwritten extents before
408 * they are converted. 411 * they are converted.
409 */ 412 */
410 if (flags & IOMAP_DIO_UNWRITTEN) 413 if (flags & IOMAP_DIO_UNWRITTEN) {
411 return xfs_iomap_write_unwritten(ip, offset, size, true); 414 error = xfs_iomap_write_unwritten(ip, offset, size, true);
415 goto out;
416 }
412 417
413 /* 418 /*
414 * We need to update the in-core inode size here so that we don't end up 419 * We need to update the in-core inode size here so that we don't end up
@@ -430,6 +435,8 @@ xfs_dio_write_end_io(
430 spin_unlock(&ip->i_flags_lock); 435 spin_unlock(&ip->i_flags_lock);
431 } 436 }
432 437
438out:
439 memalloc_nofs_restore(nofs_flag);
433 return error; 440 return error;
434} 441}
435 442
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 182501373af2..574a7a8b4736 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -5,22 +5,19 @@
5 * All Rights Reserved. 5 * All Rights Reserved.
6 */ 6 */
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_shared.h"
8#include "xfs_format.h" 9#include "xfs_format.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
11#include "xfs_sb.h" 12#include "xfs_sb.h"
12#include "xfs_mount.h" 13#include "xfs_mount.h"
13#include "xfs_defer.h"
14#include "xfs_inode.h" 14#include "xfs_inode.h"
15#include "xfs_bmap.h" 15#include "xfs_bmap.h"
16#include "xfs_bmap_util.h"
17#include "xfs_alloc.h" 16#include "xfs_alloc.h"
18#include "xfs_mru_cache.h" 17#include "xfs_mru_cache.h"
19#include "xfs_filestream.h"
20#include "xfs_trace.h" 18#include "xfs_trace.h"
21#include "xfs_ag_resv.h" 19#include "xfs_ag_resv.h"
22#include "xfs_trans.h" 20#include "xfs_trans.h"
23#include "xfs_shared.h"
24 21
25struct xfs_fstrm_item { 22struct xfs_fstrm_item {
26 struct xfs_mru_cache_elem mru; 23 struct xfs_mru_cache_elem mru;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 3d76a9e35870..5a8f9641562a 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -9,16 +9,12 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_sb.h"
13#include "xfs_mount.h" 12#include "xfs_mount.h"
14#include "xfs_defer.h"
15#include "xfs_inode.h" 13#include "xfs_inode.h"
16#include "xfs_trans.h" 14#include "xfs_trans.h"
17#include "xfs_error.h"
18#include "xfs_btree.h" 15#include "xfs_btree.h"
19#include "xfs_rmap_btree.h" 16#include "xfs_rmap_btree.h"
20#include "xfs_trace.h" 17#include "xfs_trace.h"
21#include "xfs_log.h"
22#include "xfs_rmap.h" 18#include "xfs_rmap.h"
23#include "xfs_alloc.h" 19#include "xfs_alloc.h"
24#include "xfs_bit.h" 20#include "xfs_bit.h"
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 3d0e0570e3aa..3e61d0cc23f8 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -11,15 +11,11 @@
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_sb.h" 12#include "xfs_sb.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_defer.h"
15#include "xfs_trans.h" 14#include "xfs_trans.h"
16#include "xfs_error.h" 15#include "xfs_error.h"
17#include "xfs_btree.h"
18#include "xfs_alloc.h" 16#include "xfs_alloc.h"
19#include "xfs_fsops.h" 17#include "xfs_fsops.h"
20#include "xfs_trans_space.h" 18#include "xfs_trans_space.h"
21#include "xfs_rtalloc.h"
22#include "xfs_trace.h"
23#include "xfs_log.h" 19#include "xfs_log.h"
24#include "xfs_ag.h" 20#include "xfs_ag.h"
25#include "xfs_ag_resv.h" 21#include "xfs_ag_resv.h"
@@ -251,9 +247,9 @@ xfs_growfs_data(
251 if (mp->m_sb.sb_imax_pct) { 247 if (mp->m_sb.sb_imax_pct) {
252 uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct; 248 uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
253 do_div(icount, 100); 249 do_div(icount, 100);
254 mp->m_maxicount = XFS_FSB_TO_INO(mp, icount); 250 M_IGEO(mp)->maxicount = XFS_FSB_TO_INO(mp, icount);
255 } else 251 } else
256 mp->m_maxicount = 0; 252 M_IGEO(mp)->maxicount = 0;
257 253
258 /* Update secondary superblocks now the physical grow has completed */ 254 /* Update secondary superblocks now the physical grow has completed */
259 error = xfs_update_secondary_sbs(mp); 255 error = xfs_update_secondary_sbs(mp);
diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c
index d0d377384120..fa55ab8b8d80 100644
--- a/fs/xfs/xfs_globals.c
+++ b/fs/xfs/xfs_globals.c
@@ -4,7 +4,6 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_sysctl.h"
8 7
9/* 8/*
10 * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, 9 * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n,
@@ -41,4 +40,7 @@ struct xfs_globals xfs_globals = {
41#else 40#else
42 .bug_on_assert = false, /* assert failures WARN() */ 41 .bug_on_assert = false, /* assert failures WARN() */
43#endif 42#endif
43#ifdef DEBUG
44 .pwork_threads = -1, /* automatic thread detection */
45#endif
44}; 46};
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index 4c4929f9e7bf..8e0cb05a7142 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -9,12 +9,8 @@
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_sb.h" 12#include "xfs_sb.h"
14#include "xfs_mount.h" 13#include "xfs_mount.h"
15#include "xfs_defer.h"
16#include "xfs_da_format.h"
17#include "xfs_da_btree.h"
18#include "xfs_inode.h" 14#include "xfs_inode.h"
19#include "xfs_trace.h" 15#include "xfs_trace.h"
20#include "xfs_health.h" 16#include "xfs_health.h"
@@ -373,7 +369,7 @@ static const struct ioctl_sick_map ino_map[] = {
373void 369void
374xfs_bulkstat_health( 370xfs_bulkstat_health(
375 struct xfs_inode *ip, 371 struct xfs_inode *ip,
376 struct xfs_bstat *bs) 372 struct xfs_bulkstat *bs)
377{ 373{
378 const struct ioctl_sick_map *m; 374 const struct ioctl_sick_map *m;
379 unsigned int sick; 375 unsigned int sick;
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index a76b27565a18..0b0fd10a36d4 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -5,13 +5,13 @@
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h"
8#include "xfs_format.h" 9#include "xfs_format.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
11#include "xfs_sb.h" 12#include "xfs_sb.h"
12#include "xfs_mount.h" 13#include "xfs_mount.h"
13#include "xfs_inode.h" 14#include "xfs_inode.h"
14#include "xfs_error.h"
15#include "xfs_trans.h" 15#include "xfs_trans.h"
16#include "xfs_trans_priv.h" 16#include "xfs_trans_priv.h"
17#include "xfs_inode_item.h" 17#include "xfs_inode_item.h"
@@ -23,8 +23,6 @@
23#include "xfs_dquot.h" 23#include "xfs_dquot.h"
24#include "xfs_reflink.h" 24#include "xfs_reflink.h"
25 25
26#include <linux/kthread.h>
27#include <linux/freezer.h>
28#include <linux/iversion.h> 26#include <linux/iversion.h>
29 27
30/* 28/*
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index 8381d34cb102..d99a0a3e5f40 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -6,14 +6,9 @@
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h" 8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h" 9#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_mount.h"
14#include "xfs_trans.h" 10#include "xfs_trans.h"
15#include "xfs_trans_priv.h" 11#include "xfs_trans_priv.h"
16#include "xfs_error.h"
17#include "xfs_icreate_item.h" 12#include "xfs_icreate_item.h"
18#include "xfs_log.h" 13#include "xfs_log.h"
19 14
@@ -56,80 +51,18 @@ xfs_icreate_item_format(
56 sizeof(struct xfs_icreate_log)); 51 sizeof(struct xfs_icreate_log));
57} 52}
58 53
59
60/* Pinning has no meaning for the create item, so just return. */
61STATIC void 54STATIC void
62xfs_icreate_item_pin( 55xfs_icreate_item_release(
63 struct xfs_log_item *lip) 56 struct xfs_log_item *lip)
64{ 57{
58 kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip));
65} 59}
66 60
67
68/* pinning has no meaning for the create item, so just return. */
69STATIC void
70xfs_icreate_item_unpin(
71 struct xfs_log_item *lip,
72 int remove)
73{
74}
75
76STATIC void
77xfs_icreate_item_unlock(
78 struct xfs_log_item *lip)
79{
80 struct xfs_icreate_item *icp = ICR_ITEM(lip);
81
82 if (test_bit(XFS_LI_ABORTED, &lip->li_flags))
83 kmem_zone_free(xfs_icreate_zone, icp);
84 return;
85}
86
87/*
88 * Because we have ordered buffers being tracked in the AIL for the inode
89 * creation, we don't need the create item after this. Hence we can free
90 * the log item and return -1 to tell the caller we're done with the item.
91 */
92STATIC xfs_lsn_t
93xfs_icreate_item_committed(
94 struct xfs_log_item *lip,
95 xfs_lsn_t lsn)
96{
97 struct xfs_icreate_item *icp = ICR_ITEM(lip);
98
99 kmem_zone_free(xfs_icreate_zone, icp);
100 return (xfs_lsn_t)-1;
101}
102
103/* item can never get into the AIL */
104STATIC uint
105xfs_icreate_item_push(
106 struct xfs_log_item *lip,
107 struct list_head *buffer_list)
108{
109 ASSERT(0);
110 return XFS_ITEM_SUCCESS;
111}
112
113/* Ordered buffers do the dependency tracking here, so this does nothing. */
114STATIC void
115xfs_icreate_item_committing(
116 struct xfs_log_item *lip,
117 xfs_lsn_t lsn)
118{
119}
120
121/*
122 * This is the ops vector shared by all buf log items.
123 */
124static const struct xfs_item_ops xfs_icreate_item_ops = { 61static const struct xfs_item_ops xfs_icreate_item_ops = {
62 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
125 .iop_size = xfs_icreate_item_size, 63 .iop_size = xfs_icreate_item_size,
126 .iop_format = xfs_icreate_item_format, 64 .iop_format = xfs_icreate_item_format,
127 .iop_pin = xfs_icreate_item_pin, 65 .iop_release = xfs_icreate_item_release,
128 .iop_unpin = xfs_icreate_item_unpin,
129 .iop_push = xfs_icreate_item_push,
130 .iop_unlock = xfs_icreate_item_unlock,
131 .iop_committed = xfs_icreate_item_committed,
132 .iop_committing = xfs_icreate_item_committing,
133}; 66};
134 67
135 68
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 71d216cf6f87..6467d5e1df2d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3,7 +3,6 @@
3 * Copyright (c) 2000-2006 Silicon Graphics, Inc. 3 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include <linux/log2.h>
7#include <linux/iversion.h> 6#include <linux/iversion.h>
8 7
9#include "xfs.h" 8#include "xfs.h"
@@ -16,10 +15,7 @@
16#include "xfs_mount.h" 15#include "xfs_mount.h"
17#include "xfs_defer.h" 16#include "xfs_defer.h"
18#include "xfs_inode.h" 17#include "xfs_inode.h"
19#include "xfs_da_format.h"
20#include "xfs_da_btree.h"
21#include "xfs_dir2.h" 18#include "xfs_dir2.h"
22#include "xfs_attr_sf.h"
23#include "xfs_attr.h" 19#include "xfs_attr.h"
24#include "xfs_trans_space.h" 20#include "xfs_trans_space.h"
25#include "xfs_trans.h" 21#include "xfs_trans.h"
@@ -32,7 +28,6 @@
32#include "xfs_error.h" 28#include "xfs_error.h"
33#include "xfs_quota.h" 29#include "xfs_quota.h"
34#include "xfs_filestream.h" 30#include "xfs_filestream.h"
35#include "xfs_cksum.h"
36#include "xfs_trace.h" 31#include "xfs_trace.h"
37#include "xfs_icache.h" 32#include "xfs_icache.h"
38#include "xfs_symlink.h" 33#include "xfs_symlink.h"
@@ -40,7 +35,6 @@
40#include "xfs_log.h" 35#include "xfs_log.h"
41#include "xfs_bmap_btree.h" 36#include "xfs_bmap_btree.h"
42#include "xfs_reflink.h" 37#include "xfs_reflink.h"
43#include "xfs_dir2_priv.h"
44 38
45kmem_zone_t *xfs_inode_zone; 39kmem_zone_t *xfs_inode_zone;
46 40
@@ -441,12 +435,12 @@ xfs_lock_inumorder(int lock_mode, int subclass)
441 */ 435 */
442static void 436static void
443xfs_lock_inodes( 437xfs_lock_inodes(
444 xfs_inode_t **ips, 438 struct xfs_inode **ips,
445 int inodes, 439 int inodes,
446 uint lock_mode) 440 uint lock_mode)
447{ 441{
448 int attempts = 0, i, j, try_lock; 442 int attempts = 0, i, j, try_lock;
449 xfs_log_item_t *lp; 443 struct xfs_log_item *lp;
450 444
451 /* 445 /*
452 * Currently supports between 2 and 5 inodes with exclusive locking. We 446 * Currently supports between 2 and 5 inodes with exclusive locking. We
@@ -485,7 +479,7 @@ again:
485 */ 479 */
486 if (!try_lock) { 480 if (!try_lock) {
487 for (j = (i - 1); j >= 0 && !try_lock; j--) { 481 for (j = (i - 1); j >= 0 && !try_lock; j--) {
488 lp = (xfs_log_item_t *)ips[j]->i_itemp; 482 lp = &ips[j]->i_itemp->ili_item;
489 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) 483 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags))
490 try_lock++; 484 try_lock++;
491 } 485 }
@@ -551,7 +545,7 @@ xfs_lock_two_inodes(
551 struct xfs_inode *temp; 545 struct xfs_inode *temp;
552 uint mode_temp; 546 uint mode_temp;
553 int attempts = 0; 547 int attempts = 0;
554 xfs_log_item_t *lp; 548 struct xfs_log_item *lp;
555 549
556 ASSERT(hweight32(ip0_mode) == 1); 550 ASSERT(hweight32(ip0_mode) == 1);
557 ASSERT(hweight32(ip1_mode) == 1); 551 ASSERT(hweight32(ip1_mode) == 1);
@@ -585,7 +579,7 @@ xfs_lock_two_inodes(
585 * the second lock. If we can't get it, we must release the first one 579 * the second lock. If we can't get it, we must release the first one
586 * and try again. 580 * and try again.
587 */ 581 */
588 lp = (xfs_log_item_t *)ip0->i_itemp; 582 lp = &ip0->i_itemp->ili_item;
589 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) { 583 if (lp && test_bit(XFS_LI_IN_AIL, &lp->li_flags)) {
590 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) { 584 if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(ip1_mode, 1))) {
591 xfs_iunlock(ip0, ip0_mode); 585 xfs_iunlock(ip0, ip0_mode);
@@ -2537,13 +2531,14 @@ xfs_ifree_cluster(
2537 xfs_inode_log_item_t *iip; 2531 xfs_inode_log_item_t *iip;
2538 struct xfs_log_item *lip; 2532 struct xfs_log_item *lip;
2539 struct xfs_perag *pag; 2533 struct xfs_perag *pag;
2534 struct xfs_ino_geometry *igeo = M_IGEO(mp);
2540 xfs_ino_t inum; 2535 xfs_ino_t inum;
2541 2536
2542 inum = xic->first_ino; 2537 inum = xic->first_ino;
2543 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); 2538 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
2544 nbufs = mp->m_ialloc_blks / mp->m_blocks_per_cluster; 2539 nbufs = igeo->ialloc_blks / igeo->blocks_per_cluster;
2545 2540
2546 for (j = 0; j < nbufs; j++, inum += mp->m_inodes_per_cluster) { 2541 for (j = 0; j < nbufs; j++, inum += igeo->inodes_per_cluster) {
2547 /* 2542 /*
2548 * The allocation bitmap tells us which inodes of the chunk were 2543 * The allocation bitmap tells us which inodes of the chunk were
2549 * physically allocated. Skip the cluster if an inode falls into 2544 * physically allocated. Skip the cluster if an inode falls into
@@ -2551,7 +2546,7 @@ xfs_ifree_cluster(
2551 */ 2546 */
2552 ioffset = inum - xic->first_ino; 2547 ioffset = inum - xic->first_ino;
2553 if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) { 2548 if ((xic->alloc & XFS_INOBT_MASK(ioffset)) == 0) {
2554 ASSERT(ioffset % mp->m_inodes_per_cluster == 0); 2549 ASSERT(ioffset % igeo->inodes_per_cluster == 0);
2555 continue; 2550 continue;
2556 } 2551 }
2557 2552
@@ -2567,7 +2562,7 @@ xfs_ifree_cluster(
2567 * to mark all the active inodes on the buffer stale. 2562 * to mark all the active inodes on the buffer stale.
2568 */ 2563 */
2569 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno, 2564 bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
2570 mp->m_bsize * mp->m_blocks_per_cluster, 2565 mp->m_bsize * igeo->blocks_per_cluster,
2571 XBF_UNMAPPED); 2566 XBF_UNMAPPED);
2572 2567
2573 if (!bp) 2568 if (!bp)
@@ -2614,7 +2609,7 @@ xfs_ifree_cluster(
2614 * transaction stale above, which means there is no point in 2609 * transaction stale above, which means there is no point in
2615 * even trying to lock them. 2610 * even trying to lock them.
2616 */ 2611 */
2617 for (i = 0; i < mp->m_inodes_per_cluster; i++) { 2612 for (i = 0; i < igeo->inodes_per_cluster; i++) {
2618retry: 2613retry:
2619 rcu_read_lock(); 2614 rcu_read_lock();
2620 ip = radix_tree_lookup(&pag->pag_ici_root, 2615 ip = radix_tree_lookup(&pag->pag_ici_root,
@@ -3472,28 +3467,27 @@ xfs_iflush_cluster(
3472 struct xfs_mount *mp = ip->i_mount; 3467 struct xfs_mount *mp = ip->i_mount;
3473 struct xfs_perag *pag; 3468 struct xfs_perag *pag;
3474 unsigned long first_index, mask; 3469 unsigned long first_index, mask;
3475 unsigned long inodes_per_cluster;
3476 int cilist_size; 3470 int cilist_size;
3477 struct xfs_inode **cilist; 3471 struct xfs_inode **cilist;
3478 struct xfs_inode *cip; 3472 struct xfs_inode *cip;
3473 struct xfs_ino_geometry *igeo = M_IGEO(mp);
3479 int nr_found; 3474 int nr_found;
3480 int clcount = 0; 3475 int clcount = 0;
3481 int i; 3476 int i;
3482 3477
3483 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 3478 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
3484 3479
3485 inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog; 3480 cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *);
3486 cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
3487 cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS); 3481 cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
3488 if (!cilist) 3482 if (!cilist)
3489 goto out_put; 3483 goto out_put;
3490 3484
3491 mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1); 3485 mask = ~(igeo->inodes_per_cluster - 1);
3492 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; 3486 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
3493 rcu_read_lock(); 3487 rcu_read_lock();
3494 /* really need a gang lookup range call here */ 3488 /* really need a gang lookup range call here */
3495 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist, 3489 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
3496 first_index, inodes_per_cluster); 3490 first_index, igeo->inodes_per_cluster);
3497 if (nr_found == 0) 3491 if (nr_found == 0)
3498 goto out_free; 3492 goto out_free;
3499 3493
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index fa1c4fe2ffbf..c9a502eed204 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -5,6 +5,7 @@
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h"
8#include "xfs_format.h" 9#include "xfs_format.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
@@ -12,7 +13,6 @@
12#include "xfs_inode.h" 13#include "xfs_inode.h"
13#include "xfs_trans.h" 14#include "xfs_trans.h"
14#include "xfs_inode_item.h" 15#include "xfs_inode_item.h"
15#include "xfs_error.h"
16#include "xfs_trace.h" 16#include "xfs_trace.h"
17#include "xfs_trans_priv.h" 17#include "xfs_trans_priv.h"
18#include "xfs_buf_item.h" 18#include "xfs_buf_item.h"
@@ -565,7 +565,7 @@ out_unlock:
565 * Unlock the inode associated with the inode log item. 565 * Unlock the inode associated with the inode log item.
566 */ 566 */
567STATIC void 567STATIC void
568xfs_inode_item_unlock( 568xfs_inode_item_release(
569 struct xfs_log_item *lip) 569 struct xfs_log_item *lip)
570{ 570{
571 struct xfs_inode_log_item *iip = INODE_ITEM(lip); 571 struct xfs_inode_log_item *iip = INODE_ITEM(lip);
@@ -621,23 +621,21 @@ xfs_inode_item_committed(
621STATIC void 621STATIC void
622xfs_inode_item_committing( 622xfs_inode_item_committing(
623 struct xfs_log_item *lip, 623 struct xfs_log_item *lip,
624 xfs_lsn_t lsn) 624 xfs_lsn_t commit_lsn)
625{ 625{
626 INODE_ITEM(lip)->ili_last_lsn = lsn; 626 INODE_ITEM(lip)->ili_last_lsn = commit_lsn;
627 return xfs_inode_item_release(lip);
627} 628}
628 629
629/*
630 * This is the ops vector shared by all buf log items.
631 */
632static const struct xfs_item_ops xfs_inode_item_ops = { 630static const struct xfs_item_ops xfs_inode_item_ops = {
633 .iop_size = xfs_inode_item_size, 631 .iop_size = xfs_inode_item_size,
634 .iop_format = xfs_inode_item_format, 632 .iop_format = xfs_inode_item_format,
635 .iop_pin = xfs_inode_item_pin, 633 .iop_pin = xfs_inode_item_pin,
636 .iop_unpin = xfs_inode_item_unpin, 634 .iop_unpin = xfs_inode_item_unpin,
637 .iop_unlock = xfs_inode_item_unlock, 635 .iop_release = xfs_inode_item_release,
638 .iop_committed = xfs_inode_item_committed, 636 .iop_committed = xfs_inode_item_committed,
639 .iop_push = xfs_inode_item_push, 637 .iop_push = xfs_inode_item_push,
640 .iop_committing = xfs_inode_item_committing, 638 .iop_committing = xfs_inode_item_committing,
641 .iop_error = xfs_inode_item_error 639 .iop_error = xfs_inode_item_error
642}; 640};
643 641
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 27081eba220c..07a60e74c39c 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -14,7 +14,7 @@ struct xfs_inode;
14struct xfs_mount; 14struct xfs_mount;
15 15
16typedef struct xfs_inode_log_item { 16typedef struct xfs_inode_log_item {
17 xfs_log_item_t ili_item; /* common portion */ 17 struct xfs_log_item ili_item; /* common portion */
18 struct xfs_inode *ili_inode; /* inode ptr */ 18 struct xfs_inode *ili_inode; /* inode ptr */
19 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */ 19 xfs_lsn_t ili_flush_lsn; /* lsn at last flush */
20 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */ 20 xfs_lsn_t ili_last_lsn; /* lsn at last transaction */
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index fe29aa61293c..6f7848cd5527 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -11,9 +11,8 @@
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_inode.h" 13#include "xfs_inode.h"
14#include "xfs_ioctl.h"
15#include "xfs_alloc.h"
16#include "xfs_rtalloc.h" 14#include "xfs_rtalloc.h"
15#include "xfs_iwalk.h"
17#include "xfs_itable.h" 16#include "xfs_itable.h"
18#include "xfs_error.h" 17#include "xfs_error.h"
19#include "xfs_attr.h" 18#include "xfs_attr.h"
@@ -25,7 +24,6 @@
25#include "xfs_export.h" 24#include "xfs_export.h"
26#include "xfs_trace.h" 25#include "xfs_trace.h"
27#include "xfs_icache.h" 26#include "xfs_icache.h"
28#include "xfs_symlink.h"
29#include "xfs_trans.h" 27#include "xfs_trans.h"
30#include "xfs_acl.h" 28#include "xfs_acl.h"
31#include "xfs_btree.h" 29#include "xfs_btree.h"
@@ -36,14 +34,8 @@
36#include "xfs_ag.h" 34#include "xfs_ag.h"
37#include "xfs_health.h" 35#include "xfs_health.h"
38 36
39#include <linux/capability.h>
40#include <linux/cred.h>
41#include <linux/dcache.h>
42#include <linux/mount.h> 37#include <linux/mount.h>
43#include <linux/namei.h> 38#include <linux/namei.h>
44#include <linux/pagemap.h>
45#include <linux/slab.h>
46#include <linux/exportfs.h>
47 39
48/* 40/*
49 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to 41 * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
@@ -721,16 +713,45 @@ out_unlock:
721 return error; 713 return error;
722} 714}
723 715
716/* Return 0 on success or positive error */
717int
718xfs_fsbulkstat_one_fmt(
719 struct xfs_ibulk *breq,
720 const struct xfs_bulkstat *bstat)
721{
722 struct xfs_bstat bs1;
723
724 xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
725 if (copy_to_user(breq->ubuffer, &bs1, sizeof(bs1)))
726 return -EFAULT;
727 return xfs_ibulk_advance(breq, sizeof(struct xfs_bstat));
728}
729
730int
731xfs_fsinumbers_fmt(
732 struct xfs_ibulk *breq,
733 const struct xfs_inumbers *igrp)
734{
735 struct xfs_inogrp ig1;
736
737 xfs_inumbers_to_inogrp(&ig1, igrp);
738 if (copy_to_user(breq->ubuffer, &ig1, sizeof(struct xfs_inogrp)))
739 return -EFAULT;
740 return xfs_ibulk_advance(breq, sizeof(struct xfs_inogrp));
741}
742
724STATIC int 743STATIC int
725xfs_ioc_bulkstat( 744xfs_ioc_fsbulkstat(
726 xfs_mount_t *mp, 745 xfs_mount_t *mp,
727 unsigned int cmd, 746 unsigned int cmd,
728 void __user *arg) 747 void __user *arg)
729{ 748{
730 xfs_fsop_bulkreq_t bulkreq; 749 struct xfs_fsop_bulkreq bulkreq;
731 int count; /* # of records returned */ 750 struct xfs_ibulk breq = {
732 xfs_ino_t inlast; /* last inode number */ 751 .mp = mp,
733 int done; 752 .ocount = 0,
753 };
754 xfs_ino_t lastino;
734 int error; 755 int error;
735 756
736 /* done = 1 if there are more stats to get and if bulkstat */ 757 /* done = 1 if there are more stats to get and if bulkstat */
@@ -742,41 +763,243 @@ xfs_ioc_bulkstat(
742 if (XFS_FORCED_SHUTDOWN(mp)) 763 if (XFS_FORCED_SHUTDOWN(mp))
743 return -EIO; 764 return -EIO;
744 765
745 if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t))) 766 if (copy_from_user(&bulkreq, arg, sizeof(struct xfs_fsop_bulkreq)))
746 return -EFAULT; 767 return -EFAULT;
747 768
748 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 769 if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
749 return -EFAULT; 770 return -EFAULT;
750 771
751 if ((count = bulkreq.icount) <= 0) 772 if (bulkreq.icount <= 0)
752 return -EINVAL; 773 return -EINVAL;
753 774
754 if (bulkreq.ubuffer == NULL) 775 if (bulkreq.ubuffer == NULL)
755 return -EINVAL; 776 return -EINVAL;
756 777
757 if (cmd == XFS_IOC_FSINUMBERS) 778 breq.ubuffer = bulkreq.ubuffer;
758 error = xfs_inumbers(mp, &inlast, &count, 779 breq.icount = bulkreq.icount;
759 bulkreq.ubuffer, xfs_inumbers_fmt); 780
760 else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) 781 /*
761 error = xfs_bulkstat_one(mp, inlast, bulkreq.ubuffer, 782 * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
762 sizeof(xfs_bstat_t), NULL, &done); 783 * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect
763 else /* XFS_IOC_FSBULKSTAT */ 784 * that *lastip contains either zero or the number of the last inode to
764 error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one, 785 * be examined by the previous call and return results starting with
765 sizeof(xfs_bstat_t), bulkreq.ubuffer, 786 * the next inode after that. The new bulk request back end functions
766 &done); 787 * take the inode to start with, so we have to compute the startino
788 * parameter from lastino to maintain correct function. lastino == 0
789 * is a special case because it has traditionally meant "first inode
790 * in filesystem".
791 */
792 if (cmd == XFS_IOC_FSINUMBERS) {
793 breq.startino = lastino ? lastino + 1 : 0;
794 error = xfs_inumbers(&breq, xfs_fsinumbers_fmt);
795 lastino = breq.startino - 1;
796 } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE) {
797 breq.startino = lastino;
798 breq.icount = 1;
799 error = xfs_bulkstat_one(&breq, xfs_fsbulkstat_one_fmt);
800 } else { /* XFS_IOC_FSBULKSTAT */
801 breq.startino = lastino ? lastino + 1 : 0;
802 error = xfs_bulkstat(&breq, xfs_fsbulkstat_one_fmt);
803 lastino = breq.startino - 1;
804 }
767 805
768 if (error) 806 if (error)
769 return error; 807 return error;
770 808
771 if (bulkreq.ocount != NULL) { 809 if (bulkreq.lastip != NULL &&
772 if (copy_to_user(bulkreq.lastip, &inlast, 810 copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
773 sizeof(xfs_ino_t))) 811 return -EFAULT;
774 return -EFAULT;
775 812
776 if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) 813 if (bulkreq.ocount != NULL &&
777 return -EFAULT; 814 copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
815 return -EFAULT;
816
817 return 0;
818}
819
820/* Return 0 on success or positive error */
821static int
822xfs_bulkstat_fmt(
823 struct xfs_ibulk *breq,
824 const struct xfs_bulkstat *bstat)
825{
826 if (copy_to_user(breq->ubuffer, bstat, sizeof(struct xfs_bulkstat)))
827 return -EFAULT;
828 return xfs_ibulk_advance(breq, sizeof(struct xfs_bulkstat));
829}
830
831/*
832 * Check the incoming bulk request @hdr from userspace and initialize the
833 * internal @breq bulk request appropriately. Returns 0 if the bulk request
834 * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual
835 * negative error code.
836 */
837static int
838xfs_bulk_ireq_setup(
839 struct xfs_mount *mp,
840 struct xfs_bulk_ireq *hdr,
841 struct xfs_ibulk *breq,
842 void __user *ubuffer)
843{
844 if (hdr->icount == 0 ||
845 (hdr->flags & ~XFS_BULK_IREQ_FLAGS_ALL) ||
846 memchr_inv(hdr->reserved, 0, sizeof(hdr->reserved)))
847 return -EINVAL;
848
849 breq->startino = hdr->ino;
850 breq->ubuffer = ubuffer;
851 breq->icount = hdr->icount;
852 breq->ocount = 0;
853 breq->flags = 0;
854
855 /*
856 * The @ino parameter is a special value, so we must look it up here.
857 * We're not allowed to have IREQ_AGNO, and we only return one inode
858 * worth of data.
859 */
860 if (hdr->flags & XFS_BULK_IREQ_SPECIAL) {
861 if (hdr->flags & XFS_BULK_IREQ_AGNO)
862 return -EINVAL;
863
864 switch (hdr->ino) {
865 case XFS_BULK_IREQ_SPECIAL_ROOT:
866 hdr->ino = mp->m_sb.sb_rootino;
867 break;
868 default:
869 return -EINVAL;
870 }
871 breq->icount = 1;
778 } 872 }
779 873
874 /*
875 * The IREQ_AGNO flag means that we only want results from a given AG.
876 * If @hdr->ino is zero, we start iterating in that AG. If @hdr->ino is
877 * beyond the specified AG then we return no results.
878 */
879 if (hdr->flags & XFS_BULK_IREQ_AGNO) {
880 if (hdr->agno >= mp->m_sb.sb_agcount)
881 return -EINVAL;
882
883 if (breq->startino == 0)
884 breq->startino = XFS_AGINO_TO_INO(mp, hdr->agno, 0);
885 else if (XFS_INO_TO_AGNO(mp, breq->startino) < hdr->agno)
886 return -EINVAL;
887
888 breq->flags |= XFS_IBULK_SAME_AG;
889
890 /* Asking for an inode past the end of the AG? We're done! */
891 if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno)
892 return XFS_ITER_ABORT;
893 } else if (hdr->agno)
894 return -EINVAL;
895
896 /* Asking for an inode past the end of the FS? We're done! */
897 if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount)
898 return XFS_ITER_ABORT;
899
900 return 0;
901}
902
903/*
904 * Update the userspace bulk request @hdr to reflect the end state of the
905 * internal bulk request @breq.
906 */
907static void
908xfs_bulk_ireq_teardown(
909 struct xfs_bulk_ireq *hdr,
910 struct xfs_ibulk *breq)
911{
912 hdr->ino = breq->startino;
913 hdr->ocount = breq->ocount;
914}
915
916/* Handle the v5 bulkstat ioctl. */
917STATIC int
918xfs_ioc_bulkstat(
919 struct xfs_mount *mp,
920 unsigned int cmd,
921 struct xfs_bulkstat_req __user *arg)
922{
923 struct xfs_bulk_ireq hdr;
924 struct xfs_ibulk breq = {
925 .mp = mp,
926 };
927 int error;
928
929 if (!capable(CAP_SYS_ADMIN))
930 return -EPERM;
931
932 if (XFS_FORCED_SHUTDOWN(mp))
933 return -EIO;
934
935 if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
936 return -EFAULT;
937
938 error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat);
939 if (error == XFS_ITER_ABORT)
940 goto out_teardown;
941 if (error < 0)
942 return error;
943
944 error = xfs_bulkstat(&breq, xfs_bulkstat_fmt);
945 if (error)
946 return error;
947
948out_teardown:
949 xfs_bulk_ireq_teardown(&hdr, &breq);
950 if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
951 return -EFAULT;
952
953 return 0;
954}
955
956STATIC int
957xfs_inumbers_fmt(
958 struct xfs_ibulk *breq,
959 const struct xfs_inumbers *igrp)
960{
961 if (copy_to_user(breq->ubuffer, igrp, sizeof(struct xfs_inumbers)))
962 return -EFAULT;
963 return xfs_ibulk_advance(breq, sizeof(struct xfs_inumbers));
964}
965
966/* Handle the v5 inumbers ioctl. */
967STATIC int
968xfs_ioc_inumbers(
969 struct xfs_mount *mp,
970 unsigned int cmd,
971 struct xfs_inumbers_req __user *arg)
972{
973 struct xfs_bulk_ireq hdr;
974 struct xfs_ibulk breq = {
975 .mp = mp,
976 };
977 int error;
978
979 if (!capable(CAP_SYS_ADMIN))
980 return -EPERM;
981
982 if (XFS_FORCED_SHUTDOWN(mp))
983 return -EIO;
984
985 if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
986 return -EFAULT;
987
988 error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers);
989 if (error == XFS_ITER_ABORT)
990 goto out_teardown;
991 if (error < 0)
992 return error;
993
994 error = xfs_inumbers(&breq, xfs_inumbers_fmt);
995 if (error)
996 return error;
997
998out_teardown:
999 xfs_bulk_ireq_teardown(&hdr, &breq);
1000 if (copy_to_user(&arg->hdr, &hdr, sizeof(hdr)))
1001 return -EFAULT;
1002
780 return 0; 1003 return 0;
781} 1004}
782 1005
@@ -1926,7 +2149,12 @@ xfs_file_ioctl(
1926 case XFS_IOC_FSBULKSTAT_SINGLE: 2149 case XFS_IOC_FSBULKSTAT_SINGLE:
1927 case XFS_IOC_FSBULKSTAT: 2150 case XFS_IOC_FSBULKSTAT:
1928 case XFS_IOC_FSINUMBERS: 2151 case XFS_IOC_FSINUMBERS:
2152 return xfs_ioc_fsbulkstat(mp, cmd, arg);
2153
2154 case XFS_IOC_BULKSTAT:
1929 return xfs_ioc_bulkstat(mp, cmd, arg); 2155 return xfs_ioc_bulkstat(mp, cmd, arg);
2156 case XFS_IOC_INUMBERS:
2157 return xfs_ioc_inumbers(mp, cmd, arg);
1930 2158
1931 case XFS_IOC_FSGEOMETRY_V1: 2159 case XFS_IOC_FSGEOMETRY_V1:
1932 return xfs_ioc_fsgeometry(mp, arg, 3); 2160 return xfs_ioc_fsgeometry(mp, arg, 3);
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 4b17f67c888a..654c0bb1bcf8 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -77,4 +77,12 @@ xfs_set_dmattrs(
77 uint evmask, 77 uint evmask,
78 uint16_t state); 78 uint16_t state);
79 79
80struct xfs_ibulk;
81struct xfs_bstat;
82struct xfs_inogrp;
83
84int xfs_fsbulkstat_one_fmt(struct xfs_ibulk *breq,
85 const struct xfs_bulkstat *bstat);
86int xfs_fsinumbers_fmt(struct xfs_ibulk *breq, const struct xfs_inumbers *igrp);
87
80#endif 88#endif
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 614fc6886d24..7fcf7569743f 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -3,23 +3,19 @@
3 * Copyright (c) 2004-2005 Silicon Graphics, Inc. 3 * Copyright (c) 2004-2005 Silicon Graphics, Inc.
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include <linux/compat.h>
7#include <linux/ioctl.h>
8#include <linux/mount.h> 6#include <linux/mount.h>
9#include <linux/slab.h>
10#include <linux/uaccess.h>
11#include <linux/fsmap.h> 7#include <linux/fsmap.h>
12#include "xfs.h" 8#include "xfs.h"
13#include "xfs_fs.h" 9#include "xfs_fs.h"
10#include "xfs_shared.h"
14#include "xfs_format.h" 11#include "xfs_format.h"
15#include "xfs_log_format.h" 12#include "xfs_log_format.h"
16#include "xfs_trans_resv.h" 13#include "xfs_trans_resv.h"
17#include "xfs_mount.h" 14#include "xfs_mount.h"
18#include "xfs_inode.h" 15#include "xfs_inode.h"
16#include "xfs_iwalk.h"
19#include "xfs_itable.h" 17#include "xfs_itable.h"
20#include "xfs_error.h"
21#include "xfs_fsops.h" 18#include "xfs_fsops.h"
22#include "xfs_alloc.h"
23#include "xfs_rtalloc.h" 19#include "xfs_rtalloc.h"
24#include "xfs_attr.h" 20#include "xfs_attr.h"
25#include "xfs_ioctl.h" 21#include "xfs_ioctl.h"
@@ -84,27 +80,26 @@ xfs_compat_growfs_rt_copyin(
84} 80}
85 81
86STATIC int 82STATIC int
87xfs_inumbers_fmt_compat( 83xfs_fsinumbers_fmt_compat(
88 void __user *ubuffer, 84 struct xfs_ibulk *breq,
89 const struct xfs_inogrp *buffer, 85 const struct xfs_inumbers *ig)
90 long count,
91 long *written)
92{ 86{
93 compat_xfs_inogrp_t __user *p32 = ubuffer; 87 struct compat_xfs_inogrp __user *p32 = breq->ubuffer;
94 long i; 88 struct xfs_inogrp ig1;
89 struct xfs_inogrp *igrp = &ig1;
95 90
96 for (i = 0; i < count; i++) { 91 xfs_inumbers_to_inogrp(&ig1, ig);
97 if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || 92
98 put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || 93 if (put_user(igrp->xi_startino, &p32->xi_startino) ||
99 put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) 94 put_user(igrp->xi_alloccount, &p32->xi_alloccount) ||
100 return -EFAULT; 95 put_user(igrp->xi_allocmask, &p32->xi_allocmask))
101 } 96 return -EFAULT;
102 *written = count * sizeof(*p32); 97
103 return 0; 98 return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_inogrp));
104} 99}
105 100
106#else 101#else
107#define xfs_inumbers_fmt_compat xfs_inumbers_fmt 102#define xfs_fsinumbers_fmt_compat xfs_fsinumbers_fmt
108#endif /* BROKEN_X86_ALIGNMENT */ 103#endif /* BROKEN_X86_ALIGNMENT */
109 104
110STATIC int 105STATIC int
@@ -121,11 +116,14 @@ xfs_ioctl32_bstime_copyin(
121 return 0; 116 return 0;
122} 117}
123 118
124/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ 119/*
120 * struct xfs_bstat has differing alignment on intel, & bstime_t sizes
121 * everywhere
122 */
125STATIC int 123STATIC int
126xfs_ioctl32_bstat_copyin( 124xfs_ioctl32_bstat_copyin(
127 xfs_bstat_t *bstat, 125 struct xfs_bstat *bstat,
128 compat_xfs_bstat_t __user *bstat32) 126 struct compat_xfs_bstat __user *bstat32)
129{ 127{
130 if (get_user(bstat->bs_ino, &bstat32->bs_ino) || 128 if (get_user(bstat->bs_ino, &bstat32->bs_ino) ||
131 get_user(bstat->bs_mode, &bstat32->bs_mode) || 129 get_user(bstat->bs_mode, &bstat32->bs_mode) ||
@@ -171,16 +169,15 @@ xfs_bstime_store_compat(
171 169
172/* Return 0 on success or positive error (to xfs_bulkstat()) */ 170/* Return 0 on success or positive error (to xfs_bulkstat()) */
173STATIC int 171STATIC int
174xfs_bulkstat_one_fmt_compat( 172xfs_fsbulkstat_one_fmt_compat(
175 void __user *ubuffer, 173 struct xfs_ibulk *breq,
176 int ubsize, 174 const struct xfs_bulkstat *bstat)
177 int *ubused,
178 const xfs_bstat_t *buffer)
179{ 175{
180 compat_xfs_bstat_t __user *p32 = ubuffer; 176 struct compat_xfs_bstat __user *p32 = breq->ubuffer;
177 struct xfs_bstat bs1;
178 struct xfs_bstat *buffer = &bs1;
181 179
182 if (ubsize < sizeof(*p32)) 180 xfs_bulkstat_to_bstat(breq->mp, &bs1, bstat);
183 return -ENOMEM;
184 181
185 if (put_user(buffer->bs_ino, &p32->bs_ino) || 182 if (put_user(buffer->bs_ino, &p32->bs_ino) ||
186 put_user(buffer->bs_mode, &p32->bs_mode) || 183 put_user(buffer->bs_mode, &p32->bs_mode) ||
@@ -205,37 +202,24 @@ xfs_bulkstat_one_fmt_compat(
205 put_user(buffer->bs_dmstate, &p32->bs_dmstate) || 202 put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
206 put_user(buffer->bs_aextents, &p32->bs_aextents)) 203 put_user(buffer->bs_aextents, &p32->bs_aextents))
207 return -EFAULT; 204 return -EFAULT;
208 if (ubused)
209 *ubused = sizeof(*p32);
210 return 0;
211}
212 205
213STATIC int 206 return xfs_ibulk_advance(breq, sizeof(struct compat_xfs_bstat));
214xfs_bulkstat_one_compat(
215 xfs_mount_t *mp, /* mount point for filesystem */
216 xfs_ino_t ino, /* inode number to get data for */
217 void __user *buffer, /* buffer to place output in */
218 int ubsize, /* size of buffer */
219 int *ubused, /* bytes used by me */
220 int *stat) /* BULKSTAT_RV_... */
221{
222 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
223 xfs_bulkstat_one_fmt_compat,
224 ubused, stat);
225} 207}
226 208
227/* copied from xfs_ioctl.c */ 209/* copied from xfs_ioctl.c */
228STATIC int 210STATIC int
229xfs_compat_ioc_bulkstat( 211xfs_compat_ioc_fsbulkstat(
230 xfs_mount_t *mp, 212 xfs_mount_t *mp,
231 unsigned int cmd, 213 unsigned int cmd,
232 compat_xfs_fsop_bulkreq_t __user *p32) 214 struct compat_xfs_fsop_bulkreq __user *p32)
233{ 215{
234 u32 addr; 216 u32 addr;
235 xfs_fsop_bulkreq_t bulkreq; 217 struct xfs_fsop_bulkreq bulkreq;
236 int count; /* # of records returned */ 218 struct xfs_ibulk breq = {
237 xfs_ino_t inlast; /* last inode number */ 219 .mp = mp,
238 int done; 220 .ocount = 0,
221 };
222 xfs_ino_t lastino;
239 int error; 223 int error;
240 224
241 /* 225 /*
@@ -244,9 +228,8 @@ xfs_compat_ioc_bulkstat(
244 * to userpace memory via bulkreq.ubuffer. Normally the compat 228 * to userpace memory via bulkreq.ubuffer. Normally the compat
245 * functions and structure size are the correct ones to use ... 229 * functions and structure size are the correct ones to use ...
246 */ 230 */
247 inumbers_fmt_pf inumbers_func = xfs_inumbers_fmt_compat; 231 inumbers_fmt_pf inumbers_func = xfs_fsinumbers_fmt_compat;
248 bulkstat_one_pf bs_one_func = xfs_bulkstat_one_compat; 232 bulkstat_one_fmt_pf bs_one_func = xfs_fsbulkstat_one_fmt_compat;
249 size_t bs_one_size = sizeof(struct compat_xfs_bstat);
250 233
251#ifdef CONFIG_X86_X32 234#ifdef CONFIG_X86_X32
252 if (in_x32_syscall()) { 235 if (in_x32_syscall()) {
@@ -258,9 +241,8 @@ xfs_compat_ioc_bulkstat(
258 * the data written out in compat layout will not match what 241 * the data written out in compat layout will not match what
259 * x32 userspace expects. 242 * x32 userspace expects.
260 */ 243 */
261 inumbers_func = xfs_inumbers_fmt; 244 inumbers_func = xfs_fsinumbers_fmt;
262 bs_one_func = xfs_bulkstat_one; 245 bs_one_func = xfs_fsbulkstat_one_fmt;
263 bs_one_size = sizeof(struct xfs_bstat);
264 } 246 }
265#endif 247#endif
266 248
@@ -284,40 +266,55 @@ xfs_compat_ioc_bulkstat(
284 return -EFAULT; 266 return -EFAULT;
285 bulkreq.ocount = compat_ptr(addr); 267 bulkreq.ocount = compat_ptr(addr);
286 268
287 if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) 269 if (copy_from_user(&lastino, bulkreq.lastip, sizeof(__s64)))
288 return -EFAULT; 270 return -EFAULT;
289 271
290 if ((count = bulkreq.icount) <= 0) 272 if (bulkreq.icount <= 0)
291 return -EINVAL; 273 return -EINVAL;
292 274
293 if (bulkreq.ubuffer == NULL) 275 if (bulkreq.ubuffer == NULL)
294 return -EINVAL; 276 return -EINVAL;
295 277
278 breq.ubuffer = bulkreq.ubuffer;
279 breq.icount = bulkreq.icount;
280
281 /*
282 * FSBULKSTAT_SINGLE expects that *lastip contains the inode number
283 * that we want to stat. However, FSINUMBERS and FSBULKSTAT expect
284 * that *lastip contains either zero or the number of the last inode to
285 * be examined by the previous call and return results starting with
286 * the next inode after that. The new bulk request back end functions
287 * take the inode to start with, so we have to compute the startino
288 * parameter from lastino to maintain correct function. lastino == 0
289 * is a special case because it has traditionally meant "first inode
290 * in filesystem".
291 */
296 if (cmd == XFS_IOC_FSINUMBERS_32) { 292 if (cmd == XFS_IOC_FSINUMBERS_32) {
297 error = xfs_inumbers(mp, &inlast, &count, 293 breq.startino = lastino ? lastino + 1 : 0;
298 bulkreq.ubuffer, inumbers_func); 294 error = xfs_inumbers(&breq, inumbers_func);
295 lastino = breq.startino - 1;
299 } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { 296 } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
300 int res; 297 breq.startino = lastino;
301 298 breq.icount = 1;
302 error = bs_one_func(mp, inlast, bulkreq.ubuffer, 299 error = xfs_bulkstat_one(&breq, bs_one_func);
303 bs_one_size, NULL, &res); 300 lastino = breq.startino;
304 } else if (cmd == XFS_IOC_FSBULKSTAT_32) { 301 } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
305 error = xfs_bulkstat(mp, &inlast, &count, 302 breq.startino = lastino ? lastino + 1 : 0;
306 bs_one_func, bs_one_size, 303 error = xfs_bulkstat(&breq, bs_one_func);
307 bulkreq.ubuffer, &done); 304 lastino = breq.startino - 1;
308 } else 305 } else {
309 error = -EINVAL; 306 error = -EINVAL;
307 }
310 if (error) 308 if (error)
311 return error; 309 return error;
312 310
313 if (bulkreq.ocount != NULL) { 311 if (bulkreq.lastip != NULL &&
314 if (copy_to_user(bulkreq.lastip, &inlast, 312 copy_to_user(bulkreq.lastip, &lastino, sizeof(xfs_ino_t)))
315 sizeof(xfs_ino_t))) 313 return -EFAULT;
316 return -EFAULT;
317 314
318 if (copy_to_user(bulkreq.ocount, &count, sizeof(count))) 315 if (bulkreq.ocount != NULL &&
319 return -EFAULT; 316 copy_to_user(bulkreq.ocount, &breq.ocount, sizeof(__s32)))
320 } 317 return -EFAULT;
321 318
322 return 0; 319 return 0;
323} 320}
@@ -577,6 +574,8 @@ xfs_file_compat_ioctl(
577 case XFS_IOC_ERROR_CLEARALL: 574 case XFS_IOC_ERROR_CLEARALL:
578 case FS_IOC_GETFSMAP: 575 case FS_IOC_GETFSMAP:
579 case XFS_IOC_SCRUB_METADATA: 576 case XFS_IOC_SCRUB_METADATA:
577 case XFS_IOC_BULKSTAT:
578 case XFS_IOC_INUMBERS:
580 return xfs_file_ioctl(filp, cmd, p); 579 return xfs_file_ioctl(filp, cmd, p);
581#if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32) 580#if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32)
582 /* 581 /*
@@ -674,7 +673,7 @@ xfs_file_compat_ioctl(
674 case XFS_IOC_FSBULKSTAT_32: 673 case XFS_IOC_FSBULKSTAT_32:
675 case XFS_IOC_FSBULKSTAT_SINGLE_32: 674 case XFS_IOC_FSBULKSTAT_SINGLE_32:
676 case XFS_IOC_FSINUMBERS_32: 675 case XFS_IOC_FSINUMBERS_32:
677 return xfs_compat_ioc_bulkstat(mp, cmd, arg); 676 return xfs_compat_ioc_fsbulkstat(mp, cmd, arg);
678 case XFS_IOC_FD_TO_HANDLE_32: 677 case XFS_IOC_FD_TO_HANDLE_32:
679 case XFS_IOC_PATH_TO_HANDLE_32: 678 case XFS_IOC_PATH_TO_HANDLE_32:
680 case XFS_IOC_PATH_TO_FSHANDLE_32: { 679 case XFS_IOC_PATH_TO_FSHANDLE_32: {
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index d28fa824284a..7985344d3aa6 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -36,7 +36,7 @@ typedef struct compat_xfs_bstime {
36 __s32 tv_nsec; /* and nanoseconds */ 36 __s32 tv_nsec; /* and nanoseconds */
37} compat_xfs_bstime_t; 37} compat_xfs_bstime_t;
38 38
39typedef struct compat_xfs_bstat { 39struct compat_xfs_bstat {
40 __u64 bs_ino; /* inode number */ 40 __u64 bs_ino; /* inode number */
41 __u16 bs_mode; /* type and mode */ 41 __u16 bs_mode; /* type and mode */
42 __u16 bs_nlink; /* number of links */ 42 __u16 bs_nlink; /* number of links */
@@ -61,14 +61,14 @@ typedef struct compat_xfs_bstat {
61 __u32 bs_dmevmask; /* DMIG event mask */ 61 __u32 bs_dmevmask; /* DMIG event mask */
62 __u16 bs_dmstate; /* DMIG state info */ 62 __u16 bs_dmstate; /* DMIG state info */
63 __u16 bs_aextents; /* attribute number of extents */ 63 __u16 bs_aextents; /* attribute number of extents */
64} __compat_packed compat_xfs_bstat_t; 64} __compat_packed;
65 65
66typedef struct compat_xfs_fsop_bulkreq { 66struct compat_xfs_fsop_bulkreq {
67 compat_uptr_t lastip; /* last inode # pointer */ 67 compat_uptr_t lastip; /* last inode # pointer */
68 __s32 icount; /* count of entries in buffer */ 68 __s32 icount; /* count of entries in buffer */
69 compat_uptr_t ubuffer; /* user buffer for inode desc. */ 69 compat_uptr_t ubuffer; /* user buffer for inode desc. */
70 compat_uptr_t ocount; /* output count pointer */ 70 compat_uptr_t ocount; /* output count pointer */
71} compat_xfs_fsop_bulkreq_t; 71};
72 72
73#define XFS_IOC_FSBULKSTAT_32 \ 73#define XFS_IOC_FSBULKSTAT_32 \
74 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) 74 _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
@@ -106,7 +106,7 @@ typedef struct compat_xfs_swapext {
106 xfs_off_t sx_offset; /* offset into file */ 106 xfs_off_t sx_offset; /* offset into file */
107 xfs_off_t sx_length; /* leng from offset */ 107 xfs_off_t sx_length; /* leng from offset */
108 char sx_pad[16]; /* pad space, unused */ 108 char sx_pad[16]; /* pad space, unused */
109 compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ 109 struct compat_xfs_bstat sx_stat; /* stat of target b4 copy */
110} __compat_packed compat_xfs_swapext_t; 110} __compat_packed compat_xfs_swapext_t;
111 111
112#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) 112#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext)
@@ -201,11 +201,11 @@ typedef struct compat_xfs_fsop_geom_v1 {
201#define XFS_IOC_FSGEOMETRY_V1_32 \ 201#define XFS_IOC_FSGEOMETRY_V1_32 \
202 _IOR('X', 100, struct compat_xfs_fsop_geom_v1) 202 _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
203 203
204typedef struct compat_xfs_inogrp { 204struct compat_xfs_inogrp {
205 __u64 xi_startino; /* starting inode number */ 205 __u64 xi_startino; /* starting inode number */
206 __s32 xi_alloccount; /* # bits set in allocmask */ 206 __s32 xi_alloccount; /* # bits set in allocmask */
207 __u64 xi_allocmask; /* mask of allocated inodes */ 207 __u64 xi_allocmask; /* mask of allocated inodes */
208} __attribute__((packed)) compat_xfs_inogrp_t; 208} __attribute__((packed));
209 209
210/* These growfs input structures have padding on the end, so must translate */ 210/* These growfs input structures have padding on the end, so must translate */
211typedef struct compat_xfs_growfs_data { 211typedef struct compat_xfs_growfs_data {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 63d323916bba..3a4310d7cb59 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -4,7 +4,6 @@
4 * Copyright (c) 2016-2018 Christoph Hellwig. 4 * Copyright (c) 2016-2018 Christoph Hellwig.
5 * All Rights Reserved. 5 * All Rights Reserved.
6 */ 6 */
7#include <linux/iomap.h>
8#include "xfs.h" 7#include "xfs.h"
9#include "xfs_fs.h" 8#include "xfs_fs.h"
10#include "xfs_shared.h" 9#include "xfs_shared.h"
@@ -12,7 +11,6 @@
12#include "xfs_log_format.h" 11#include "xfs_log_format.h"
13#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
14#include "xfs_mount.h" 13#include "xfs_mount.h"
15#include "xfs_defer.h"
16#include "xfs_inode.h" 14#include "xfs_inode.h"
17#include "xfs_btree.h" 15#include "xfs_btree.h"
18#include "xfs_bmap_btree.h" 16#include "xfs_bmap_btree.h"
@@ -25,7 +23,6 @@
25#include "xfs_inode_item.h" 23#include "xfs_inode_item.h"
26#include "xfs_iomap.h" 24#include "xfs_iomap.h"
27#include "xfs_trace.h" 25#include "xfs_trace.h"
28#include "xfs_icache.h"
29#include "xfs_quota.h" 26#include "xfs_quota.h"
30#include "xfs_dquot_item.h" 27#include "xfs_dquot_item.h"
31#include "xfs_dquot.h" 28#include "xfs_dquot.h"
@@ -779,7 +776,7 @@ xfs_iomap_write_unwritten(
779 * complete here and might deadlock on the iolock. 776 * complete here and might deadlock on the iolock.
780 */ 777 */
781 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 778 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
782 XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); 779 XFS_TRANS_RESERVE, &tp);
783 if (error) 780 if (error)
784 return error; 781 return error;
785 782
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 74047bd0c1ae..ff3c1fae5357 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -10,30 +10,20 @@
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_da_format.h"
14#include "xfs_inode.h" 13#include "xfs_inode.h"
15#include "xfs_bmap.h"
16#include "xfs_bmap_util.h"
17#include "xfs_acl.h" 14#include "xfs_acl.h"
18#include "xfs_quota.h" 15#include "xfs_quota.h"
19#include "xfs_error.h"
20#include "xfs_attr.h" 16#include "xfs_attr.h"
21#include "xfs_trans.h" 17#include "xfs_trans.h"
22#include "xfs_trace.h" 18#include "xfs_trace.h"
23#include "xfs_icache.h" 19#include "xfs_icache.h"
24#include "xfs_symlink.h" 20#include "xfs_symlink.h"
25#include "xfs_da_btree.h"
26#include "xfs_dir2.h" 21#include "xfs_dir2.h"
27#include "xfs_trans_space.h"
28#include "xfs_iomap.h" 22#include "xfs_iomap.h"
29#include "xfs_defer.h"
30 23
31#include <linux/capability.h>
32#include <linux/xattr.h> 24#include <linux/xattr.h>
33#include <linux/posix_acl.h> 25#include <linux/posix_acl.h>
34#include <linux/security.h> 26#include <linux/security.h>
35#include <linux/iomap.h>
36#include <linux/slab.h>
37#include <linux/iversion.h> 27#include <linux/iversion.h>
38 28
39/* 29/*
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 1e1a0af1dd34..a8a06bb78ea8 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -14,46 +14,66 @@
14#include "xfs_btree.h" 14#include "xfs_btree.h"
15#include "xfs_ialloc.h" 15#include "xfs_ialloc.h"
16#include "xfs_ialloc_btree.h" 16#include "xfs_ialloc_btree.h"
17#include "xfs_iwalk.h"
17#include "xfs_itable.h" 18#include "xfs_itable.h"
18#include "xfs_error.h" 19#include "xfs_error.h"
19#include "xfs_trace.h"
20#include "xfs_icache.h" 20#include "xfs_icache.h"
21#include "xfs_health.h" 21#include "xfs_health.h"
22 22
23/* 23/*
24 * Return stat information for one inode. 24 * Bulk Stat
25 * Return 0 if ok, else errno. 25 * =========
26 *
27 * Use the inode walking functions to fill out struct xfs_bulkstat for every
28 * allocated inode, then pass the stat information to some externally provided
29 * iteration function.
26 */ 30 */
27int 31
32struct xfs_bstat_chunk {
33 bulkstat_one_fmt_pf formatter;
34 struct xfs_ibulk *breq;
35 struct xfs_bulkstat *buf;
36};
37
38/*
39 * Fill out the bulkstat info for a single inode and report it somewhere.
40 *
41 * bc->breq->lastino is effectively the inode cursor as we walk through the
42 * filesystem. Therefore, we update it any time we need to move the cursor
43 * forward, regardless of whether or not we're sending any bstat information
44 * back to userspace. If the inode is internal metadata or, has been freed
45 * out from under us, we just simply keep going.
46 *
47 * However, if any other type of error happens we want to stop right where we
48 * are so that userspace will call back with exact number of the bad inode and
49 * we can send back an error code.
50 *
51 * Note that if the formatter tells us there's no space left in the buffer we
52 * move the cursor forward and abort the walk.
53 */
54STATIC int
28xfs_bulkstat_one_int( 55xfs_bulkstat_one_int(
29 struct xfs_mount *mp, /* mount point for filesystem */ 56 struct xfs_mount *mp,
30 xfs_ino_t ino, /* inode to get data for */ 57 struct xfs_trans *tp,
31 void __user *buffer, /* buffer to place output in */ 58 xfs_ino_t ino,
32 int ubsize, /* size of buffer */ 59 struct xfs_bstat_chunk *bc)
33 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
34 int *ubused, /* bytes used by me */
35 int *stat) /* BULKSTAT_RV_... */
36{ 60{
37 struct xfs_icdinode *dic; /* dinode core info pointer */ 61 struct xfs_icdinode *dic; /* dinode core info pointer */
38 struct xfs_inode *ip; /* incore inode pointer */ 62 struct xfs_inode *ip; /* incore inode pointer */
39 struct inode *inode; 63 struct inode *inode;
40 struct xfs_bstat *buf; /* return buffer */ 64 struct xfs_bulkstat *buf = bc->buf;
41 int error = 0; /* error value */ 65 int error = -EINVAL;
42 66
43 *stat = BULKSTAT_RV_NOTHING; 67 if (xfs_internal_inum(mp, ino))
68 goto out_advance;
44 69
45 if (!buffer || xfs_internal_inum(mp, ino)) 70 error = xfs_iget(mp, tp, ino,
46 return -EINVAL;
47
48 buf = kmem_zalloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
49 if (!buf)
50 return -ENOMEM;
51
52 error = xfs_iget(mp, NULL, ino,
53 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), 71 (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
54 XFS_ILOCK_SHARED, &ip); 72 XFS_ILOCK_SHARED, &ip);
73 if (error == -ENOENT || error == -EINVAL)
74 goto out_advance;
55 if (error) 75 if (error)
56 goto out_free; 76 goto out;
57 77
58 ASSERT(ip != NULL); 78 ASSERT(ip != NULL);
59 ASSERT(ip->i_imap.im_blkno != 0); 79 ASSERT(ip->i_imap.im_blkno != 0);
@@ -64,37 +84,35 @@ xfs_bulkstat_one_int(
64 /* xfs_iget returns the following without needing 84 /* xfs_iget returns the following without needing
65 * further change. 85 * further change.
66 */ 86 */
67 buf->bs_projid_lo = dic->di_projid_lo; 87 buf->bs_projectid = xfs_get_projid(ip);
68 buf->bs_projid_hi = dic->di_projid_hi;
69 buf->bs_ino = ino; 88 buf->bs_ino = ino;
70 buf->bs_uid = dic->di_uid; 89 buf->bs_uid = dic->di_uid;
71 buf->bs_gid = dic->di_gid; 90 buf->bs_gid = dic->di_gid;
72 buf->bs_size = dic->di_size; 91 buf->bs_size = dic->di_size;
73 92
74 buf->bs_nlink = inode->i_nlink; 93 buf->bs_nlink = inode->i_nlink;
75 buf->bs_atime.tv_sec = inode->i_atime.tv_sec; 94 buf->bs_atime = inode->i_atime.tv_sec;
76 buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec; 95 buf->bs_atime_nsec = inode->i_atime.tv_nsec;
77 buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec; 96 buf->bs_mtime = inode->i_mtime.tv_sec;
78 buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec; 97 buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
79 buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec; 98 buf->bs_ctime = inode->i_ctime.tv_sec;
80 buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec; 99 buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
100 buf->bs_btime = dic->di_crtime.t_sec;
101 buf->bs_btime_nsec = dic->di_crtime.t_nsec;
81 buf->bs_gen = inode->i_generation; 102 buf->bs_gen = inode->i_generation;
82 buf->bs_mode = inode->i_mode; 103 buf->bs_mode = inode->i_mode;
83 104
84 buf->bs_xflags = xfs_ip2xflags(ip); 105 buf->bs_xflags = xfs_ip2xflags(ip);
85 buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog; 106 buf->bs_extsize_blks = dic->di_extsize;
86 buf->bs_extents = dic->di_nextents; 107 buf->bs_extents = dic->di_nextents;
87 memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
88 xfs_bulkstat_health(ip, buf); 108 xfs_bulkstat_health(ip, buf);
89 buf->bs_dmevmask = dic->di_dmevmask;
90 buf->bs_dmstate = dic->di_dmstate;
91 buf->bs_aextents = dic->di_anextents; 109 buf->bs_aextents = dic->di_anextents;
92 buf->bs_forkoff = XFS_IFORK_BOFF(ip); 110 buf->bs_forkoff = XFS_IFORK_BOFF(ip);
111 buf->bs_version = XFS_BULKSTAT_VERSION_V5;
93 112
94 if (dic->di_version == 3) { 113 if (dic->di_version == 3) {
95 if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE) 114 if (dic->di_flags2 & XFS_DIFLAG2_COWEXTSIZE)
96 buf->bs_cowextsize = dic->di_cowextsize << 115 buf->bs_cowextsize_blks = dic->di_cowextsize;
97 mp->m_sb.sb_blocklog;
98 } 116 }
99 117
100 switch (dic->di_format) { 118 switch (dic->di_format) {
@@ -118,385 +136,121 @@ xfs_bulkstat_one_int(
118 xfs_iunlock(ip, XFS_ILOCK_SHARED); 136 xfs_iunlock(ip, XFS_ILOCK_SHARED);
119 xfs_irele(ip); 137 xfs_irele(ip);
120 138
121 error = formatter(buffer, ubsize, ubused, buf); 139 error = bc->formatter(bc->breq, buf);
122 if (!error) 140 if (error == XFS_IBULK_ABORT)
123 *stat = BULKSTAT_RV_DIDONE; 141 goto out_advance;
142 if (error)
143 goto out;
124 144
125 out_free: 145out_advance:
126 kmem_free(buf); 146 /*
147 * Advance the cursor to the inode that comes after the one we just
148 * looked at. We want the caller to move along if the bulkstat
149 * information was copied successfully; if we tried to grab the inode
150 * but it's no longer allocated; or if it's internal metadata.
151 */
152 bc->breq->startino = ino + 1;
153out:
127 return error; 154 return error;
128} 155}
129 156
130/* Return 0 on success or positive error */ 157/* Bulkstat a single inode. */
131STATIC int
132xfs_bulkstat_one_fmt(
133 void __user *ubuffer,
134 int ubsize,
135 int *ubused,
136 const xfs_bstat_t *buffer)
137{
138 if (ubsize < sizeof(*buffer))
139 return -ENOMEM;
140 if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
141 return -EFAULT;
142 if (ubused)
143 *ubused = sizeof(*buffer);
144 return 0;
145}
146
147int 158int
148xfs_bulkstat_one( 159xfs_bulkstat_one(
149 xfs_mount_t *mp, /* mount point for filesystem */ 160 struct xfs_ibulk *breq,
150 xfs_ino_t ino, /* inode number to get data for */ 161 bulkstat_one_fmt_pf formatter)
151 void __user *buffer, /* buffer to place output in */
152 int ubsize, /* size of buffer */
153 int *ubused, /* bytes used by me */
154 int *stat) /* BULKSTAT_RV_... */
155{ 162{
156 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 163 struct xfs_bstat_chunk bc = {
157 xfs_bulkstat_one_fmt, ubused, stat); 164 .formatter = formatter,
158} 165 .breq = breq,
166 };
167 int error;
159 168
160/* 169 ASSERT(breq->icount == 1);
161 * Loop over all clusters in a chunk for a given incore inode allocation btree
162 * record. Do a readahead if there are any allocated inodes in that cluster.
163 */
164STATIC void
165xfs_bulkstat_ichunk_ra(
166 struct xfs_mount *mp,
167 xfs_agnumber_t agno,
168 struct xfs_inobt_rec_incore *irec)
169{
170 xfs_agblock_t agbno;
171 struct blk_plug plug;
172 int i; /* inode chunk index */
173
174 agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
175
176 blk_start_plug(&plug);
177 for (i = 0; i < XFS_INODES_PER_CHUNK;
178 i += mp->m_inodes_per_cluster, agbno += mp->m_blocks_per_cluster) {
179 if (xfs_inobt_maskn(i, mp->m_inodes_per_cluster) &
180 ~irec->ir_free) {
181 xfs_btree_reada_bufs(mp, agno, agbno,
182 mp->m_blocks_per_cluster,
183 &xfs_inode_buf_ops);
184 }
185 }
186 blk_finish_plug(&plug);
187}
188 170
189/* 171 bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
190 * Lookup the inode chunk that the given inode lives in and then get the record 172 KM_SLEEP | KM_MAYFAIL);
191 * if we found the chunk. If the inode was not the last in the chunk and there 173 if (!bc.buf)
192 * are some left allocated, update the data for the pointed-to record as well as 174 return -ENOMEM;
193 * return the count of grabbed inodes.
194 */
195STATIC int
196xfs_bulkstat_grab_ichunk(
197 struct xfs_btree_cur *cur, /* btree cursor */
198 xfs_agino_t agino, /* starting inode of chunk */
199 int *icount,/* return # of inodes grabbed */
200 struct xfs_inobt_rec_incore *irec) /* btree record */
201{
202 int idx; /* index into inode chunk */
203 int stat;
204 int error = 0;
205
206 /* Lookup the inode chunk that this inode lives in */
207 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &stat);
208 if (error)
209 return error;
210 if (!stat) {
211 *icount = 0;
212 return error;
213 }
214 175
215 /* Get the record, should always work */ 176 error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc);
216 error = xfs_inobt_get_rec(cur, irec, &stat);
217 if (error)
218 return error;
219 XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, stat == 1);
220 177
221 /* Check if the record contains the inode in request */ 178 kmem_free(bc.buf);
222 if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino) {
223 *icount = 0;
224 return 0;
225 }
226 179
227 idx = agino - irec->ir_startino + 1; 180 /*
228 if (idx < XFS_INODES_PER_CHUNK && 181 * If we reported one inode to userspace then we abort because we hit
229 (xfs_inobt_maskn(idx, XFS_INODES_PER_CHUNK - idx) & ~irec->ir_free)) { 182 * the end of the buffer. Don't leak that back to userspace.
230 int i; 183 */
231 184 if (error == XFS_IWALK_ABORT)
232 /* We got a right chunk with some left inodes allocated at it. 185 error = 0;
233 * Grab the chunk record. Mark all the uninteresting inodes
234 * free -- because they're before our start point.
235 */
236 for (i = 0; i < idx; i++) {
237 if (XFS_INOBT_MASK(i) & ~irec->ir_free)
238 irec->ir_freecount++;
239 }
240
241 irec->ir_free |= xfs_inobt_maskn(0, idx);
242 *icount = irec->ir_count - irec->ir_freecount;
243 }
244 186
245 return 0; 187 return error;
246} 188}
247 189
248#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
249
250struct xfs_bulkstat_agichunk {
251 char __user **ac_ubuffer;/* pointer into user's buffer */
252 int ac_ubleft; /* bytes left in user's buffer */
253 int ac_ubelem; /* spaces used in user's buffer */
254};
255
256/*
257 * Process inodes in chunk with a pointer to a formatter function
258 * that will iget the inode and fill in the appropriate structure.
259 */
260static int 190static int
261xfs_bulkstat_ag_ichunk( 191xfs_bulkstat_iwalk(
262 struct xfs_mount *mp, 192 struct xfs_mount *mp,
263 xfs_agnumber_t agno, 193 struct xfs_trans *tp,
264 struct xfs_inobt_rec_incore *irbp, 194 xfs_ino_t ino,
265 bulkstat_one_pf formatter, 195 void *data)
266 size_t statstruct_size,
267 struct xfs_bulkstat_agichunk *acp,
268 xfs_agino_t *last_agino)
269{ 196{
270 char __user **ubufp = acp->ac_ubuffer; 197 int error;
271 int chunkidx;
272 int error = 0;
273 xfs_agino_t agino = irbp->ir_startino;
274
275 for (chunkidx = 0; chunkidx < XFS_INODES_PER_CHUNK;
276 chunkidx++, agino++) {
277 int fmterror;
278 int ubused;
279
280 /* inode won't fit in buffer, we are done */
281 if (acp->ac_ubleft < statstruct_size)
282 break;
283
284 /* Skip if this inode is free */
285 if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free)
286 continue;
287
288 /* Get the inode and fill in a single buffer */
289 ubused = statstruct_size;
290 error = formatter(mp, XFS_AGINO_TO_INO(mp, agno, agino),
291 *ubufp, acp->ac_ubleft, &ubused, &fmterror);
292
293 if (fmterror == BULKSTAT_RV_GIVEUP ||
294 (error && error != -ENOENT && error != -EINVAL)) {
295 acp->ac_ubleft = 0;
296 ASSERT(error);
297 break;
298 }
299
300 /* be careful not to leak error if at end of chunk */
301 if (fmterror == BULKSTAT_RV_NOTHING || error) {
302 error = 0;
303 continue;
304 }
305
306 *ubufp += ubused;
307 acp->ac_ubleft -= ubused;
308 acp->ac_ubelem++;
309 }
310
311 /*
312 * Post-update *last_agino. At this point, agino will always point one
313 * inode past the last inode we processed successfully. Hence we
314 * substract that inode when setting the *last_agino cursor so that we
315 * return the correct cookie to userspace. On the next bulkstat call,
316 * the inode under the lastino cookie will be skipped as we have already
317 * processed it here.
318 */
319 *last_agino = agino - 1;
320 198
199 error = xfs_bulkstat_one_int(mp, tp, ino, data);
200 /* bulkstat just skips over missing inodes */
201 if (error == -ENOENT || error == -EINVAL)
202 return 0;
321 return error; 203 return error;
322} 204}
323 205
324/* 206/*
325 * Return stat information in bulk (by-inode) for the filesystem. 207 * Check the incoming lastino parameter.
208 *
209 * We allow any inode value that could map to physical space inside the
210 * filesystem because if there are no inodes there, bulkstat moves on to the
211 * next chunk. In other words, the magic agino value of zero takes us to the
212 * first chunk in the AG, and an agino value past the end of the AG takes us to
213 * the first chunk in the next AG.
214 *
215 * Therefore we can end early if the requested inode is beyond the end of the
216 * filesystem or doesn't map properly.
326 */ 217 */
327int /* error status */ 218static inline bool
328xfs_bulkstat( 219xfs_bulkstat_already_done(
329 xfs_mount_t *mp, /* mount point for filesystem */ 220 struct xfs_mount *mp,
330 xfs_ino_t *lastinop, /* last inode returned */ 221 xfs_ino_t startino)
331 int *ubcountp, /* size of buffer/count returned */
332 bulkstat_one_pf formatter, /* func that'd fill a single buf */
333 size_t statstruct_size, /* sizeof struct filling */
334 char __user *ubuffer, /* buffer with inode stats */
335 int *done) /* 1 if there are more stats to get */
336{ 222{
337 xfs_buf_t *agbp; /* agi header buffer */ 223 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
338 xfs_agino_t agino; /* inode # in allocation group */ 224 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, startino);
339 xfs_agnumber_t agno; /* allocation group number */
340 xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */
341 xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */
342 int nirbuf; /* size of irbuf */
343 int ubcount; /* size of user's buffer */
344 struct xfs_bulkstat_agichunk ac;
345 int error = 0;
346 225
347 /* 226 return agno >= mp->m_sb.sb_agcount ||
348 * Get the last inode value, see if there's nothing to do. 227 startino != XFS_AGINO_TO_INO(mp, agno, agino);
349 */ 228}
350 agno = XFS_INO_TO_AGNO(mp, *lastinop);
351 agino = XFS_INO_TO_AGINO(mp, *lastinop);
352 if (agno >= mp->m_sb.sb_agcount ||
353 *lastinop != XFS_AGINO_TO_INO(mp, agno, agino)) {
354 *done = 1;
355 *ubcountp = 0;
356 return 0;
357 }
358 229
359 ubcount = *ubcountp; /* statstruct's */ 230/* Return stat information in bulk (by-inode) for the filesystem. */
360 ac.ac_ubuffer = &ubuffer; 231int
361 ac.ac_ubleft = ubcount * statstruct_size; /* bytes */; 232xfs_bulkstat(
362 ac.ac_ubelem = 0; 233 struct xfs_ibulk *breq,
234 bulkstat_one_fmt_pf formatter)
235{
236 struct xfs_bstat_chunk bc = {
237 .formatter = formatter,
238 .breq = breq,
239 };
240 int error;
363 241
364 *ubcountp = 0; 242 if (xfs_bulkstat_already_done(breq->mp, breq->startino))
365 *done = 0; 243 return 0;
366 244
367 irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); 245 bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
368 if (!irbuf) 246 KM_SLEEP | KM_MAYFAIL);
247 if (!bc.buf)
369 return -ENOMEM; 248 return -ENOMEM;
370 nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf);
371 249
372 /* 250 error = xfs_iwalk(breq->mp, NULL, breq->startino, breq->flags,
373 * Loop over the allocation groups, starting from the last 251 xfs_bulkstat_iwalk, breq->icount, &bc);
374 * inode returned; 0 means start of the allocation group. 252
375 */ 253 kmem_free(bc.buf);
376 while (agno < mp->m_sb.sb_agcount) {
377 struct xfs_inobt_rec_incore *irbp = irbuf;
378 struct xfs_inobt_rec_incore *irbufend = irbuf + nirbuf;
379 bool end_of_ag = false;
380 int icount = 0;
381 int stat;
382
383 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
384 if (error)
385 break;
386 /*
387 * Allocate and initialize a btree cursor for ialloc btree.
388 */
389 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
390 XFS_BTNUM_INO);
391 if (agino > 0) {
392 /*
393 * In the middle of an allocation group, we need to get
394 * the remainder of the chunk we're in.
395 */
396 struct xfs_inobt_rec_incore r;
397
398 error = xfs_bulkstat_grab_ichunk(cur, agino, &icount, &r);
399 if (error)
400 goto del_cursor;
401 if (icount) {
402 irbp->ir_startino = r.ir_startino;
403 irbp->ir_holemask = r.ir_holemask;
404 irbp->ir_count = r.ir_count;
405 irbp->ir_freecount = r.ir_freecount;
406 irbp->ir_free = r.ir_free;
407 irbp++;
408 }
409 /* Increment to the next record */
410 error = xfs_btree_increment(cur, 0, &stat);
411 } else {
412 /* Start of ag. Lookup the first inode chunk */
413 error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &stat);
414 }
415 if (error || stat == 0) {
416 end_of_ag = true;
417 goto del_cursor;
418 }
419
420 /*
421 * Loop through inode btree records in this ag,
422 * until we run out of inodes or space in the buffer.
423 */
424 while (irbp < irbufend && icount < ubcount) {
425 struct xfs_inobt_rec_incore r;
426
427 error = xfs_inobt_get_rec(cur, &r, &stat);
428 if (error || stat == 0) {
429 end_of_ag = true;
430 goto del_cursor;
431 }
432
433 /*
434 * If this chunk has any allocated inodes, save it.
435 * Also start read-ahead now for this chunk.
436 */
437 if (r.ir_freecount < r.ir_count) {
438 xfs_bulkstat_ichunk_ra(mp, agno, &r);
439 irbp->ir_startino = r.ir_startino;
440 irbp->ir_holemask = r.ir_holemask;
441 irbp->ir_count = r.ir_count;
442 irbp->ir_freecount = r.ir_freecount;
443 irbp->ir_free = r.ir_free;
444 irbp++;
445 icount += r.ir_count - r.ir_freecount;
446 }
447 error = xfs_btree_increment(cur, 0, &stat);
448 if (error || stat == 0) {
449 end_of_ag = true;
450 goto del_cursor;
451 }
452 cond_resched();
453 }
454
455 /*
456 * Drop the btree buffers and the agi buffer as we can't hold any
457 * of the locks these represent when calling iget. If there is a
458 * pending error, then we are done.
459 */
460del_cursor:
461 xfs_btree_del_cursor(cur, error);
462 xfs_buf_relse(agbp);
463 if (error)
464 break;
465 /*
466 * Now format all the good inodes into the user's buffer. The
467 * call to xfs_bulkstat_ag_ichunk() sets up the agino pointer
468 * for the next loop iteration.
469 */
470 irbufend = irbp;
471 for (irbp = irbuf;
472 irbp < irbufend && ac.ac_ubleft >= statstruct_size;
473 irbp++) {
474 error = xfs_bulkstat_ag_ichunk(mp, agno, irbp,
475 formatter, statstruct_size, &ac,
476 &agino);
477 if (error)
478 break;
479
480 cond_resched();
481 }
482
483 /*
484 * If we've run out of space or had a formatting error, we
485 * are now done
486 */
487 if (ac.ac_ubleft < statstruct_size || error)
488 break;
489
490 if (end_of_ag) {
491 agno++;
492 agino = 0;
493 }
494 }
495 /*
496 * Done, we're either out of filesystem or space to put the data.
497 */
498 kmem_free(irbuf);
499 *ubcountp = ac.ac_ubelem;
500 254
501 /* 255 /*
502 * We found some inodes, so clear the error status and return them. 256 * We found some inodes, so clear the error status and return them.
@@ -505,135 +259,136 @@ del_cursor:
505 * triggered again and propagated to userspace as there will be no 259 * triggered again and propagated to userspace as there will be no
506 * formatted inodes in the buffer. 260 * formatted inodes in the buffer.
507 */ 261 */
508 if (ac.ac_ubelem) 262 if (breq->ocount > 0)
509 error = 0; 263 error = 0;
510 264
511 /*
512 * If we ran out of filesystem, lastino will point off the end of
513 * the filesystem so the next call will return immediately.
514 */
515 *lastinop = XFS_AGINO_TO_INO(mp, agno, agino);
516 if (agno >= mp->m_sb.sb_agcount)
517 *done = 1;
518
519 return error; 265 return error;
520} 266}
521 267
522int 268/* Convert bulkstat (v5) to bstat (v1). */
523xfs_inumbers_fmt( 269void
524 void __user *ubuffer, /* buffer to write to */ 270xfs_bulkstat_to_bstat(
525 const struct xfs_inogrp *buffer, /* buffer to read from */ 271 struct xfs_mount *mp,
526 long count, /* # of elements to read */ 272 struct xfs_bstat *bs1,
527 long *written) /* # of bytes written */ 273 const struct xfs_bulkstat *bstat)
528{ 274{
529 if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer))) 275 memset(bs1, 0, sizeof(struct xfs_bstat));
530 return -EFAULT; 276 bs1->bs_ino = bstat->bs_ino;
531 *written = count * sizeof(*buffer); 277 bs1->bs_mode = bstat->bs_mode;
532 return 0; 278 bs1->bs_nlink = bstat->bs_nlink;
279 bs1->bs_uid = bstat->bs_uid;
280 bs1->bs_gid = bstat->bs_gid;
281 bs1->bs_rdev = bstat->bs_rdev;
282 bs1->bs_blksize = bstat->bs_blksize;
283 bs1->bs_size = bstat->bs_size;
284 bs1->bs_atime.tv_sec = bstat->bs_atime;
285 bs1->bs_mtime.tv_sec = bstat->bs_mtime;
286 bs1->bs_ctime.tv_sec = bstat->bs_ctime;
287 bs1->bs_atime.tv_nsec = bstat->bs_atime_nsec;
288 bs1->bs_mtime.tv_nsec = bstat->bs_mtime_nsec;
289 bs1->bs_ctime.tv_nsec = bstat->bs_ctime_nsec;
290 bs1->bs_blocks = bstat->bs_blocks;
291 bs1->bs_xflags = bstat->bs_xflags;
292 bs1->bs_extsize = XFS_FSB_TO_B(mp, bstat->bs_extsize_blks);
293 bs1->bs_extents = bstat->bs_extents;
294 bs1->bs_gen = bstat->bs_gen;
295 bs1->bs_projid_lo = bstat->bs_projectid & 0xFFFF;
296 bs1->bs_forkoff = bstat->bs_forkoff;
297 bs1->bs_projid_hi = bstat->bs_projectid >> 16;
298 bs1->bs_sick = bstat->bs_sick;
299 bs1->bs_checked = bstat->bs_checked;
300 bs1->bs_cowextsize = XFS_FSB_TO_B(mp, bstat->bs_cowextsize_blks);
301 bs1->bs_dmevmask = 0;
302 bs1->bs_dmstate = 0;
303 bs1->bs_aextents = bstat->bs_aextents;
304}
305
306struct xfs_inumbers_chunk {
307 inumbers_fmt_pf formatter;
308 struct xfs_ibulk *breq;
309};
310
311/*
312 * INUMBERS
313 * ========
314 * This is how we export inode btree records to userspace, so that XFS tools
315 * can figure out where inodes are allocated.
316 */
317
318/*
319 * Format the inode group structure and report it somewhere.
320 *
321 * Similar to xfs_bulkstat_one_int, lastino is the inode cursor as we walk
322 * through the filesystem so we move it forward unless there was a runtime
323 * error. If the formatter tells us the buffer is now full we also move the
324 * cursor forward and abort the walk.
325 */
326STATIC int
327xfs_inumbers_walk(
328 struct xfs_mount *mp,
329 struct xfs_trans *tp,
330 xfs_agnumber_t agno,
331 const struct xfs_inobt_rec_incore *irec,
332 void *data)
333{
334 struct xfs_inumbers inogrp = {
335 .xi_startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino),
336 .xi_alloccount = irec->ir_count - irec->ir_freecount,
337 .xi_allocmask = ~irec->ir_free,
338 .xi_version = XFS_INUMBERS_VERSION_V5,
339 };
340 struct xfs_inumbers_chunk *ic = data;
341 int error;
342
343 error = ic->formatter(ic->breq, &inogrp);
344 if (error && error != XFS_IBULK_ABORT)
345 return error;
346
347 ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) +
348 XFS_INODES_PER_CHUNK;
349 return error;
533} 350}
534 351
535/* 352/*
536 * Return inode number table for the filesystem. 353 * Return inode number table for the filesystem.
537 */ 354 */
538int /* error status */ 355int
539xfs_inumbers( 356xfs_inumbers(
540 struct xfs_mount *mp,/* mount point for filesystem */ 357 struct xfs_ibulk *breq,
541 xfs_ino_t *lastino,/* last inode returned */
542 int *count,/* size of buffer/count returned */
543 void __user *ubuffer,/* buffer with inode descriptions */
544 inumbers_fmt_pf formatter) 358 inumbers_fmt_pf formatter)
545{ 359{
546 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, *lastino); 360 struct xfs_inumbers_chunk ic = {
547 xfs_agino_t agino = XFS_INO_TO_AGINO(mp, *lastino); 361 .formatter = formatter,
548 struct xfs_btree_cur *cur = NULL; 362 .breq = breq,
549 struct xfs_buf *agbp = NULL; 363 };
550 struct xfs_inogrp *buffer;
551 int bcount;
552 int left = *count;
553 int bufidx = 0;
554 int error = 0; 364 int error = 0;
555 365
556 *count = 0; 366 if (xfs_bulkstat_already_done(breq->mp, breq->startino))
557 if (agno >= mp->m_sb.sb_agcount || 367 return 0;
558 *lastino != XFS_AGINO_TO_INO(mp, agno, agino))
559 return error;
560 368
561 bcount = min(left, (int)(PAGE_SIZE / sizeof(*buffer))); 369 error = xfs_inobt_walk(breq->mp, NULL, breq->startino, breq->flags,
562 buffer = kmem_zalloc(bcount * sizeof(*buffer), KM_SLEEP); 370 xfs_inumbers_walk, breq->icount, &ic);
563 do {
564 struct xfs_inobt_rec_incore r;
565 int stat;
566
567 if (!agbp) {
568 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
569 if (error)
570 break;
571
572 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
573 XFS_BTNUM_INO);
574 error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
575 &stat);
576 if (error)
577 break;
578 if (!stat)
579 goto next_ag;
580 }
581
582 error = xfs_inobt_get_rec(cur, &r, &stat);
583 if (error)
584 break;
585 if (!stat)
586 goto next_ag;
587
588 agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
589 buffer[bufidx].xi_startino =
590 XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
591 buffer[bufidx].xi_alloccount = r.ir_count - r.ir_freecount;
592 buffer[bufidx].xi_allocmask = ~r.ir_free;
593 if (++bufidx == bcount) {
594 long written;
595
596 error = formatter(ubuffer, buffer, bufidx, &written);
597 if (error)
598 break;
599 ubuffer += written;
600 *count += bufidx;
601 bufidx = 0;
602 }
603 if (!--left)
604 break;
605
606 error = xfs_btree_increment(cur, 0, &stat);
607 if (error)
608 break;
609 if (stat)
610 continue;
611
612next_ag:
613 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
614 cur = NULL;
615 xfs_buf_relse(agbp);
616 agbp = NULL;
617 agino = 0;
618 agno++;
619 } while (agno < mp->m_sb.sb_agcount);
620
621 if (!error) {
622 if (bufidx) {
623 long written;
624
625 error = formatter(ubuffer, buffer, bufidx, &written);
626 if (!error)
627 *count += bufidx;
628 }
629 *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
630 }
631 371
632 kmem_free(buffer); 372 /*
633 if (cur) 373 * We found some inode groups, so clear the error status and return
634 xfs_btree_del_cursor(cur, error); 374 * them. The lastino pointer will point directly at the inode that
635 if (agbp) 375 * triggered any error that occurred, so on the next call the error
636 xfs_buf_relse(agbp); 376 * will be triggered again and propagated to userspace as there will be
377 * no formatted inode groups in the buffer.
378 */
379 if (breq->ocount > 0)
380 error = 0;
637 381
638 return error; 382 return error;
639} 383}
384
385/* Convert an inumbers (v5) struct to a inogrp (v1) struct. */
386void
387xfs_inumbers_to_inogrp(
388 struct xfs_inogrp *ig1,
389 const struct xfs_inumbers *ig)
390{
391 ig1->xi_startino = ig->xi_startino;
392 ig1->xi_alloccount = ig->xi_alloccount;
393 ig1->xi_allocmask = ig->xi_allocmask;
394}
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 8a822285b671..e90c1fc5b981 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -5,83 +5,55 @@
5#ifndef __XFS_ITABLE_H__ 5#ifndef __XFS_ITABLE_H__
6#define __XFS_ITABLE_H__ 6#define __XFS_ITABLE_H__
7 7
8/* 8/* In-memory representation of a userspace request for batch inode data. */
9 * xfs_bulkstat() is used to fill in xfs_bstat structures as well as dm_stat 9struct xfs_ibulk {
10 * structures (by the dmi library). This is a pointer to a formatter function 10 struct xfs_mount *mp;
11 * that will iget the inode and fill in the appropriate structure. 11 void __user *ubuffer; /* user output buffer */
12 * see xfs_bulkstat_one() and xfs_dm_bulkstat_one() in dmapi_xfs.c 12 xfs_ino_t startino; /* start with this inode */
13 */ 13 unsigned int icount; /* number of elements in ubuffer */
14typedef int (*bulkstat_one_pf)(struct xfs_mount *mp, 14 unsigned int ocount; /* number of records returned */
15 xfs_ino_t ino, 15 unsigned int flags; /* see XFS_IBULK_FLAG_* */
16 void __user *buffer, 16};
17 int ubsize, 17
18 int *ubused, 18/* Only iterate within the same AG as startino */
19 int *stat); 19#define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG)
20
21/* Return value that means we want to abort the walk. */
22#define XFS_IBULK_ABORT (XFS_IWALK_ABORT)
20 23
21/* 24/*
22 * Values for stat return value. 25 * Advance the user buffer pointer by one record of the given size. If the
26 * buffer is now full, return the appropriate error code.
23 */ 27 */
24#define BULKSTAT_RV_NOTHING 0 28static inline int
25#define BULKSTAT_RV_DIDONE 1 29xfs_ibulk_advance(
26#define BULKSTAT_RV_GIVEUP 2 30 struct xfs_ibulk *breq,
31 size_t bytes)
32{
33 char __user *b = breq->ubuffer;
34
35 breq->ubuffer = b + bytes;
36 breq->ocount++;
37 return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0;
38}
27 39
28/* 40/*
29 * Return stat information in bulk (by-inode) for the filesystem. 41 * Return stat information in bulk (by-inode) for the filesystem.
30 */ 42 */
31int /* error status */
32xfs_bulkstat(
33 xfs_mount_t *mp, /* mount point for filesystem */
34 xfs_ino_t *lastino, /* last inode returned */
35 int *count, /* size of buffer/count returned */
36 bulkstat_one_pf formatter, /* func that'd fill a single buf */
37 size_t statstruct_size,/* sizeof struct that we're filling */
38 char __user *ubuffer,/* buffer with inode stats */
39 int *done); /* 1 if there are more stats to get */
40
41typedef int (*bulkstat_one_fmt_pf)( /* used size in bytes or negative error */
42 void __user *ubuffer, /* buffer to write to */
43 int ubsize, /* remaining user buffer sz */
44 int *ubused, /* bytes used by formatter */
45 const xfs_bstat_t *buffer); /* buffer to read from */
46
47int
48xfs_bulkstat_one_int(
49 xfs_mount_t *mp,
50 xfs_ino_t ino,
51 void __user *buffer,
52 int ubsize,
53 bulkstat_one_fmt_pf formatter,
54 int *ubused,
55 int *stat);
56 43
57int 44typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq,
58xfs_bulkstat_one( 45 const struct xfs_bulkstat *bstat);
59 xfs_mount_t *mp,
60 xfs_ino_t ino,
61 void __user *buffer,
62 int ubsize,
63 int *ubused,
64 int *stat);
65 46
66typedef int (*inumbers_fmt_pf)( 47int xfs_bulkstat_one(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
67 void __user *ubuffer, /* buffer to write to */ 48int xfs_bulkstat(struct xfs_ibulk *breq, bulkstat_one_fmt_pf formatter);
68 const xfs_inogrp_t *buffer, /* buffer to read from */ 49void xfs_bulkstat_to_bstat(struct xfs_mount *mp, struct xfs_bstat *bs1,
69 long count, /* # of elements to read */ 50 const struct xfs_bulkstat *bstat);
70 long *written); /* # of bytes written */
71 51
72int 52typedef int (*inumbers_fmt_pf)(struct xfs_ibulk *breq,
73xfs_inumbers_fmt( 53 const struct xfs_inumbers *igrp);
74 void __user *ubuffer, /* buffer to write to */
75 const xfs_inogrp_t *buffer, /* buffer to read from */
76 long count, /* # of elements to read */
77 long *written); /* # of bytes written */
78 54
79int /* error status */ 55int xfs_inumbers(struct xfs_ibulk *breq, inumbers_fmt_pf formatter);
80xfs_inumbers( 56void xfs_inumbers_to_inogrp(struct xfs_inogrp *ig1,
81 xfs_mount_t *mp, /* mount point for filesystem */ 57 const struct xfs_inumbers *ig);
82 xfs_ino_t *last, /* last inode returned */
83 int *count, /* size of buffer/count returned */
84 void __user *buffer, /* buffer with inode info */
85 inumbers_fmt_pf formatter);
86 58
87#endif /* __XFS_ITABLE_H__ */ 59#endif /* __XFS_ITABLE_H__ */
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
new file mode 100644
index 000000000000..8c7d727149ea
--- /dev/null
+++ b/fs/xfs/xfs_iwalk.c
@@ -0,0 +1,720 @@
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2019 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_inode.h"
14#include "xfs_btree.h"
15#include "xfs_ialloc.h"
16#include "xfs_ialloc_btree.h"
17#include "xfs_iwalk.h"
18#include "xfs_error.h"
19#include "xfs_trace.h"
20#include "xfs_icache.h"
21#include "xfs_health.h"
22#include "xfs_trans.h"
23#include "xfs_pwork.h"
24
25/*
26 * Walking Inodes in the Filesystem
27 * ================================
28 *
29 * This iterator function walks a subset of filesystem inodes in increasing
30 * order from @startino until there are no more inodes. For each allocated
31 * inode it finds, it calls a walk function with the relevant inode number and
32 * a pointer to caller-provided data. The walk function can return the usual
33 * negative error code to stop the iteration; 0 to continue the iteration; or
34 * XFS_IWALK_ABORT to stop the iteration. This return value is returned to the
35 * caller.
36 *
37 * Internally, we allow the walk function to do anything, which means that we
38 * cannot maintain the inobt cursor or our lock on the AGI buffer. We
39 * therefore cache the inobt records in kernel memory and only call the walk
40 * function when our memory buffer is full. @nr_recs is the number of records
41 * that we've cached, and @sz_recs is the size of our cache.
42 *
43 * It is the responsibility of the walk function to ensure it accesses
44 * allocated inodes, as the inobt records may be stale by the time they are
45 * acted upon.
46 */
47
48struct xfs_iwalk_ag {
49 /* parallel work control data; will be null if single threaded */
50 struct xfs_pwork pwork;
51
52 struct xfs_mount *mp;
53 struct xfs_trans *tp;
54
55 /* Where do we start the traversal? */
56 xfs_ino_t startino;
57
58 /* Array of inobt records we cache. */
59 struct xfs_inobt_rec_incore *recs;
60
61 /* Number of entries allocated for the @recs array. */
62 unsigned int sz_recs;
63
64 /* Number of entries in the @recs array that are in use. */
65 unsigned int nr_recs;
66
67 /* Inode walk function and data pointer. */
68 xfs_iwalk_fn iwalk_fn;
69 xfs_inobt_walk_fn inobt_walk_fn;
70 void *data;
71
72 /*
73 * Make it look like the inodes up to startino are free so that
74 * bulkstat can start its inode iteration at the correct place without
75 * needing to special case everywhere.
76 */
77 unsigned int trim_start:1;
78
79 /* Skip empty inobt records? */
80 unsigned int skip_empty:1;
81};
82
83/*
84 * Loop over all clusters in a chunk for a given incore inode allocation btree
85 * record. Do a readahead if there are any allocated inodes in that cluster.
86 */
87STATIC void
88xfs_iwalk_ichunk_ra(
89 struct xfs_mount *mp,
90 xfs_agnumber_t agno,
91 struct xfs_inobt_rec_incore *irec)
92{
93 struct xfs_ino_geometry *igeo = M_IGEO(mp);
94 xfs_agblock_t agbno;
95 struct blk_plug plug;
96 int i; /* inode chunk index */
97
98 agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
99
100 blk_start_plug(&plug);
101 for (i = 0; i < XFS_INODES_PER_CHUNK; i += igeo->inodes_per_cluster) {
102 xfs_inofree_t imask;
103
104 imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster);
105 if (imask & ~irec->ir_free) {
106 xfs_btree_reada_bufs(mp, agno, agbno,
107 igeo->blocks_per_cluster,
108 &xfs_inode_buf_ops);
109 }
110 agbno += igeo->blocks_per_cluster;
111 }
112 blk_finish_plug(&plug);
113}
114
115/*
116 * Set the bits in @irec's free mask that correspond to the inodes before
117 * @agino so that we skip them. This is how we restart an inode walk that was
118 * interrupted in the middle of an inode record.
119 */
120STATIC void
121xfs_iwalk_adjust_start(
122 xfs_agino_t agino, /* starting inode of chunk */
123 struct xfs_inobt_rec_incore *irec) /* btree record */
124{
125 int idx; /* index into inode chunk */
126 int i;
127
128 idx = agino - irec->ir_startino;
129
130 /*
131 * We got a right chunk with some left inodes allocated at it. Grab
132 * the chunk record. Mark all the uninteresting inodes free because
133 * they're before our start point.
134 */
135 for (i = 0; i < idx; i++) {
136 if (XFS_INOBT_MASK(i) & ~irec->ir_free)
137 irec->ir_freecount++;
138 }
139
140 irec->ir_free |= xfs_inobt_maskn(0, idx);
141}
142
143/* Allocate memory for a walk. */
144STATIC int
145xfs_iwalk_alloc(
146 struct xfs_iwalk_ag *iwag)
147{
148 size_t size;
149
150 ASSERT(iwag->recs == NULL);
151 iwag->nr_recs = 0;
152
153 /* Allocate a prefetch buffer for inobt records. */
154 size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore);
155 iwag->recs = kmem_alloc(size, KM_MAYFAIL);
156 if (iwag->recs == NULL)
157 return -ENOMEM;
158
159 return 0;
160}
161
162/* Free memory we allocated for a walk. */
163STATIC void
164xfs_iwalk_free(
165 struct xfs_iwalk_ag *iwag)
166{
167 kmem_free(iwag->recs);
168 iwag->recs = NULL;
169}
170
171/* For each inuse inode in each cached inobt record, call our function. */
172STATIC int
173xfs_iwalk_ag_recs(
174 struct xfs_iwalk_ag *iwag)
175{
176 struct xfs_mount *mp = iwag->mp;
177 struct xfs_trans *tp = iwag->tp;
178 xfs_ino_t ino;
179 unsigned int i, j;
180 xfs_agnumber_t agno;
181 int error;
182
183 agno = XFS_INO_TO_AGNO(mp, iwag->startino);
184 for (i = 0; i < iwag->nr_recs; i++) {
185 struct xfs_inobt_rec_incore *irec = &iwag->recs[i];
186
187 trace_xfs_iwalk_ag_rec(mp, agno, irec);
188
189 if (xfs_pwork_want_abort(&iwag->pwork))
190 return 0;
191
192 if (iwag->inobt_walk_fn) {
193 error = iwag->inobt_walk_fn(mp, tp, agno, irec,
194 iwag->data);
195 if (error)
196 return error;
197 }
198
199 if (!iwag->iwalk_fn)
200 continue;
201
202 for (j = 0; j < XFS_INODES_PER_CHUNK; j++) {
203 if (xfs_pwork_want_abort(&iwag->pwork))
204 return 0;
205
206 /* Skip if this inode is free */
207 if (XFS_INOBT_MASK(j) & irec->ir_free)
208 continue;
209
210 /* Otherwise call our function. */
211 ino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino + j);
212 error = iwag->iwalk_fn(mp, tp, ino, iwag->data);
213 if (error)
214 return error;
215 }
216 }
217
218 return 0;
219}
220
221/* Delete cursor and let go of AGI. */
222static inline void
223xfs_iwalk_del_inobt(
224 struct xfs_trans *tp,
225 struct xfs_btree_cur **curpp,
226 struct xfs_buf **agi_bpp,
227 int error)
228{
229 if (*curpp) {
230 xfs_btree_del_cursor(*curpp, error);
231 *curpp = NULL;
232 }
233 if (*agi_bpp) {
234 xfs_trans_brelse(tp, *agi_bpp);
235 *agi_bpp = NULL;
236 }
237}
238
239/*
240 * Set ourselves up for walking inobt records starting from a given point in
241 * the filesystem.
242 *
243 * If caller passed in a nonzero start inode number, load the record from the
244 * inobt and make the record look like all the inodes before agino are free so
245 * that we skip them, and then move the cursor to the next inobt record. This
246 * is how we support starting an iwalk in the middle of an inode chunk.
247 *
248 * If the caller passed in a start number of zero, move the cursor to the first
249 * inobt record.
250 *
251 * The caller is responsible for cleaning up the cursor and buffer pointer
252 * regardless of the error status.
253 */
254STATIC int
255xfs_iwalk_ag_start(
256 struct xfs_iwalk_ag *iwag,
257 xfs_agnumber_t agno,
258 xfs_agino_t agino,
259 struct xfs_btree_cur **curpp,
260 struct xfs_buf **agi_bpp,
261 int *has_more)
262{
263 struct xfs_mount *mp = iwag->mp;
264 struct xfs_trans *tp = iwag->tp;
265 struct xfs_inobt_rec_incore *irec;
266 int error;
267
268 /* Set up a fresh cursor and empty the inobt cache. */
269 iwag->nr_recs = 0;
270 error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
271 if (error)
272 return error;
273
274 /* Starting at the beginning of the AG? That's easy! */
275 if (agino == 0)
276 return xfs_inobt_lookup(*curpp, 0, XFS_LOOKUP_GE, has_more);
277
278 /*
279 * Otherwise, we have to grab the inobt record where we left off, stuff
280 * the record into our cache, and then see if there are more records.
281 * We require a lookup cache of at least two elements so that the
282 * caller doesn't have to deal with tearing down the cursor to walk the
283 * records.
284 */
285 error = xfs_inobt_lookup(*curpp, agino, XFS_LOOKUP_LE, has_more);
286 if (error)
287 return error;
288
289 /*
290 * If the LE lookup at @agino yields no records, jump ahead to the
291 * inobt cursor increment to see if there are more records to process.
292 */
293 if (!*has_more)
294 goto out_advance;
295
296 /* Get the record, should always work */
297 irec = &iwag->recs[iwag->nr_recs];
298 error = xfs_inobt_get_rec(*curpp, irec, has_more);
299 if (error)
300 return error;
301 XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1);
302
303 /*
304 * If the LE lookup yielded an inobt record before the cursor position,
305 * skip it and see if there's another one after it.
306 */
307 if (irec->ir_startino + XFS_INODES_PER_CHUNK <= agino)
308 goto out_advance;
309
310 /*
311 * If agino fell in the middle of the inode record, make it look like
312 * the inodes up to agino are free so that we don't return them again.
313 */
314 if (iwag->trim_start)
315 xfs_iwalk_adjust_start(agino, irec);
316
317 /*
318 * The prefetch calculation is supposed to give us a large enough inobt
319 * record cache that grab_ichunk can stage a partial first record and
320 * the loop body can cache a record without having to check for cache
321 * space until after it reads an inobt record.
322 */
323 iwag->nr_recs++;
324 ASSERT(iwag->nr_recs < iwag->sz_recs);
325
326out_advance:
327 return xfs_btree_increment(*curpp, 0, has_more);
328}
329
330/*
331 * The inobt record cache is full, so preserve the inobt cursor state and
332 * run callbacks on the cached inobt records. When we're done, restore the
333 * cursor state to wherever the cursor would have been had the cache not been
334 * full (and therefore we could've just incremented the cursor) if *@has_more
335 * is true. On exit, *@has_more will indicate whether or not the caller should
336 * try for more inode records.
337 */
338STATIC int
339xfs_iwalk_run_callbacks(
340 struct xfs_iwalk_ag *iwag,
341 xfs_agnumber_t agno,
342 struct xfs_btree_cur **curpp,
343 struct xfs_buf **agi_bpp,
344 int *has_more)
345{
346 struct xfs_mount *mp = iwag->mp;
347 struct xfs_trans *tp = iwag->tp;
348 struct xfs_inobt_rec_incore *irec;
349 xfs_agino_t restart;
350 int error;
351
352 ASSERT(iwag->nr_recs > 0);
353
354 /* Delete cursor but remember the last record we cached... */
355 xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
356 irec = &iwag->recs[iwag->nr_recs - 1];
357 restart = irec->ir_startino + XFS_INODES_PER_CHUNK - 1;
358
359 error = xfs_iwalk_ag_recs(iwag);
360 if (error)
361 return error;
362
363 /* ...empty the cache... */
364 iwag->nr_recs = 0;
365
366 if (!has_more)
367 return 0;
368
369 /* ...and recreate the cursor just past where we left off. */
370 error = xfs_inobt_cur(mp, tp, agno, XFS_BTNUM_INO, curpp, agi_bpp);
371 if (error)
372 return error;
373
374 return xfs_inobt_lookup(*curpp, restart, XFS_LOOKUP_GE, has_more);
375}
376
/* Walk all inodes in a single AG, from @iwag->startino to the end of the AG. */
STATIC int
xfs_iwalk_ag(
	struct xfs_iwalk_ag	*iwag)
{
	struct xfs_mount	*mp = iwag->mp;
	struct xfs_trans	*tp = iwag->tp;
	struct xfs_buf		*agi_bp = NULL;	/* AGI buffer, held while cur is live */
	struct xfs_btree_cur	*cur = NULL;	/* inobt cursor for this AG */
	xfs_agnumber_t		agno;
	xfs_agino_t		agino;
	int			has_more;	/* nonzero while records remain */
	int			error = 0;

	/* Set up our cursor at the right place in the inode btree. */
	agno = XFS_INO_TO_AGNO(mp, iwag->startino);
	agino = XFS_INO_TO_AGINO(mp, iwag->startino);
	error = xfs_iwalk_ag_start(iwag, agno, agino, &cur, &agi_bp, &has_more);

	while (!error && has_more) {
		struct xfs_inobt_rec_incore	*irec;

		/* Don't monopolize the CPU; honor parallel-work aborts. */
		cond_resched();
		if (xfs_pwork_want_abort(&iwag->pwork))
			goto out;

		/* Fetch the inobt record into the next free cache slot. */
		irec = &iwag->recs[iwag->nr_recs];
		error = xfs_inobt_get_rec(cur, irec, &has_more);
		if (error || !has_more)
			break;

		/* No allocated inodes in this chunk; skip it. */
		if (iwag->skip_empty && irec->ir_freecount == irec->ir_count) {
			error = xfs_btree_increment(cur, 0, &has_more);
			if (error)
				break;
			continue;
		}

		/*
		 * Start readahead for this inode chunk in anticipation of
		 * walking the inodes.  Only done for inode walks, not bare
		 * inobt record walks.
		 */
		if (iwag->iwalk_fn)
			xfs_iwalk_ichunk_ra(mp, agno, irec);

		/*
		 * If there's space in the buffer for more records, increment
		 * the btree cursor and grab more.
		 */
		if (++iwag->nr_recs < iwag->sz_recs) {
			error = xfs_btree_increment(cur, 0, &has_more);
			if (error || !has_more)
				break;
			continue;
		}

		/*
		 * Otherwise, we need to save cursor state and run the callback
		 * function on the cached records. The run_callbacks function
		 * is supposed to return a cursor pointing to the record where
		 * we would be if we had been able to increment like above.
		 */
		ASSERT(has_more);
		error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp,
				&has_more);
	}

	if (iwag->nr_recs == 0 || error)
		goto out;

	/* Walk the unprocessed records in the cache. */
	error = xfs_iwalk_run_callbacks(iwag, agno, &cur, &agi_bp, &has_more);

out:
	/* Release the btree cursor and the AGI buffer on all paths. */
	xfs_iwalk_del_inobt(tp, &cur, &agi_bp, error);
	return error;
}
456
457/*
458 * We experimentally determined that the reduction in ioctl call overhead
459 * diminishes when userspace asks for more than 2048 inodes, so we'll cap
460 * prefetch at this point.
461 */
462#define IWALK_MAX_INODE_PREFETCH (2048U)
463
464/*
465 * Given the number of inodes to prefetch, set the number of inobt records that
466 * we cache in memory, which controls the number of inodes we try to read
467 * ahead. Set the maximum if @inodes == 0.
468 */
469static inline unsigned int
470xfs_iwalk_prefetch(
471 unsigned int inodes)
472{
473 unsigned int inobt_records;
474
475 /*
476 * If the caller didn't tell us the number of inodes they wanted,
477 * assume the maximum prefetch possible for best performance.
478 * Otherwise, cap prefetch at that maximum so that we don't start an
479 * absurd amount of prefetch.
480 */
481 if (inodes == 0)
482 inodes = IWALK_MAX_INODE_PREFETCH;
483 inodes = min(inodes, IWALK_MAX_INODE_PREFETCH);
484
485 /* Round the inode count up to a full chunk. */
486 inodes = round_up(inodes, XFS_INODES_PER_CHUNK);
487
488 /*
489 * In order to convert the number of inodes to prefetch into an
490 * estimate of the number of inobt records to cache, we require a
491 * conversion factor that reflects our expectations of the average
492 * loading factor of an inode chunk. Based on data gathered, most
493 * (but not all) filesystems manage to keep the inode chunks totally
494 * full, so we'll underestimate slightly so that our readahead will
495 * still deliver the performance we want on aging filesystems:
496 *
497 * inobt = inodes / (INODES_PER_CHUNK * (4 / 5));
498 *
499 * The funny math is to avoid integer division.
500 */
501 inobt_records = (inodes * 5) / (4 * XFS_INODES_PER_CHUNK);
502
503 /*
504 * Allocate enough space to prefetch at least two inobt records so that
505 * we can cache both the record where the iwalk started and the next
506 * record. This simplifies the AG inode walk loop setup code.
507 */
508 return max(inobt_records, 2U);
509}
510
511/*
512 * Walk all inodes in the filesystem starting from @startino. The @iwalk_fn
513 * will be called for each allocated inode, being passed the inode's number and
514 * @data. @max_prefetch controls how many inobt records' worth of inodes we
515 * try to readahead.
516 */
517int
518xfs_iwalk(
519 struct xfs_mount *mp,
520 struct xfs_trans *tp,
521 xfs_ino_t startino,
522 unsigned int flags,
523 xfs_iwalk_fn iwalk_fn,
524 unsigned int inode_records,
525 void *data)
526{
527 struct xfs_iwalk_ag iwag = {
528 .mp = mp,
529 .tp = tp,
530 .iwalk_fn = iwalk_fn,
531 .data = data,
532 .startino = startino,
533 .sz_recs = xfs_iwalk_prefetch(inode_records),
534 .trim_start = 1,
535 .skip_empty = 1,
536 .pwork = XFS_PWORK_SINGLE_THREADED,
537 };
538 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
539 int error;
540
541 ASSERT(agno < mp->m_sb.sb_agcount);
542 ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
543
544 error = xfs_iwalk_alloc(&iwag);
545 if (error)
546 return error;
547
548 for (; agno < mp->m_sb.sb_agcount; agno++) {
549 error = xfs_iwalk_ag(&iwag);
550 if (error)
551 break;
552 iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
553 if (flags & XFS_INOBT_WALK_SAME_AG)
554 break;
555 }
556
557 xfs_iwalk_free(&iwag);
558 return error;
559}
560
/*
 * Run per-thread iwalk work.  @pwork is embedded in a struct xfs_iwalk_ag
 * that was heap-allocated by the queueing side (see xfs_iwalk_threaded);
 * this worker owns it and frees it before returning.
 */
static int
xfs_iwalk_ag_work(
	struct xfs_mount	*mp,
	struct xfs_pwork	*pwork)
{
	struct xfs_iwalk_ag	*iwag;
	int			error = 0;

	iwag = container_of(pwork, struct xfs_iwalk_ag, pwork);
	/* If the walk has already been aborted, just free our context. */
	if (xfs_pwork_want_abort(pwork))
		goto out;

	/* Allocate this worker's private inobt record cache. */
	error = xfs_iwalk_alloc(iwag);
	if (error)
		goto out;

	error = xfs_iwalk_ag(iwag);
	xfs_iwalk_free(iwag);
out:
	/* Free the context allocated by the queueing thread. */
	kmem_free(iwag);
	return error;
}
584
585/*
586 * Walk all the inodes in the filesystem using multiple threads to process each
587 * AG.
588 */
589int
590xfs_iwalk_threaded(
591 struct xfs_mount *mp,
592 xfs_ino_t startino,
593 unsigned int flags,
594 xfs_iwalk_fn iwalk_fn,
595 unsigned int inode_records,
596 bool polled,
597 void *data)
598{
599 struct xfs_pwork_ctl pctl;
600 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
601 unsigned int nr_threads;
602 int error;
603
604 ASSERT(agno < mp->m_sb.sb_agcount);
605 ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
606
607 nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
608 error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk",
609 nr_threads);
610 if (error)
611 return error;
612
613 for (; agno < mp->m_sb.sb_agcount; agno++) {
614 struct xfs_iwalk_ag *iwag;
615
616 if (xfs_pwork_ctl_want_abort(&pctl))
617 break;
618
619 iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP);
620 iwag->mp = mp;
621 iwag->iwalk_fn = iwalk_fn;
622 iwag->data = data;
623 iwag->startino = startino;
624 iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
625 xfs_pwork_queue(&pctl, &iwag->pwork);
626 startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
627 if (flags & XFS_INOBT_WALK_SAME_AG)
628 break;
629 }
630
631 if (polled)
632 xfs_pwork_poll(&pctl);
633 return xfs_pwork_destroy(&pctl);
634}
635
636/*
637 * Allow callers to cache up to a page's worth of inobt records. This reflects
638 * the existing inumbers prefetching behavior. Since the inobt walk does not
639 * itself do anything with the inobt records, we can set a fairly high limit
640 * here.
641 */
642#define MAX_INOBT_WALK_PREFETCH \
643 (PAGE_SIZE / sizeof(struct xfs_inobt_rec_incore))
644
645/*
646 * Given the number of records that the user wanted, set the number of inobt
647 * records that we buffer in memory. Set the maximum if @inobt_records == 0.
648 */
649static inline unsigned int
650xfs_inobt_walk_prefetch(
651 unsigned int inobt_records)
652{
653 /*
654 * If the caller didn't tell us the number of inobt records they
655 * wanted, assume the maximum prefetch possible for best performance.
656 */
657 if (inobt_records == 0)
658 inobt_records = MAX_INOBT_WALK_PREFETCH;
659
660 /*
661 * Allocate enough space to prefetch at least two inobt records so that
662 * we can cache both the record where the iwalk started and the next
663 * record. This simplifies the AG inode walk loop setup code.
664 */
665 inobt_records = max(inobt_records, 2U);
666
667 /*
668 * Cap prefetch at that maximum so that we don't use an absurd amount
669 * of memory.
670 */
671 return min_t(unsigned int, inobt_records, MAX_INOBT_WALK_PREFETCH);
672}
673
674/*
675 * Walk all inode btree records in the filesystem starting from @startino. The
676 * @inobt_walk_fn will be called for each btree record, being passed the incore
677 * record and @data. @max_prefetch controls how many inobt records we try to
678 * cache ahead of time.
679 */
680int
681xfs_inobt_walk(
682 struct xfs_mount *mp,
683 struct xfs_trans *tp,
684 xfs_ino_t startino,
685 unsigned int flags,
686 xfs_inobt_walk_fn inobt_walk_fn,
687 unsigned int inobt_records,
688 void *data)
689{
690 struct xfs_iwalk_ag iwag = {
691 .mp = mp,
692 .tp = tp,
693 .inobt_walk_fn = inobt_walk_fn,
694 .data = data,
695 .startino = startino,
696 .sz_recs = xfs_inobt_walk_prefetch(inobt_records),
697 .pwork = XFS_PWORK_SINGLE_THREADED,
698 };
699 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
700 int error;
701
702 ASSERT(agno < mp->m_sb.sb_agcount);
703 ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL));
704
705 error = xfs_iwalk_alloc(&iwag);
706 if (error)
707 return error;
708
709 for (; agno < mp->m_sb.sb_agcount; agno++) {
710 error = xfs_iwalk_ag(&iwag);
711 if (error)
712 break;
713 iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
714 if (flags & XFS_INOBT_WALK_SAME_AG)
715 break;
716 }
717
718 xfs_iwalk_free(&iwag);
719 return error;
720}
diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h
new file mode 100644
index 000000000000..6c960e10ed4d
--- /dev/null
+++ b/fs/xfs/xfs_iwalk.h
@@ -0,0 +1,46 @@
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * Copyright (C) 2019 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#ifndef __XFS_IWALK_H__
7#define __XFS_IWALK_H__
8
9/* Walk all inodes in the filesystem starting from @startino. */
10typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
11 xfs_ino_t ino, void *data);
12/* Return values for xfs_iwalk_fn. */
13#define XFS_IWALK_CONTINUE (XFS_ITER_CONTINUE)
14#define XFS_IWALK_ABORT (XFS_ITER_ABORT)
15
16int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino,
17 unsigned int flags, xfs_iwalk_fn iwalk_fn,
18 unsigned int inode_records, void *data);
19int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino,
20 unsigned int flags, xfs_iwalk_fn iwalk_fn,
21 unsigned int inode_records, bool poll, void *data);
22
23/* Only iterate inodes within the same AG as @startino. */
24#define XFS_IWALK_SAME_AG (0x1)
25
26#define XFS_IWALK_FLAGS_ALL (XFS_IWALK_SAME_AG)
27
28/* Walk all inode btree records in the filesystem starting from @startino. */
29typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
30 xfs_agnumber_t agno,
31 const struct xfs_inobt_rec_incore *irec,
32 void *data);
33/* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */
34#define XFS_INOBT_WALK_ABORT (XFS_IWALK_ABORT)
35
36int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp,
37 xfs_ino_t startino, unsigned int flags,
38 xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records,
39 void *data);
40
41/* Only iterate inobt records within the same AG as @startino. */
42#define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG)
43
44#define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG)
45
46#endif /* __XFS_IWALK_H__ */
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index edbd5a210df2..ca15105681ca 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -110,8 +110,6 @@ typedef __u32 xfs_nlink_t;
110#define current_restore_flags_nested(sp, f) \ 110#define current_restore_flags_nested(sp, f) \
111 (current->flags = ((current->flags & ~(f)) | (*(sp) & (f)))) 111 (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
112 112
113#define spinlock_destroy(lock)
114
115#define NBBY 8 /* number of bits per byte */ 113#define NBBY 8 /* number of bits per byte */
116 114
117/* 115/*
@@ -221,6 +219,9 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
221 return x; 219 return x;
222} 220}
223 221
222int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
223 char *data, unsigned int op);
224
224#define ASSERT_ALWAYS(expr) \ 225#define ASSERT_ALWAYS(expr) \
225 (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) 226 (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
226 227
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 2466b0f5b6c4..00e9f5c388d3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -16,11 +16,7 @@
16#include "xfs_trans_priv.h" 16#include "xfs_trans_priv.h"
17#include "xfs_log.h" 17#include "xfs_log.h"
18#include "xfs_log_priv.h" 18#include "xfs_log_priv.h"
19#include "xfs_log_recover.h"
20#include "xfs_inode.h"
21#include "xfs_trace.h" 19#include "xfs_trace.h"
22#include "xfs_fsops.h"
23#include "xfs_cksum.h"
24#include "xfs_sysfs.h" 20#include "xfs_sysfs.h"
25#include "xfs_sb.h" 21#include "xfs_sb.h"
26#include "xfs_health.h" 22#include "xfs_health.h"
@@ -45,21 +41,14 @@ STATIC int
45xlog_space_left( 41xlog_space_left(
46 struct xlog *log, 42 struct xlog *log,
47 atomic64_t *head); 43 atomic64_t *head);
48STATIC int
49xlog_sync(
50 struct xlog *log,
51 struct xlog_in_core *iclog);
52STATIC void 44STATIC void
53xlog_dealloc_log( 45xlog_dealloc_log(
54 struct xlog *log); 46 struct xlog *log);
55 47
56/* local state machine functions */ 48/* local state machine functions */
57STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); 49STATIC void xlog_state_done_syncing(
58STATIC void 50 struct xlog_in_core *iclog,
59xlog_state_do_callback( 51 bool aborted);
60 struct xlog *log,
61 int aborted,
62 struct xlog_in_core *iclog);
63STATIC int 52STATIC int
64xlog_state_get_iclog_space( 53xlog_state_get_iclog_space(
65 struct xlog *log, 54 struct xlog *log,
@@ -107,8 +96,7 @@ STATIC void
107xlog_verify_iclog( 96xlog_verify_iclog(
108 struct xlog *log, 97 struct xlog *log,
109 struct xlog_in_core *iclog, 98 struct xlog_in_core *iclog,
110 int count, 99 int count);
111 bool syncing);
112STATIC void 100STATIC void
113xlog_verify_tail_lsn( 101xlog_verify_tail_lsn(
114 struct xlog *log, 102 struct xlog *log,
@@ -117,7 +105,7 @@ xlog_verify_tail_lsn(
117#else 105#else
118#define xlog_verify_dest_ptr(a,b) 106#define xlog_verify_dest_ptr(a,b)
119#define xlog_verify_grant_tail(a) 107#define xlog_verify_grant_tail(a)
120#define xlog_verify_iclog(a,b,c,d) 108#define xlog_verify_iclog(a,b,c)
121#define xlog_verify_tail_lsn(a,b,c) 109#define xlog_verify_tail_lsn(a,b,c)
122#endif 110#endif
123 111
@@ -541,32 +529,6 @@ xfs_log_done(
541 return lsn; 529 return lsn;
542} 530}
543 531
544/*
545 * Attaches a new iclog I/O completion callback routine during
546 * transaction commit. If the log is in error state, a non-zero
547 * return code is handed back and the caller is responsible for
548 * executing the callback at an appropriate time.
549 */
550int
551xfs_log_notify(
552 struct xlog_in_core *iclog,
553 xfs_log_callback_t *cb)
554{
555 int abortflg;
556
557 spin_lock(&iclog->ic_callback_lock);
558 abortflg = (iclog->ic_state & XLOG_STATE_IOERROR);
559 if (!abortflg) {
560 ASSERT_ALWAYS((iclog->ic_state == XLOG_STATE_ACTIVE) ||
561 (iclog->ic_state == XLOG_STATE_WANT_SYNC));
562 cb->cb_next = NULL;
563 *(iclog->ic_callback_tail) = cb;
564 iclog->ic_callback_tail = &(cb->cb_next);
565 }
566 spin_unlock(&iclog->ic_callback_lock);
567 return abortflg;
568}
569
570int 532int
571xfs_log_release_iclog( 533xfs_log_release_iclog(
572 struct xfs_mount *mp, 534 struct xfs_mount *mp,
@@ -807,16 +769,12 @@ xfs_log_mount_finish(
807 * The mount has failed. Cancel the recovery if it hasn't completed and destroy 769 * The mount has failed. Cancel the recovery if it hasn't completed and destroy
808 * the log. 770 * the log.
809 */ 771 */
810int 772void
811xfs_log_mount_cancel( 773xfs_log_mount_cancel(
812 struct xfs_mount *mp) 774 struct xfs_mount *mp)
813{ 775{
814 int error; 776 xlog_recover_cancel(mp->m_log);
815
816 error = xlog_recover_cancel(mp->m_log);
817 xfs_log_unmount(mp); 777 xfs_log_unmount(mp);
818
819 return error;
820} 778}
821 779
822/* 780/*
@@ -932,7 +890,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
932 * Or, if we are doing a forced umount (typically because of IO errors). 890 * Or, if we are doing a forced umount (typically because of IO errors).
933 */ 891 */
934 if (mp->m_flags & XFS_MOUNT_NORECOVERY || 892 if (mp->m_flags & XFS_MOUNT_NORECOVERY ||
935 xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) { 893 xfs_readonly_buftarg(log->l_targ)) {
936 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); 894 ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
937 return 0; 895 return 0;
938 } 896 }
@@ -1244,53 +1202,49 @@ xlog_space_left(
1244} 1202}
1245 1203
1246 1204
1247/*
1248 * Log function which is called when an io completes.
1249 *
1250 * The log manager needs its own routine, in order to control what
1251 * happens with the buffer after the write completes.
1252 */
1253static void 1205static void
1254xlog_iodone(xfs_buf_t *bp) 1206xlog_ioend_work(
1207 struct work_struct *work)
1255{ 1208{
1256 struct xlog_in_core *iclog = bp->b_log_item; 1209 struct xlog_in_core *iclog =
1257 struct xlog *l = iclog->ic_log; 1210 container_of(work, struct xlog_in_core, ic_end_io_work);
1258 int aborted = 0; 1211 struct xlog *log = iclog->ic_log;
1212 bool aborted = false;
1213 int error;
1214
1215 error = blk_status_to_errno(iclog->ic_bio.bi_status);
1216#ifdef DEBUG
1217 /* treat writes with injected CRC errors as failed */
1218 if (iclog->ic_fail_crc)
1219 error = -EIO;
1220#endif
1259 1221
1260 /* 1222 /*
1261 * Race to shutdown the filesystem if we see an error or the iclog is in 1223 * Race to shutdown the filesystem if we see an error.
1262 * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
1263 * CRC errors into log recovery.
1264 */ 1224 */
1265 if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR) || 1225 if (XFS_TEST_ERROR(error, log->l_mp, XFS_ERRTAG_IODONE_IOERR)) {
1266 iclog->ic_state & XLOG_STATE_IOABORT) { 1226 xfs_alert(log->l_mp, "log I/O error %d", error);
1267 if (iclog->ic_state & XLOG_STATE_IOABORT) 1227 xfs_force_shutdown(log->l_mp, SHUTDOWN_LOG_IO_ERROR);
1268 iclog->ic_state &= ~XLOG_STATE_IOABORT;
1269
1270 xfs_buf_ioerror_alert(bp, __func__);
1271 xfs_buf_stale(bp);
1272 xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
1273 /* 1228 /*
1274 * This flag will be propagated to the trans-committed 1229 * This flag will be propagated to the trans-committed
1275 * callback routines to let them know that the log-commit 1230 * callback routines to let them know that the log-commit
1276 * didn't succeed. 1231 * didn't succeed.
1277 */ 1232 */
1278 aborted = XFS_LI_ABORTED; 1233 aborted = true;
1279 } else if (iclog->ic_state & XLOG_STATE_IOERROR) { 1234 } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
1280 aborted = XFS_LI_ABORTED; 1235 aborted = true;
1281 } 1236 }
1282 1237
1283 /* log I/O is always issued ASYNC */
1284 ASSERT(bp->b_flags & XBF_ASYNC);
1285 xlog_state_done_syncing(iclog, aborted); 1238 xlog_state_done_syncing(iclog, aborted);
1239 bio_uninit(&iclog->ic_bio);
1286 1240
1287 /* 1241 /*
1288 * drop the buffer lock now that we are done. Nothing references 1242 * Drop the lock to signal that we are done. Nothing references the
1289 * the buffer after this, so an unmount waiting on this lock can now 1243 * iclog after this, so an unmount waiting on this lock can now tear it
1290 * tear it down safely. As such, it is unsafe to reference the buffer 1244 * down safely. As such, it is unsafe to reference the iclog after the
1291 * (bp) after the unlock as we could race with it being freed. 1245 * unlock as we could race with it being freed.
1292 */ 1246 */
1293 xfs_buf_unlock(bp); 1247 up(&iclog->ic_sema);
1294} 1248}
1295 1249
1296/* 1250/*
@@ -1301,65 +1255,26 @@ xlog_iodone(xfs_buf_t *bp)
1301 * If the filesystem blocksize is too large, we may need to choose a 1255 * If the filesystem blocksize is too large, we may need to choose a
1302 * larger size since the directory code currently logs entire blocks. 1256 * larger size since the directory code currently logs entire blocks.
1303 */ 1257 */
1304
1305STATIC void 1258STATIC void
1306xlog_get_iclog_buffer_size( 1259xlog_get_iclog_buffer_size(
1307 struct xfs_mount *mp, 1260 struct xfs_mount *mp,
1308 struct xlog *log) 1261 struct xlog *log)
1309{ 1262{
1310 int size;
1311 int xhdrs;
1312
1313 if (mp->m_logbufs <= 0) 1263 if (mp->m_logbufs <= 0)
1314 log->l_iclog_bufs = XLOG_MAX_ICLOGS; 1264 mp->m_logbufs = XLOG_MAX_ICLOGS;
1315 else 1265 if (mp->m_logbsize <= 0)
1316 log->l_iclog_bufs = mp->m_logbufs; 1266 mp->m_logbsize = XLOG_BIG_RECORD_BSIZE;
1267
1268 log->l_iclog_bufs = mp->m_logbufs;
1269 log->l_iclog_size = mp->m_logbsize;
1317 1270
1318 /* 1271 /*
1319 * Buffer size passed in from mount system call. 1272 * # headers = size / 32k - one header holds cycles from 32k of data.
1320 */ 1273 */
1321 if (mp->m_logbsize > 0) { 1274 log->l_iclog_heads =
1322 size = log->l_iclog_size = mp->m_logbsize; 1275 DIV_ROUND_UP(mp->m_logbsize, XLOG_HEADER_CYCLE_SIZE);
1323 log->l_iclog_size_log = 0; 1276 log->l_iclog_hsize = log->l_iclog_heads << BBSHIFT;
1324 while (size != 1) { 1277}
1325 log->l_iclog_size_log++;
1326 size >>= 1;
1327 }
1328
1329 if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1330 /* # headers = size / 32k
1331 * one header holds cycles from 32k of data
1332 */
1333
1334 xhdrs = mp->m_logbsize / XLOG_HEADER_CYCLE_SIZE;
1335 if (mp->m_logbsize % XLOG_HEADER_CYCLE_SIZE)
1336 xhdrs++;
1337 log->l_iclog_hsize = xhdrs << BBSHIFT;
1338 log->l_iclog_heads = xhdrs;
1339 } else {
1340 ASSERT(mp->m_logbsize <= XLOG_BIG_RECORD_BSIZE);
1341 log->l_iclog_hsize = BBSIZE;
1342 log->l_iclog_heads = 1;
1343 }
1344 goto done;
1345 }
1346
1347 /* All machines use 32kB buffers by default. */
1348 log->l_iclog_size = XLOG_BIG_RECORD_BSIZE;
1349 log->l_iclog_size_log = XLOG_BIG_RECORD_BSHIFT;
1350
1351 /* the default log size is 16k or 32k which is one header sector */
1352 log->l_iclog_hsize = BBSIZE;
1353 log->l_iclog_heads = 1;
1354
1355done:
1356 /* are we being asked to make the sizes selected above visible? */
1357 if (mp->m_logbufs == 0)
1358 mp->m_logbufs = log->l_iclog_bufs;
1359 if (mp->m_logbsize == 0)
1360 mp->m_logbsize = log->l_iclog_size;
1361} /* xlog_get_iclog_buffer_size */
1362
1363 1278
1364void 1279void
1365xfs_log_work_queue( 1280xfs_log_work_queue(
@@ -1422,7 +1337,6 @@ xlog_alloc_log(
1422 xlog_rec_header_t *head; 1337 xlog_rec_header_t *head;
1423 xlog_in_core_t **iclogp; 1338 xlog_in_core_t **iclogp;
1424 xlog_in_core_t *iclog, *prev_iclog=NULL; 1339 xlog_in_core_t *iclog, *prev_iclog=NULL;
1425 xfs_buf_t *bp;
1426 int i; 1340 int i;
1427 int error = -ENOMEM; 1341 int error = -ENOMEM;
1428 uint log2_size = 0; 1342 uint log2_size = 0;
@@ -1480,30 +1394,6 @@ xlog_alloc_log(
1480 1394
1481 xlog_get_iclog_buffer_size(mp, log); 1395 xlog_get_iclog_buffer_size(mp, log);
1482 1396
1483 /*
1484 * Use a NULL block for the extra log buffer used during splits so that
1485 * it will trigger errors if we ever try to do IO on it without first
1486 * having set it up properly.
1487 */
1488 error = -ENOMEM;
1489 bp = xfs_buf_alloc(mp->m_logdev_targp, XFS_BUF_DADDR_NULL,
1490 BTOBB(log->l_iclog_size), XBF_NO_IOACCT);
1491 if (!bp)
1492 goto out_free_log;
1493
1494 /*
1495 * The iclogbuf buffer locks are held over IO but we are not going to do
1496 * IO yet. Hence unlock the buffer so that the log IO path can grab it
1497 * when appropriately.
1498 */
1499 ASSERT(xfs_buf_islocked(bp));
1500 xfs_buf_unlock(bp);
1501
1502 /* use high priority wq for log I/O completion */
1503 bp->b_ioend_wq = mp->m_log_workqueue;
1504 bp->b_iodone = xlog_iodone;
1505 log->l_xbuf = bp;
1506
1507 spin_lock_init(&log->l_icloglock); 1397 spin_lock_init(&log->l_icloglock);
1508 init_waitqueue_head(&log->l_flush_wait); 1398 init_waitqueue_head(&log->l_flush_wait);
1509 1399
@@ -1516,29 +1406,22 @@ xlog_alloc_log(
1516 * xlog_in_core_t in xfs_log_priv.h for details. 1406 * xlog_in_core_t in xfs_log_priv.h for details.
1517 */ 1407 */
1518 ASSERT(log->l_iclog_size >= 4096); 1408 ASSERT(log->l_iclog_size >= 4096);
1519 for (i=0; i < log->l_iclog_bufs; i++) { 1409 for (i = 0; i < log->l_iclog_bufs; i++) {
1520 *iclogp = kmem_zalloc(sizeof(xlog_in_core_t), KM_MAYFAIL); 1410 size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
1521 if (!*iclogp) 1411 sizeof(struct bio_vec);
1412
1413 iclog = kmem_zalloc(sizeof(*iclog) + bvec_size, KM_MAYFAIL);
1414 if (!iclog)
1522 goto out_free_iclog; 1415 goto out_free_iclog;
1523 1416
1524 iclog = *iclogp; 1417 *iclogp = iclog;
1525 iclog->ic_prev = prev_iclog; 1418 iclog->ic_prev = prev_iclog;
1526 prev_iclog = iclog; 1419 prev_iclog = iclog;
1527 1420
1528 bp = xfs_buf_get_uncached(mp->m_logdev_targp, 1421 iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
1529 BTOBB(log->l_iclog_size), 1422 KM_MAYFAIL);
1530 XBF_NO_IOACCT); 1423 if (!iclog->ic_data)
1531 if (!bp)
1532 goto out_free_iclog; 1424 goto out_free_iclog;
1533
1534 ASSERT(xfs_buf_islocked(bp));
1535 xfs_buf_unlock(bp);
1536
1537 /* use high priority wq for log I/O completion */
1538 bp->b_ioend_wq = mp->m_log_workqueue;
1539 bp->b_iodone = xlog_iodone;
1540 iclog->ic_bp = bp;
1541 iclog->ic_data = bp->b_addr;
1542#ifdef DEBUG 1425#ifdef DEBUG
1543 log->l_iclog_bak[i] = &iclog->ic_header; 1426 log->l_iclog_bak[i] = &iclog->ic_header;
1544#endif 1427#endif
@@ -1552,36 +1435,43 @@ xlog_alloc_log(
1552 head->h_fmt = cpu_to_be32(XLOG_FMT); 1435 head->h_fmt = cpu_to_be32(XLOG_FMT);
1553 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); 1436 memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
1554 1437
1555 iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize; 1438 iclog->ic_size = log->l_iclog_size - log->l_iclog_hsize;
1556 iclog->ic_state = XLOG_STATE_ACTIVE; 1439 iclog->ic_state = XLOG_STATE_ACTIVE;
1557 iclog->ic_log = log; 1440 iclog->ic_log = log;
1558 atomic_set(&iclog->ic_refcnt, 0); 1441 atomic_set(&iclog->ic_refcnt, 0);
1559 spin_lock_init(&iclog->ic_callback_lock); 1442 spin_lock_init(&iclog->ic_callback_lock);
1560 iclog->ic_callback_tail = &(iclog->ic_callback); 1443 INIT_LIST_HEAD(&iclog->ic_callbacks);
1561 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; 1444 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
1562 1445
1563 init_waitqueue_head(&iclog->ic_force_wait); 1446 init_waitqueue_head(&iclog->ic_force_wait);
1564 init_waitqueue_head(&iclog->ic_write_wait); 1447 init_waitqueue_head(&iclog->ic_write_wait);
1448 INIT_WORK(&iclog->ic_end_io_work, xlog_ioend_work);
1449 sema_init(&iclog->ic_sema, 1);
1565 1450
1566 iclogp = &iclog->ic_next; 1451 iclogp = &iclog->ic_next;
1567 } 1452 }
1568 *iclogp = log->l_iclog; /* complete ring */ 1453 *iclogp = log->l_iclog; /* complete ring */
1569 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */ 1454 log->l_iclog->ic_prev = prev_iclog; /* re-write 1st prev ptr */
1570 1455
1456 log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
1457 WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
1458 mp->m_fsname);
1459 if (!log->l_ioend_workqueue)
1460 goto out_free_iclog;
1461
1571 error = xlog_cil_init(log); 1462 error = xlog_cil_init(log);
1572 if (error) 1463 if (error)
1573 goto out_free_iclog; 1464 goto out_destroy_workqueue;
1574 return log; 1465 return log;
1575 1466
1467out_destroy_workqueue:
1468 destroy_workqueue(log->l_ioend_workqueue);
1576out_free_iclog: 1469out_free_iclog:
1577 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) { 1470 for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
1578 prev_iclog = iclog->ic_next; 1471 prev_iclog = iclog->ic_next;
1579 if (iclog->ic_bp) 1472 kmem_free(iclog->ic_data);
1580 xfs_buf_free(iclog->ic_bp);
1581 kmem_free(iclog); 1473 kmem_free(iclog);
1582 } 1474 }
1583 spinlock_destroy(&log->l_icloglock);
1584 xfs_buf_free(log->l_xbuf);
1585out_free_log: 1475out_free_log:
1586 kmem_free(log); 1476 kmem_free(log);
1587out: 1477out:
@@ -1766,42 +1656,155 @@ xlog_cksum(
1766 return xfs_end_cksum(crc); 1656 return xfs_end_cksum(crc);
1767} 1657}
1768 1658
1769/* 1659static void
1770 * The bdstrat callback function for log bufs. This gives us a central 1660xlog_bio_end_io(
1771 * place to trap bufs in case we get hit by a log I/O error and need to 1661 struct bio *bio)
1772 * shutdown. Actually, in practice, even when we didn't get a log error,
1773 * we transition the iclogs to IOERROR state *after* flushing all existing
1774 * iclogs to disk. This is because we don't want anymore new transactions to be
1775 * started or completed afterwards.
1776 *
1777 * We lock the iclogbufs here so that we can serialise against IO completion
1778 * during unmount. We might be processing a shutdown triggered during unmount,
1779 * and that can occur asynchronously to the unmount thread, and hence we need to
1780 * ensure that completes before tearing down the iclogbufs. Hence we need to
1781 * hold the buffer lock across the log IO to acheive that.
1782 */
1783STATIC int
1784xlog_bdstrat(
1785 struct xfs_buf *bp)
1786{ 1662{
1787 struct xlog_in_core *iclog = bp->b_log_item; 1663 struct xlog_in_core *iclog = bio->bi_private;
1788 1664
1789 xfs_buf_lock(bp); 1665 queue_work(iclog->ic_log->l_ioend_workqueue,
1790 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1666 &iclog->ic_end_io_work);
1791 xfs_buf_ioerror(bp, -EIO); 1667}
1792 xfs_buf_stale(bp); 1668
1793 xfs_buf_ioend(bp); 1669static void
1670xlog_map_iclog_data(
1671 struct bio *bio,
1672 void *data,
1673 size_t count)
1674{
1675 do {
1676 struct page *page = kmem_to_page(data);
1677 unsigned int off = offset_in_page(data);
1678 size_t len = min_t(size_t, count, PAGE_SIZE - off);
1679
1680 WARN_ON_ONCE(bio_add_page(bio, page, len, off) != len);
1681
1682 data += len;
1683 count -= len;
1684 } while (count);
1685}
1686
1687STATIC void
1688xlog_write_iclog(
1689 struct xlog *log,
1690 struct xlog_in_core *iclog,
1691 uint64_t bno,
1692 unsigned int count,
1693 bool need_flush)
1694{
1695 ASSERT(bno < log->l_logBBsize);
1696
1697 /*
1698 * We lock the iclogbufs here so that we can serialise against I/O
1699 * completion during unmount. We might be processing a shutdown
1700 * triggered during unmount, and that can occur asynchronously to the
1701 * unmount thread, and hence we need to ensure that completes before
1702 * tearing down the iclogbufs. Hence we need to hold the buffer lock
 1703 * across the log IO to achieve that.
1704 */
1705 down(&iclog->ic_sema);
1706 if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) {
1794 /* 1707 /*
1795 * It would seem logical to return EIO here, but we rely on 1708 * It would seem logical to return EIO here, but we rely on
1796 * the log state machine to propagate I/O errors instead of 1709 * the log state machine to propagate I/O errors instead of
1797 * doing it here. Similarly, IO completion will unlock the 1710 * doing it here. We kick of the state machine and unlock
1798 * buffer, so we don't do it here. 1711 * the buffer manually, the code needs to be kept in sync
1712 * with the I/O completion path.
1799 */ 1713 */
1800 return 0; 1714 xlog_state_done_syncing(iclog, XFS_LI_ABORTED);
1715 up(&iclog->ic_sema);
1716 return;
1801 } 1717 }
1802 1718
1803 xfs_buf_submit(bp); 1719 iclog->ic_io_size = count;
1804 return 0; 1720
1721 bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
1722 bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
1723 iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
1724 iclog->ic_bio.bi_end_io = xlog_bio_end_io;
1725 iclog->ic_bio.bi_private = iclog;
1726 iclog->ic_bio.bi_opf = REQ_OP_WRITE | REQ_META | REQ_SYNC | REQ_FUA;
1727 if (need_flush)
1728 iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
1729
1730 xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size);
1731 if (is_vmalloc_addr(iclog->ic_data))
1732 flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size);
1733
1734 /*
1735 * If this log buffer would straddle the end of the log we will have
1736 * to split it up into two bios, so that we can continue at the start.
1737 */
1738 if (bno + BTOBB(count) > log->l_logBBsize) {
1739 struct bio *split;
1740
1741 split = bio_split(&iclog->ic_bio, log->l_logBBsize - bno,
1742 GFP_NOIO, &fs_bio_set);
1743 bio_chain(split, &iclog->ic_bio);
1744 submit_bio(split);
1745
1746 /* restart at logical offset zero for the remainder */
1747 iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart;
1748 }
1749
1750 submit_bio(&iclog->ic_bio);
1751}
1752
1753/*
1754 * We need to bump cycle number for the part of the iclog that is
1755 * written to the start of the log. Watch out for the header magic
1756 * number case, though.
1757 */
1758static void
1759xlog_split_iclog(
1760 struct xlog *log,
1761 void *data,
1762 uint64_t bno,
1763 unsigned int count)
1764{
1765 unsigned int split_offset = BBTOB(log->l_logBBsize - bno);
1766 unsigned int i;
1767
1768 for (i = split_offset; i < count; i += BBSIZE) {
1769 uint32_t cycle = get_unaligned_be32(data + i);
1770
1771 if (++cycle == XLOG_HEADER_MAGIC_NUM)
1772 cycle++;
1773 put_unaligned_be32(cycle, data + i);
1774 }
1775}
1776
1777static int
1778xlog_calc_iclog_size(
1779 struct xlog *log,
1780 struct xlog_in_core *iclog,
1781 uint32_t *roundoff)
1782{
1783 uint32_t count_init, count;
1784 bool use_lsunit;
1785
1786 use_lsunit = xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
1787 log->l_mp->m_sb.sb_logsunit > 1;
1788
1789 /* Add for LR header */
1790 count_init = log->l_iclog_hsize + iclog->ic_offset;
1791
1792 /* Round out the log write size */
1793 if (use_lsunit) {
1794 /* we have a v2 stripe unit to use */
1795 count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
1796 } else {
1797 count = BBTOB(BTOBB(count_init));
1798 }
1799
1800 ASSERT(count >= count_init);
1801 *roundoff = count - count_init;
1802
1803 if (use_lsunit)
1804 ASSERT(*roundoff < log->l_mp->m_sb.sb_logsunit);
1805 else
1806 ASSERT(*roundoff < BBTOB(1));
1807 return count;
1805} 1808}
1806 1809
1807/* 1810/*
@@ -1824,46 +1827,23 @@ xlog_bdstrat(
1824 * log will require grabbing the lock though. 1827 * log will require grabbing the lock though.
1825 * 1828 *
1826 * The entire log manager uses a logical block numbering scheme. Only 1829 * The entire log manager uses a logical block numbering scheme. Only
1827 * log_sync (and then only bwrite()) know about the fact that the log may 1830 * xlog_write_iclog knows about the fact that the log may not start with
1828 * not start with block zero on a given device. The log block start offset 1831 * block zero on a given device.
1829 * is added immediately before calling bwrite().
1830 */ 1832 */
1831 1833STATIC void
1832STATIC int
1833xlog_sync( 1834xlog_sync(
1834 struct xlog *log, 1835 struct xlog *log,
1835 struct xlog_in_core *iclog) 1836 struct xlog_in_core *iclog)
1836{ 1837{
1837 xfs_buf_t *bp; 1838 unsigned int count; /* byte count of bwrite */
1838 int i; 1839 unsigned int roundoff; /* roundoff to BB or stripe */
1839 uint count; /* byte count of bwrite */ 1840 uint64_t bno;
1840 uint count_init; /* initial count before roundup */ 1841 unsigned int size;
1841 int roundoff; /* roundoff to BB or stripe */ 1842 bool need_flush = true, split = false;
1842 int split = 0; /* split write into two regions */
1843 int error;
1844 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
1845 int size;
1846 1843
1847 XFS_STATS_INC(log->l_mp, xs_log_writes);
1848 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 1844 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
1849 1845
1850 /* Add for LR header */ 1846 count = xlog_calc_iclog_size(log, iclog, &roundoff);
1851 count_init = log->l_iclog_hsize + iclog->ic_offset;
1852
1853 /* Round out the log write size */
1854 if (v2 && log->l_mp->m_sb.sb_logsunit > 1) {
1855 /* we have a v2 stripe unit to use */
1856 count = XLOG_LSUNITTOB(log, XLOG_BTOLSUNIT(log, count_init));
1857 } else {
1858 count = BBTOB(BTOBB(count_init));
1859 }
1860 roundoff = count - count_init;
1861 ASSERT(roundoff >= 0);
1862 ASSERT((v2 && log->l_mp->m_sb.sb_logsunit > 1 &&
1863 roundoff < log->l_mp->m_sb.sb_logsunit)
1864 ||
1865 (log->l_mp->m_sb.sb_logsunit <= 1 &&
1866 roundoff < BBTOB(1)));
1867 1847
1868 /* move grant heads by roundoff in sync */ 1848 /* move grant heads by roundoff in sync */
1869 xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); 1849 xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
@@ -1874,41 +1854,19 @@ xlog_sync(
1874 1854
1875 /* real byte length */ 1855 /* real byte length */
1876 size = iclog->ic_offset; 1856 size = iclog->ic_offset;
1877 if (v2) 1857 if (xfs_sb_version_haslogv2(&log->l_mp->m_sb))
1878 size += roundoff; 1858 size += roundoff;
1879 iclog->ic_header.h_len = cpu_to_be32(size); 1859 iclog->ic_header.h_len = cpu_to_be32(size);
1880 1860
1881 bp = iclog->ic_bp; 1861 XFS_STATS_INC(log->l_mp, xs_log_writes);
1882 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
1883
1884 XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count)); 1862 XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
1885 1863
1886 /* Do we need to split this write into 2 parts? */ 1864 bno = BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn));
1887 if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
1888 char *dptr;
1889
1890 split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
1891 count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
1892 iclog->ic_bwritecnt = 2;
1893 1865
1894 /* 1866 /* Do we need to split this write into 2 parts? */
1895 * Bump the cycle numbers at the start of each block in the 1867 if (bno + BTOBB(count) > log->l_logBBsize) {
1896 * part of the iclog that ends up in the buffer that gets 1868 xlog_split_iclog(log, &iclog->ic_header, bno, count);
1897 * written to the start of the log. 1869 split = true;
1898 *
1899 * Watch out for the header magic number case, though.
1900 */
1901 dptr = (char *)&iclog->ic_header + count;
1902 for (i = 0; i < split; i += BBSIZE) {
1903 uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
1904 if (++cycle == XLOG_HEADER_MAGIC_NUM)
1905 cycle++;
1906 *(__be32 *)dptr = cpu_to_be32(cycle);
1907
1908 dptr += BBSIZE;
1909 }
1910 } else {
1911 iclog->ic_bwritecnt = 1;
1912 } 1870 }
1913 1871
1914 /* calculcate the checksum */ 1872 /* calculcate the checksum */
@@ -1921,18 +1879,15 @@ xlog_sync(
1921 * write on I/O completion and shutdown the fs. The subsequent mount 1879 * write on I/O completion and shutdown the fs. The subsequent mount
1922 * detects the bad CRC and attempts to recover. 1880 * detects the bad CRC and attempts to recover.
1923 */ 1881 */
1882#ifdef DEBUG
1924 if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) { 1883 if (XFS_TEST_ERROR(false, log->l_mp, XFS_ERRTAG_LOG_BAD_CRC)) {
1925 iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA); 1884 iclog->ic_header.h_crc &= cpu_to_le32(0xAAAAAAAA);
1926 iclog->ic_state |= XLOG_STATE_IOABORT; 1885 iclog->ic_fail_crc = true;
1927 xfs_warn(log->l_mp, 1886 xfs_warn(log->l_mp,
1928 "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.", 1887 "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
1929 be64_to_cpu(iclog->ic_header.h_lsn)); 1888 be64_to_cpu(iclog->ic_header.h_lsn));
1930 } 1889 }
1931 1890#endif
1932 bp->b_io_length = BTOBB(count);
1933 bp->b_log_item = iclog;
1934 bp->b_flags &= ~XBF_FLUSH;
1935 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
1936 1891
1937 /* 1892 /*
1938 * Flush the data device before flushing the log to make sure all meta 1893 * Flush the data device before flushing the log to make sure all meta
@@ -1942,50 +1897,14 @@ xlog_sync(
1942 * synchronously here; for an internal log we can simply use the block 1897 * synchronously here; for an internal log we can simply use the block
1943 * layer state machine for preflushes. 1898 * layer state machine for preflushes.
1944 */ 1899 */
1945 if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) 1900 if (log->l_targ != log->l_mp->m_ddev_targp || split) {
1946 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); 1901 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
1947 else 1902 need_flush = false;
1948 bp->b_flags |= XBF_FLUSH;
1949
1950 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1951 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
1952
1953 xlog_verify_iclog(log, iclog, count, true);
1954
1955 /* account for log which doesn't start at block #0 */
1956 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
1957
1958 /*
1959 * Don't call xfs_bwrite here. We do log-syncs even when the filesystem
1960 * is shutting down.
1961 */
1962 error = xlog_bdstrat(bp);
1963 if (error) {
1964 xfs_buf_ioerror_alert(bp, "xlog_sync");
1965 return error;
1966 } 1903 }
1967 if (split) { 1904
1968 bp = iclog->ic_log->l_xbuf; 1905 xlog_verify_iclog(log, iclog, count);
1969 XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ 1906 xlog_write_iclog(log, iclog, bno, count, need_flush);
1970 xfs_buf_associate_memory(bp, 1907}
1971 (char *)&iclog->ic_header + count, split);
1972 bp->b_log_item = iclog;
1973 bp->b_flags &= ~XBF_FLUSH;
1974 bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE | XBF_FUA);
1975
1976 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
1977 ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
1978
1979 /* account for internal log which doesn't start at block #0 */
1980 XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
1981 error = xlog_bdstrat(bp);
1982 if (error) {
1983 xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
1984 return error;
1985 }
1986 }
1987 return 0;
1988} /* xlog_sync */
1989 1908
1990/* 1909/*
1991 * Deallocate a log structure 1910 * Deallocate a log structure
@@ -2005,31 +1924,21 @@ xlog_dealloc_log(
2005 */ 1924 */
2006 iclog = log->l_iclog; 1925 iclog = log->l_iclog;
2007 for (i = 0; i < log->l_iclog_bufs; i++) { 1926 for (i = 0; i < log->l_iclog_bufs; i++) {
2008 xfs_buf_lock(iclog->ic_bp); 1927 down(&iclog->ic_sema);
2009 xfs_buf_unlock(iclog->ic_bp); 1928 up(&iclog->ic_sema);
2010 iclog = iclog->ic_next; 1929 iclog = iclog->ic_next;
2011 } 1930 }
2012 1931
2013 /*
2014 * Always need to ensure that the extra buffer does not point to memory
2015 * owned by another log buffer before we free it. Also, cycle the lock
2016 * first to ensure we've completed IO on it.
2017 */
2018 xfs_buf_lock(log->l_xbuf);
2019 xfs_buf_unlock(log->l_xbuf);
2020 xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
2021 xfs_buf_free(log->l_xbuf);
2022
2023 iclog = log->l_iclog; 1932 iclog = log->l_iclog;
2024 for (i = 0; i < log->l_iclog_bufs; i++) { 1933 for (i = 0; i < log->l_iclog_bufs; i++) {
2025 xfs_buf_free(iclog->ic_bp);
2026 next_iclog = iclog->ic_next; 1934 next_iclog = iclog->ic_next;
1935 kmem_free(iclog->ic_data);
2027 kmem_free(iclog); 1936 kmem_free(iclog);
2028 iclog = next_iclog; 1937 iclog = next_iclog;
2029 } 1938 }
2030 spinlock_destroy(&log->l_icloglock);
2031 1939
2032 log->l_mp->m_log = NULL; 1940 log->l_mp->m_log = NULL;
1941 destroy_workqueue(log->l_ioend_workqueue);
2033 kmem_free(log); 1942 kmem_free(log);
2034} /* xlog_dealloc_log */ 1943} /* xlog_dealloc_log */
2035 1944
@@ -2610,7 +2519,7 @@ xlog_state_clean_log(
2610 if (iclog->ic_state == XLOG_STATE_DIRTY) { 2519 if (iclog->ic_state == XLOG_STATE_DIRTY) {
2611 iclog->ic_state = XLOG_STATE_ACTIVE; 2520 iclog->ic_state = XLOG_STATE_ACTIVE;
2612 iclog->ic_offset = 0; 2521 iclog->ic_offset = 0;
2613 ASSERT(iclog->ic_callback == NULL); 2522 ASSERT(list_empty_careful(&iclog->ic_callbacks));
2614 /* 2523 /*
2615 * If the number of ops in this iclog indicate it just 2524 * If the number of ops in this iclog indicate it just
2616 * contains the dummy transaction, we can 2525 * contains the dummy transaction, we can
@@ -2680,37 +2589,32 @@ xlog_state_clean_log(
2680 2589
2681STATIC xfs_lsn_t 2590STATIC xfs_lsn_t
2682xlog_get_lowest_lsn( 2591xlog_get_lowest_lsn(
2683 struct xlog *log) 2592 struct xlog *log)
2684{ 2593{
2685 xlog_in_core_t *lsn_log; 2594 struct xlog_in_core *iclog = log->l_iclog;
2686 xfs_lsn_t lowest_lsn, lsn; 2595 xfs_lsn_t lowest_lsn = 0, lsn;
2687 2596
2688 lsn_log = log->l_iclog;
2689 lowest_lsn = 0;
2690 do { 2597 do {
2691 if (!(lsn_log->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY))) { 2598 if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
2692 lsn = be64_to_cpu(lsn_log->ic_header.h_lsn); 2599 continue;
2693 if ((lsn && !lowest_lsn) || 2600
2694 (XFS_LSN_CMP(lsn, lowest_lsn) < 0)) { 2601 lsn = be64_to_cpu(iclog->ic_header.h_lsn);
2602 if ((lsn && !lowest_lsn) || XFS_LSN_CMP(lsn, lowest_lsn) < 0)
2695 lowest_lsn = lsn; 2603 lowest_lsn = lsn;
2696 } 2604 } while ((iclog = iclog->ic_next) != log->l_iclog);
2697 } 2605
2698 lsn_log = lsn_log->ic_next;
2699 } while (lsn_log != log->l_iclog);
2700 return lowest_lsn; 2606 return lowest_lsn;
2701} 2607}
2702 2608
2703
2704STATIC void 2609STATIC void
2705xlog_state_do_callback( 2610xlog_state_do_callback(
2706 struct xlog *log, 2611 struct xlog *log,
2707 int aborted, 2612 bool aborted,
2708 struct xlog_in_core *ciclog) 2613 struct xlog_in_core *ciclog)
2709{ 2614{
2710 xlog_in_core_t *iclog; 2615 xlog_in_core_t *iclog;
2711 xlog_in_core_t *first_iclog; /* used to know when we've 2616 xlog_in_core_t *first_iclog; /* used to know when we've
2712 * processed all iclogs once */ 2617 * processed all iclogs once */
2713 xfs_log_callback_t *cb, *cb_next;
2714 int flushcnt = 0; 2618 int flushcnt = 0;
2715 xfs_lsn_t lowest_lsn; 2619 xfs_lsn_t lowest_lsn;
2716 int ioerrors; /* counter: iclogs with errors */ 2620 int ioerrors; /* counter: iclogs with errors */
@@ -2821,7 +2725,7 @@ xlog_state_do_callback(
2821 */ 2725 */
2822 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), 2726 ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
2823 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); 2727 be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
2824 if (iclog->ic_callback) 2728 if (!list_empty_careful(&iclog->ic_callbacks))
2825 atomic64_set(&log->l_last_sync_lsn, 2729 atomic64_set(&log->l_last_sync_lsn,
2826 be64_to_cpu(iclog->ic_header.h_lsn)); 2730 be64_to_cpu(iclog->ic_header.h_lsn));
2827 2731
@@ -2838,26 +2742,20 @@ xlog_state_do_callback(
2838 * callbacks being added. 2742 * callbacks being added.
2839 */ 2743 */
2840 spin_lock(&iclog->ic_callback_lock); 2744 spin_lock(&iclog->ic_callback_lock);
2841 cb = iclog->ic_callback; 2745 while (!list_empty(&iclog->ic_callbacks)) {
2842 while (cb) { 2746 LIST_HEAD(tmp);
2843 iclog->ic_callback_tail = &(iclog->ic_callback);
2844 iclog->ic_callback = NULL;
2845 spin_unlock(&iclog->ic_callback_lock);
2846 2747
2847 /* perform callbacks in the order given */ 2748 list_splice_init(&iclog->ic_callbacks, &tmp);
2848 for (; cb; cb = cb_next) { 2749
2849 cb_next = cb->cb_next; 2750 spin_unlock(&iclog->ic_callback_lock);
2850 cb->cb_func(cb->cb_arg, aborted); 2751 xlog_cil_process_committed(&tmp, aborted);
2851 }
2852 spin_lock(&iclog->ic_callback_lock); 2752 spin_lock(&iclog->ic_callback_lock);
2853 cb = iclog->ic_callback;
2854 } 2753 }
2855 2754
2856 loopdidcallbacks++; 2755 loopdidcallbacks++;
2857 funcdidcallbacks++; 2756 funcdidcallbacks++;
2858 2757
2859 spin_lock(&log->l_icloglock); 2758 spin_lock(&log->l_icloglock);
2860 ASSERT(iclog->ic_callback == NULL);
2861 spin_unlock(&iclog->ic_callback_lock); 2759 spin_unlock(&iclog->ic_callback_lock);
2862 if (!(iclog->ic_state & XLOG_STATE_IOERROR)) 2760 if (!(iclog->ic_state & XLOG_STATE_IOERROR))
2863 iclog->ic_state = XLOG_STATE_DIRTY; 2761 iclog->ic_state = XLOG_STATE_DIRTY;
@@ -2943,18 +2841,16 @@ xlog_state_do_callback(
2943 */ 2841 */
2944STATIC void 2842STATIC void
2945xlog_state_done_syncing( 2843xlog_state_done_syncing(
2946 xlog_in_core_t *iclog, 2844 struct xlog_in_core *iclog,
2947 int aborted) 2845 bool aborted)
2948{ 2846{
2949 struct xlog *log = iclog->ic_log; 2847 struct xlog *log = iclog->ic_log;
2950 2848
2951 spin_lock(&log->l_icloglock); 2849 spin_lock(&log->l_icloglock);
2952 2850
2953 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING || 2851 ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
2954 iclog->ic_state == XLOG_STATE_IOERROR); 2852 iclog->ic_state == XLOG_STATE_IOERROR);
2955 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 2853 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
2956 ASSERT(iclog->ic_bwritecnt == 1 || iclog->ic_bwritecnt == 2);
2957
2958 2854
2959 /* 2855 /*
2960 * If we got an error, either on the first buffer, or in the case of 2856 * If we got an error, either on the first buffer, or in the case of
@@ -2962,13 +2858,8 @@ xlog_state_done_syncing(
2962 * and none should ever be attempted to be written to disk 2858 * and none should ever be attempted to be written to disk
2963 * again. 2859 * again.
2964 */ 2860 */
2965 if (iclog->ic_state != XLOG_STATE_IOERROR) { 2861 if (iclog->ic_state != XLOG_STATE_IOERROR)
2966 if (--iclog->ic_bwritecnt == 1) {
2967 spin_unlock(&log->l_icloglock);
2968 return;
2969 }
2970 iclog->ic_state = XLOG_STATE_DONE_SYNC; 2862 iclog->ic_state = XLOG_STATE_DONE_SYNC;
2971 }
2972 2863
2973 /* 2864 /*
2974 * Someone could be sleeping prior to writing out the next 2865 * Someone could be sleeping prior to writing out the next
@@ -3237,7 +3128,7 @@ xlog_state_release_iclog(
3237 * flags after this point. 3128 * flags after this point.
3238 */ 3129 */
3239 if (sync) 3130 if (sync)
3240 return xlog_sync(log, iclog); 3131 xlog_sync(log, iclog);
3241 return 0; 3132 return 0;
3242} /* xlog_state_release_iclog */ 3133} /* xlog_state_release_iclog */
3243 3134
@@ -3828,8 +3719,7 @@ STATIC void
3828xlog_verify_iclog( 3719xlog_verify_iclog(
3829 struct xlog *log, 3720 struct xlog *log,
3830 struct xlog_in_core *iclog, 3721 struct xlog_in_core *iclog,
3831 int count, 3722 int count)
3832 bool syncing)
3833{ 3723{
3834 xlog_op_header_t *ophead; 3724 xlog_op_header_t *ophead;
3835 xlog_in_core_t *icptr; 3725 xlog_in_core_t *icptr;
@@ -3873,7 +3763,7 @@ xlog_verify_iclog(
3873 /* clientid is only 1 byte */ 3763 /* clientid is only 1 byte */
3874 p = &ophead->oh_clientid; 3764 p = &ophead->oh_clientid;
3875 field_offset = p - base_ptr; 3765 field_offset = p - base_ptr;
3876 if (!syncing || (field_offset & 0x1ff)) { 3766 if (field_offset & 0x1ff) {
3877 clientid = ophead->oh_clientid; 3767 clientid = ophead->oh_clientid;
3878 } else { 3768 } else {
3879 idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap); 3769 idx = BTOBBT((char *)&ophead->oh_clientid - iclog->ic_datap);
@@ -3896,7 +3786,7 @@ xlog_verify_iclog(
3896 /* check length */ 3786 /* check length */
3897 p = &ophead->oh_len; 3787 p = &ophead->oh_len;
3898 field_offset = p - base_ptr; 3788 field_offset = p - base_ptr;
3899 if (!syncing || (field_offset & 0x1ff)) { 3789 if (field_offset & 0x1ff) {
3900 op_len = be32_to_cpu(ophead->oh_len); 3790 op_len = be32_to_cpu(ophead->oh_len);
3901 } else { 3791 } else {
3902 idx = BTOBBT((uintptr_t)&ophead->oh_len - 3792 idx = BTOBBT((uintptr_t)&ophead->oh_len -
@@ -4033,7 +3923,7 @@ xfs_log_force_umount(
4033 * avoid races. 3923 * avoid races.
4034 */ 3924 */
4035 wake_up_all(&log->l_cilp->xc_commit_wait); 3925 wake_up_all(&log->l_cilp->xc_commit_wait);
4036 xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); 3926 xlog_state_do_callback(log, true, NULL);
4037 3927
4038#ifdef XFSERRORDEBUG 3928#ifdef XFSERRORDEBUG
4039 { 3929 {
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 73a64bf32f6f..84e06805160f 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -6,6 +6,8 @@
6#ifndef __XFS_LOG_H__ 6#ifndef __XFS_LOG_H__
7#define __XFS_LOG_H__ 7#define __XFS_LOG_H__
8 8
9struct xfs_cil_ctx;
10
9struct xfs_log_vec { 11struct xfs_log_vec {
10 struct xfs_log_vec *lv_next; /* next lv in build list */ 12 struct xfs_log_vec *lv_next; /* next lv in build list */
11 int lv_niovecs; /* number of iovecs in lv */ 13 int lv_niovecs; /* number of iovecs in lv */
@@ -72,16 +74,6 @@ xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
72} 74}
73 75
74/* 76/*
75 * Structure used to pass callback function and the function's argument
76 * to the log manager.
77 */
78typedef struct xfs_log_callback {
79 struct xfs_log_callback *cb_next;
80 void (*cb_func)(void *, int);
81 void *cb_arg;
82} xfs_log_callback_t;
83
84/*
85 * By comparing each component, we don't have to worry about extra 77 * By comparing each component, we don't have to worry about extra
86 * endian issues in treating two 32 bit numbers as one 64 bit number 78 * endian issues in treating two 32 bit numbers as one 64 bit number
87 */ 79 */
@@ -125,12 +117,10 @@ int xfs_log_mount(struct xfs_mount *mp,
125 xfs_daddr_t start_block, 117 xfs_daddr_t start_block,
126 int num_bblocks); 118 int num_bblocks);
127int xfs_log_mount_finish(struct xfs_mount *mp); 119int xfs_log_mount_finish(struct xfs_mount *mp);
128int xfs_log_mount_cancel(struct xfs_mount *); 120void xfs_log_mount_cancel(struct xfs_mount *);
129xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp); 121xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
130xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp); 122xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
131void xfs_log_space_wake(struct xfs_mount *mp); 123void xfs_log_space_wake(struct xfs_mount *mp);
132int xfs_log_notify(struct xlog_in_core *iclog,
133 struct xfs_log_callback *callback_entry);
134int xfs_log_release_iclog(struct xfs_mount *mp, 124int xfs_log_release_iclog(struct xfs_mount *mp,
135 struct xlog_in_core *iclog); 125 struct xlog_in_core *iclog);
136int xfs_log_reserve(struct xfs_mount *mp, 126int xfs_log_reserve(struct xfs_mount *mp,
@@ -148,6 +138,7 @@ void xfs_log_ticket_put(struct xlog_ticket *ticket);
148 138
149void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 139void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
150 xfs_lsn_t *commit_lsn, bool regrant); 140 xfs_lsn_t *commit_lsn, bool regrant);
141void xlog_cil_process_committed(struct list_head *list, bool aborted);
151bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 142bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
152 143
153void xfs_log_work_queue(struct xfs_mount *mp); 144void xfs_log_work_queue(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 5e595948bc5a..fa5602d0fd7f 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -10,10 +10,7 @@
10#include "xfs_shared.h" 10#include "xfs_shared.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_error.h"
14#include "xfs_alloc.h"
15#include "xfs_extent_busy.h" 13#include "xfs_extent_busy.h"
16#include "xfs_discard.h"
17#include "xfs_trans.h" 14#include "xfs_trans.h"
18#include "xfs_trans_priv.h" 15#include "xfs_trans_priv.h"
19#include "xfs_log.h" 16#include "xfs_log.h"
@@ -246,7 +243,8 @@ xfs_cil_prepare_item(
246 * shadow buffer, so update the the pointer to it appropriately. 243 * shadow buffer, so update the the pointer to it appropriately.
247 */ 244 */
248 if (!old_lv) { 245 if (!old_lv) {
249 lv->lv_item->li_ops->iop_pin(lv->lv_item); 246 if (lv->lv_item->li_ops->iop_pin)
247 lv->lv_item->li_ops->iop_pin(lv->lv_item);
250 lv->lv_item->li_lv_shadow = NULL; 248 lv->lv_item->li_lv_shadow = NULL;
251 } else if (old_lv != lv) { 249 } else if (old_lv != lv) {
252 ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); 250 ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
@@ -576,10 +574,9 @@ xlog_discard_busy_extents(
576 */ 574 */
577static void 575static void
578xlog_cil_committed( 576xlog_cil_committed(
579 void *args, 577 struct xfs_cil_ctx *ctx,
580 int abort) 578 bool abort)
581{ 579{
582 struct xfs_cil_ctx *ctx = args;
583 struct xfs_mount *mp = ctx->cil->xc_log->l_mp; 580 struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
584 581
585 /* 582 /*
@@ -614,6 +611,20 @@ xlog_cil_committed(
614 kmem_free(ctx); 611 kmem_free(ctx);
615} 612}
616 613
614void
615xlog_cil_process_committed(
616 struct list_head *list,
617 bool aborted)
618{
619 struct xfs_cil_ctx *ctx;
620
621 while ((ctx = list_first_entry_or_null(list,
622 struct xfs_cil_ctx, iclog_entry))) {
623 list_del(&ctx->iclog_entry);
624 xlog_cil_committed(ctx, aborted);
625 }
626}
627
617/* 628/*
618 * Push the Committed Item List to the log. If @push_seq flag is zero, then it 629 * Push the Committed Item List to the log. If @push_seq flag is zero, then it
619 * is a background flush and so we can chose to ignore it. Otherwise, if the 630 * is a background flush and so we can chose to ignore it. Otherwise, if the
@@ -835,12 +846,15 @@ restart:
835 if (commit_lsn == -1) 846 if (commit_lsn == -1)
836 goto out_abort; 847 goto out_abort;
837 848
838 /* attach all the transactions w/ busy extents to iclog */ 849 spin_lock(&commit_iclog->ic_callback_lock);
839 ctx->log_cb.cb_func = xlog_cil_committed; 850 if (commit_iclog->ic_state & XLOG_STATE_IOERROR) {
840 ctx->log_cb.cb_arg = ctx; 851 spin_unlock(&commit_iclog->ic_callback_lock);
841 error = xfs_log_notify(commit_iclog, &ctx->log_cb);
842 if (error)
843 goto out_abort; 852 goto out_abort;
853 }
854 ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
855 commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
856 list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
857 spin_unlock(&commit_iclog->ic_callback_lock);
844 858
845 /* 859 /*
846 * now the checkpoint commit is complete and we've attached the 860 * now the checkpoint commit is complete and we've attached the
@@ -864,7 +878,7 @@ out_skip:
864out_abort_free_ticket: 878out_abort_free_ticket:
865 xfs_log_ticket_put(tic); 879 xfs_log_ticket_put(tic);
866out_abort: 880out_abort:
867 xlog_cil_committed(ctx, XFS_LI_ABORTED); 881 xlog_cil_committed(ctx, true);
868 return -EIO; 882 return -EIO;
869} 883}
870 884
@@ -984,6 +998,7 @@ xfs_log_commit_cil(
984{ 998{
985 struct xlog *log = mp->m_log; 999 struct xlog *log = mp->m_log;
986 struct xfs_cil *cil = log->l_cilp; 1000 struct xfs_cil *cil = log->l_cilp;
1001 struct xfs_log_item *lip, *next;
987 xfs_lsn_t xc_commit_lsn; 1002 xfs_lsn_t xc_commit_lsn;
988 1003
989 /* 1004 /*
@@ -1008,7 +1023,7 @@ xfs_log_commit_cil(
1008 1023
1009 /* 1024 /*
1010 * Once all the items of the transaction have been copied to the CIL, 1025 * Once all the items of the transaction have been copied to the CIL,
1011 * the items can be unlocked and freed. 1026 * the items can be unlocked and possibly freed.
1012 * 1027 *
1013 * This needs to be done before we drop the CIL context lock because we 1028 * This needs to be done before we drop the CIL context lock because we
1014 * have to update state in the log items and unlock them before they go 1029 * have to update state in the log items and unlock them before they go
@@ -1017,8 +1032,12 @@ xfs_log_commit_cil(
1017 * the log items. This affects (at least) processing of stale buffers, 1032 * the log items. This affects (at least) processing of stale buffers,
1018 * inodes and EFIs. 1033 * inodes and EFIs.
1019 */ 1034 */
1020 xfs_trans_free_items(tp, xc_commit_lsn, false); 1035 trace_xfs_trans_commit_items(tp, _RET_IP_);
1021 1036 list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
1037 xfs_trans_del_item(lip);
1038 if (lip->li_ops->iop_committing)
1039 lip->li_ops->iop_committing(lip, xc_commit_lsn);
1040 }
1022 xlog_cil_push_background(log); 1041 xlog_cil_push_background(log);
1023 1042
1024 up_read(&cil->xc_ctx_lock); 1043 up_read(&cil->xc_ctx_lock);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index b5f82cb36202..b880c23cb6e4 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -10,7 +10,6 @@ struct xfs_buf;
10struct xlog; 10struct xlog;
11struct xlog_ticket; 11struct xlog_ticket;
12struct xfs_mount; 12struct xfs_mount;
13struct xfs_log_callback;
14 13
15/* 14/*
16 * Flags for log structure 15 * Flags for log structure
@@ -50,7 +49,6 @@ static inline uint xlog_get_client_id(__be32 i)
50#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */ 49#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
51#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/ 50#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
52#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */ 51#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
53#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */
54#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */ 52#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
55#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */ 53#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
56 54
@@ -179,11 +177,10 @@ typedef struct xlog_ticket {
179 * the iclog. 177 * the iclog.
180 * - ic_forcewait is used to implement synchronous forcing of the iclog to disk. 178 * - ic_forcewait is used to implement synchronous forcing of the iclog to disk.
181 * - ic_next is the pointer to the next iclog in the ring. 179 * - ic_next is the pointer to the next iclog in the ring.
182 * - ic_bp is a pointer to the buffer used to write this incore log to disk.
183 * - ic_log is a pointer back to the global log structure. 180 * - ic_log is a pointer back to the global log structure.
184 * - ic_callback is a linked list of callback function/argument pairs to be 181 * - ic_size is the full size of the log buffer, minus the cycle headers.
185 * called after an iclog finishes writing. 182 * - ic_io_size is the size of the currently pending log buffer write, which
186 * - ic_size is the full size of the header plus data. 183 * might be smaller than ic_size
187 * - ic_offset is the current number of bytes written to in this iclog. 184 * - ic_offset is the current number of bytes written to in this iclog.
188 * - ic_refcnt is bumped when someone is writing to the log. 185 * - ic_refcnt is bumped when someone is writing to the log.
189 * - ic_state is the state of the iclog. 186 * - ic_state is the state of the iclog.
@@ -193,7 +190,7 @@ typedef struct xlog_ticket {
193 * structure cacheline aligned. The following fields can be contended on 190 * structure cacheline aligned. The following fields can be contended on
194 * by independent processes: 191 * by independent processes:
195 * 192 *
196 * - ic_callback_* 193 * - ic_callbacks
197 * - ic_refcnt 194 * - ic_refcnt
198 * - fields protected by the global l_icloglock 195 * - fields protected by the global l_icloglock
199 * 196 *
@@ -206,23 +203,28 @@ typedef struct xlog_in_core {
206 wait_queue_head_t ic_write_wait; 203 wait_queue_head_t ic_write_wait;
207 struct xlog_in_core *ic_next; 204 struct xlog_in_core *ic_next;
208 struct xlog_in_core *ic_prev; 205 struct xlog_in_core *ic_prev;
209 struct xfs_buf *ic_bp;
210 struct xlog *ic_log; 206 struct xlog *ic_log;
211 int ic_size; 207 u32 ic_size;
212 int ic_offset; 208 u32 ic_io_size;
213 int ic_bwritecnt; 209 u32 ic_offset;
214 unsigned short ic_state; 210 unsigned short ic_state;
215 char *ic_datap; /* pointer to iclog data */ 211 char *ic_datap; /* pointer to iclog data */
216 212
217 /* Callback structures need their own cacheline */ 213 /* Callback structures need their own cacheline */
218 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp; 214 spinlock_t ic_callback_lock ____cacheline_aligned_in_smp;
219 struct xfs_log_callback *ic_callback; 215 struct list_head ic_callbacks;
220 struct xfs_log_callback **ic_callback_tail;
221 216
222 /* reference counts need their own cacheline */ 217 /* reference counts need their own cacheline */
223 atomic_t ic_refcnt ____cacheline_aligned_in_smp; 218 atomic_t ic_refcnt ____cacheline_aligned_in_smp;
224 xlog_in_core_2_t *ic_data; 219 xlog_in_core_2_t *ic_data;
225#define ic_header ic_data->hic_header 220#define ic_header ic_data->hic_header
221#ifdef DEBUG
222 bool ic_fail_crc : 1;
223#endif
224 struct semaphore ic_sema;
225 struct work_struct ic_end_io_work;
226 struct bio ic_bio;
227 struct bio_vec ic_bvec[];
226} xlog_in_core_t; 228} xlog_in_core_t;
227 229
228/* 230/*
@@ -243,7 +245,7 @@ struct xfs_cil_ctx {
243 int space_used; /* aggregate size of regions */ 245 int space_used; /* aggregate size of regions */
244 struct list_head busy_extents; /* busy extents in chkpt */ 246 struct list_head busy_extents; /* busy extents in chkpt */
245 struct xfs_log_vec *lv_chain; /* logvecs being pushed */ 247 struct xfs_log_vec *lv_chain; /* logvecs being pushed */
246 struct xfs_log_callback log_cb; /* completion callback hook. */ 248 struct list_head iclog_entry;
247 struct list_head committing; /* ctx committing list */ 249 struct list_head committing; /* ctx committing list */
248 struct work_struct discard_endio_work; 250 struct work_struct discard_endio_work;
249}; 251};
@@ -350,9 +352,8 @@ struct xlog {
350 struct xfs_mount *l_mp; /* mount point */ 352 struct xfs_mount *l_mp; /* mount point */
351 struct xfs_ail *l_ailp; /* AIL log is working with */ 353 struct xfs_ail *l_ailp; /* AIL log is working with */
352 struct xfs_cil *l_cilp; /* CIL log is working with */ 354 struct xfs_cil *l_cilp; /* CIL log is working with */
353 struct xfs_buf *l_xbuf; /* extra buffer for log
354 * wrapping */
355 struct xfs_buftarg *l_targ; /* buftarg of log */ 355 struct xfs_buftarg *l_targ; /* buftarg of log */
356 struct workqueue_struct *l_ioend_workqueue; /* for I/O completions */
356 struct delayed_work l_work; /* background flush work */ 357 struct delayed_work l_work; /* background flush work */
357 uint l_flags; 358 uint l_flags;
358 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */ 359 uint l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
@@ -361,7 +362,6 @@ struct xlog {
361 int l_iclog_heads; /* # of iclog header sectors */ 362 int l_iclog_heads; /* # of iclog header sectors */
362 uint l_sectBBsize; /* sector size in BBs (2^n) */ 363 uint l_sectBBsize; /* sector size in BBs (2^n) */
363 int l_iclog_size; /* size of log in bytes */ 364 int l_iclog_size; /* size of log in bytes */
364 int l_iclog_size_log; /* log power size of log */
365 int l_iclog_bufs; /* number of iclog buffers */ 365 int l_iclog_bufs; /* number of iclog buffers */
366 xfs_daddr_t l_logBBstart; /* start block of log */ 366 xfs_daddr_t l_logBBstart; /* start block of log */
367 int l_logsize; /* size of log in bytes */ 367 int l_logsize; /* size of log in bytes */
@@ -418,7 +418,7 @@ xlog_recover(
418extern int 418extern int
419xlog_recover_finish( 419xlog_recover_finish(
420 struct xlog *log); 420 struct xlog *log);
421extern int 421extern void
422xlog_recover_cancel(struct xlog *); 422xlog_recover_cancel(struct xlog *);
423 423
424extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead, 424extern __le32 xlog_cksum(struct xlog *log, struct xlog_rec_header *rhead,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9329f5adbfbe..13d1d3e95b88 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -13,8 +13,6 @@
13#include "xfs_sb.h" 13#include "xfs_sb.h"
14#include "xfs_mount.h" 14#include "xfs_mount.h"
15#include "xfs_defer.h" 15#include "xfs_defer.h"
16#include "xfs_da_format.h"
17#include "xfs_da_btree.h"
18#include "xfs_inode.h" 16#include "xfs_inode.h"
19#include "xfs_trans.h" 17#include "xfs_trans.h"
20#include "xfs_log.h" 18#include "xfs_log.h"
@@ -26,7 +24,6 @@
26#include "xfs_alloc.h" 24#include "xfs_alloc.h"
27#include "xfs_ialloc.h" 25#include "xfs_ialloc.h"
28#include "xfs_quota.h" 26#include "xfs_quota.h"
29#include "xfs_cksum.h"
30#include "xfs_trace.h" 27#include "xfs_trace.h"
31#include "xfs_icache.h" 28#include "xfs_icache.h"
32#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
@@ -79,7 +76,7 @@ struct xfs_buf_cancel {
79 * are valid, false otherwise. 76 * are valid, false otherwise.
80 */ 77 */
81static inline bool 78static inline bool
82xlog_verify_bp( 79xlog_verify_bno(
83 struct xlog *log, 80 struct xlog *log,
84 xfs_daddr_t blk_no, 81 xfs_daddr_t blk_no,
85 int bbcount) 82 int bbcount)
@@ -92,22 +89,19 @@ xlog_verify_bp(
92} 89}
93 90
94/* 91/*
95 * Allocate a buffer to hold log data. The buffer needs to be able 92 * Allocate a buffer to hold log data. The buffer needs to be able to map to
96 * to map to a range of nbblks basic blocks at any valid (basic 93 * a range of nbblks basic blocks at any valid offset within the log.
97 * block) offset within the log.
98 */ 94 */
99STATIC xfs_buf_t * 95static char *
100xlog_get_bp( 96xlog_alloc_buffer(
101 struct xlog *log, 97 struct xlog *log,
102 int nbblks) 98 int nbblks)
103{ 99{
104 struct xfs_buf *bp;
105
106 /* 100 /*
107 * Pass log block 0 since we don't have an addr yet, buffer will be 101 * Pass log block 0 since we don't have an addr yet, buffer will be
108 * verified on read. 102 * verified on read.
109 */ 103 */
110 if (!xlog_verify_bp(log, 0, nbblks)) { 104 if (!xlog_verify_bno(log, 0, nbblks)) {
111 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 105 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
112 nbblks); 106 nbblks);
113 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp); 107 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
@@ -115,69 +109,48 @@ xlog_get_bp(
115 } 109 }
116 110
117 /* 111 /*
118 * We do log I/O in units of log sectors (a power-of-2 112 * We do log I/O in units of log sectors (a power-of-2 multiple of the
119 * multiple of the basic block size), so we round up the 113 * basic block size), so we round up the requested size to accommodate
120 * requested size to accommodate the basic blocks required 114 * the basic blocks required for complete log sectors.
121 * for complete log sectors.
122 * 115 *
123 * In addition, the buffer may be used for a non-sector- 116 * In addition, the buffer may be used for a non-sector-aligned block
124 * aligned block offset, in which case an I/O of the 117 * offset, in which case an I/O of the requested size could extend
125 * requested size could extend beyond the end of the 118 * beyond the end of the buffer. If the requested size is only 1 basic
126 * buffer. If the requested size is only 1 basic block it 119 * block it will never straddle a sector boundary, so this won't be an
127 * will never straddle a sector boundary, so this won't be 120 * issue. Nor will this be a problem if the log I/O is done in basic
128 * an issue. Nor will this be a problem if the log I/O is 121 * blocks (sector size 1). But otherwise we extend the buffer by one
129 * done in basic blocks (sector size 1). But otherwise we 122 * extra log sector to ensure there's space to accommodate this
130 * extend the buffer by one extra log sector to ensure 123 * possibility.
131 * there's space to accommodate this possibility.
132 */ 124 */
133 if (nbblks > 1 && log->l_sectBBsize > 1) 125 if (nbblks > 1 && log->l_sectBBsize > 1)
134 nbblks += log->l_sectBBsize; 126 nbblks += log->l_sectBBsize;
135 nbblks = round_up(nbblks, log->l_sectBBsize); 127 nbblks = round_up(nbblks, log->l_sectBBsize);
136 128 return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
137 bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, nbblks, 0);
138 if (bp)
139 xfs_buf_unlock(bp);
140 return bp;
141}
142
143STATIC void
144xlog_put_bp(
145 xfs_buf_t *bp)
146{
147 xfs_buf_free(bp);
148} 129}
149 130
150/* 131/*
151 * Return the address of the start of the given block number's data 132 * Return the address of the start of the given block number's data
152 * in a log buffer. The buffer covers a log sector-aligned region. 133 * in a log buffer. The buffer covers a log sector-aligned region.
153 */ 134 */
154STATIC char * 135static inline unsigned int
155xlog_align( 136xlog_align(
156 struct xlog *log, 137 struct xlog *log,
157 xfs_daddr_t blk_no, 138 xfs_daddr_t blk_no)
158 int nbblks,
159 struct xfs_buf *bp)
160{ 139{
161 xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1); 140 return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1));
162
163 ASSERT(offset + nbblks <= bp->b_length);
164 return bp->b_addr + BBTOB(offset);
165} 141}
166 142
167 143static int
168/* 144xlog_do_io(
169 * nbblks should be uint, but oh well. Just want to catch that 32-bit length. 145 struct xlog *log,
170 */ 146 xfs_daddr_t blk_no,
171STATIC int 147 unsigned int nbblks,
172xlog_bread_noalign( 148 char *data,
173 struct xlog *log, 149 unsigned int op)
174 xfs_daddr_t blk_no,
175 int nbblks,
176 struct xfs_buf *bp)
177{ 150{
178 int error; 151 int error;
179 152
180 if (!xlog_verify_bp(log, blk_no, nbblks)) { 153 if (!xlog_verify_bno(log, blk_no, nbblks)) {
181 xfs_warn(log->l_mp, 154 xfs_warn(log->l_mp,
182 "Invalid log block/length (0x%llx, 0x%x) for buffer", 155 "Invalid log block/length (0x%llx, 0x%x) for buffer",
183 blk_no, nbblks); 156 blk_no, nbblks);
@@ -187,107 +160,53 @@ xlog_bread_noalign(
187 160
188 blk_no = round_down(blk_no, log->l_sectBBsize); 161 blk_no = round_down(blk_no, log->l_sectBBsize);
189 nbblks = round_up(nbblks, log->l_sectBBsize); 162 nbblks = round_up(nbblks, log->l_sectBBsize);
190
191 ASSERT(nbblks > 0); 163 ASSERT(nbblks > 0);
192 ASSERT(nbblks <= bp->b_length);
193
194 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
195 bp->b_flags |= XBF_READ;
196 bp->b_io_length = nbblks;
197 bp->b_error = 0;
198 164
199 error = xfs_buf_submit(bp); 165 error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no,
200 if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) 166 BBTOB(nbblks), data, op);
201 xfs_buf_ioerror_alert(bp, __func__); 167 if (error && !XFS_FORCED_SHUTDOWN(log->l_mp)) {
168 xfs_alert(log->l_mp,
169 "log recovery %s I/O error at daddr 0x%llx len %d error %d",
170 op == REQ_OP_WRITE ? "write" : "read",
171 blk_no, nbblks, error);
172 }
202 return error; 173 return error;
203} 174}
204 175
205STATIC int 176STATIC int
206xlog_bread( 177xlog_bread_noalign(
207 struct xlog *log, 178 struct xlog *log,
208 xfs_daddr_t blk_no, 179 xfs_daddr_t blk_no,
209 int nbblks, 180 int nbblks,
210 struct xfs_buf *bp, 181 char *data)
211 char **offset)
212{ 182{
213 int error; 183 return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
214
215 error = xlog_bread_noalign(log, blk_no, nbblks, bp);
216 if (error)
217 return error;
218
219 *offset = xlog_align(log, blk_no, nbblks, bp);
220 return 0;
221} 184}
222 185
223/*
224 * Read at an offset into the buffer. Returns with the buffer in it's original
225 * state regardless of the result of the read.
226 */
227STATIC int 186STATIC int
228xlog_bread_offset( 187xlog_bread(
229 struct xlog *log, 188 struct xlog *log,
230 xfs_daddr_t blk_no, /* block to read from */ 189 xfs_daddr_t blk_no,
231 int nbblks, /* blocks to read */ 190 int nbblks,
232 struct xfs_buf *bp, 191 char *data,
233 char *offset) 192 char **offset)
234{ 193{
235 char *orig_offset = bp->b_addr; 194 int error;
236 int orig_len = BBTOB(bp->b_length);
237 int error, error2;
238
239 error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
240 if (error)
241 return error;
242
243 error = xlog_bread_noalign(log, blk_no, nbblks, bp);
244 195
245 /* must reset buffer pointer even on error */ 196 error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ);
246 error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len); 197 if (!error)
247 if (error) 198 *offset = data + xlog_align(log, blk_no);
248 return error; 199 return error;
249 return error2;
250} 200}
251 201
252/*
253 * Write out the buffer at the given block for the given number of blocks.
254 * The buffer is kept locked across the write and is returned locked.
255 * This can only be used for synchronous log writes.
256 */
257STATIC int 202STATIC int
258xlog_bwrite( 203xlog_bwrite(
259 struct xlog *log, 204 struct xlog *log,
260 xfs_daddr_t blk_no, 205 xfs_daddr_t blk_no,
261 int nbblks, 206 int nbblks,
262 struct xfs_buf *bp) 207 char *data)
263{ 208{
264 int error; 209 return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE);
265
266 if (!xlog_verify_bp(log, blk_no, nbblks)) {
267 xfs_warn(log->l_mp,
268 "Invalid log block/length (0x%llx, 0x%x) for buffer",
269 blk_no, nbblks);
270 XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
271 return -EFSCORRUPTED;
272 }
273
274 blk_no = round_down(blk_no, log->l_sectBBsize);
275 nbblks = round_up(nbblks, log->l_sectBBsize);
276
277 ASSERT(nbblks > 0);
278 ASSERT(nbblks <= bp->b_length);
279
280 XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
281 xfs_buf_hold(bp);
282 xfs_buf_lock(bp);
283 bp->b_io_length = nbblks;
284 bp->b_error = 0;
285
286 error = xfs_bwrite(bp);
287 if (error)
288 xfs_buf_ioerror_alert(bp, __func__);
289 xfs_buf_relse(bp);
290 return error;
291} 210}
292 211
293#ifdef DEBUG 212#ifdef DEBUG
@@ -377,10 +296,9 @@ xlog_recover_iodone(
377 * We're not going to bother about retrying 296 * We're not going to bother about retrying
378 * this during recovery. One strike! 297 * this during recovery. One strike!
379 */ 298 */
380 if (!XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { 299 if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
381 xfs_buf_ioerror_alert(bp, __func__); 300 xfs_buf_ioerror_alert(bp, __func__);
382 xfs_force_shutdown(bp->b_target->bt_mount, 301 xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
383 SHUTDOWN_META_IO_ERROR);
384 } 302 }
385 } 303 }
386 304
@@ -405,7 +323,7 @@ xlog_recover_iodone(
405STATIC int 323STATIC int
406xlog_find_cycle_start( 324xlog_find_cycle_start(
407 struct xlog *log, 325 struct xlog *log,
408 struct xfs_buf *bp, 326 char *buffer,
409 xfs_daddr_t first_blk, 327 xfs_daddr_t first_blk,
410 xfs_daddr_t *last_blk, 328 xfs_daddr_t *last_blk,
411 uint cycle) 329 uint cycle)
@@ -419,7 +337,7 @@ xlog_find_cycle_start(
419 end_blk = *last_blk; 337 end_blk = *last_blk;
420 mid_blk = BLK_AVG(first_blk, end_blk); 338 mid_blk = BLK_AVG(first_blk, end_blk);
421 while (mid_blk != first_blk && mid_blk != end_blk) { 339 while (mid_blk != first_blk && mid_blk != end_blk) {
422 error = xlog_bread(log, mid_blk, 1, bp, &offset); 340 error = xlog_bread(log, mid_blk, 1, buffer, &offset);
423 if (error) 341 if (error)
424 return error; 342 return error;
425 mid_cycle = xlog_get_cycle(offset); 343 mid_cycle = xlog_get_cycle(offset);
@@ -455,7 +373,7 @@ xlog_find_verify_cycle(
455{ 373{
456 xfs_daddr_t i, j; 374 xfs_daddr_t i, j;
457 uint cycle; 375 uint cycle;
458 xfs_buf_t *bp; 376 char *buffer;
459 xfs_daddr_t bufblks; 377 xfs_daddr_t bufblks;
460 char *buf = NULL; 378 char *buf = NULL;
461 int error = 0; 379 int error = 0;
@@ -469,7 +387,7 @@ xlog_find_verify_cycle(
469 bufblks = 1 << ffs(nbblks); 387 bufblks = 1 << ffs(nbblks);
470 while (bufblks > log->l_logBBsize) 388 while (bufblks > log->l_logBBsize)
471 bufblks >>= 1; 389 bufblks >>= 1;
472 while (!(bp = xlog_get_bp(log, bufblks))) { 390 while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
473 bufblks >>= 1; 391 bufblks >>= 1;
474 if (bufblks < log->l_sectBBsize) 392 if (bufblks < log->l_sectBBsize)
475 return -ENOMEM; 393 return -ENOMEM;
@@ -480,7 +398,7 @@ xlog_find_verify_cycle(
480 398
481 bcount = min(bufblks, (start_blk + nbblks - i)); 399 bcount = min(bufblks, (start_blk + nbblks - i));
482 400
483 error = xlog_bread(log, i, bcount, bp, &buf); 401 error = xlog_bread(log, i, bcount, buffer, &buf);
484 if (error) 402 if (error)
485 goto out; 403 goto out;
486 404
@@ -498,7 +416,7 @@ xlog_find_verify_cycle(
498 *new_blk = -1; 416 *new_blk = -1;
499 417
500out: 418out:
501 xlog_put_bp(bp); 419 kmem_free(buffer);
502 return error; 420 return error;
503} 421}
504 422
@@ -522,7 +440,7 @@ xlog_find_verify_log_record(
522 int extra_bblks) 440 int extra_bblks)
523{ 441{
524 xfs_daddr_t i; 442 xfs_daddr_t i;
525 xfs_buf_t *bp; 443 char *buffer;
526 char *offset = NULL; 444 char *offset = NULL;
527 xlog_rec_header_t *head = NULL; 445 xlog_rec_header_t *head = NULL;
528 int error = 0; 446 int error = 0;
@@ -532,12 +450,14 @@ xlog_find_verify_log_record(
532 450
533 ASSERT(start_blk != 0 || *last_blk != start_blk); 451 ASSERT(start_blk != 0 || *last_blk != start_blk);
534 452
535 if (!(bp = xlog_get_bp(log, num_blks))) { 453 buffer = xlog_alloc_buffer(log, num_blks);
536 if (!(bp = xlog_get_bp(log, 1))) 454 if (!buffer) {
455 buffer = xlog_alloc_buffer(log, 1);
456 if (!buffer)
537 return -ENOMEM; 457 return -ENOMEM;
538 smallmem = 1; 458 smallmem = 1;
539 } else { 459 } else {
540 error = xlog_bread(log, start_blk, num_blks, bp, &offset); 460 error = xlog_bread(log, start_blk, num_blks, buffer, &offset);
541 if (error) 461 if (error)
542 goto out; 462 goto out;
543 offset += ((num_blks - 1) << BBSHIFT); 463 offset += ((num_blks - 1) << BBSHIFT);
@@ -554,7 +474,7 @@ xlog_find_verify_log_record(
554 } 474 }
555 475
556 if (smallmem) { 476 if (smallmem) {
557 error = xlog_bread(log, i, 1, bp, &offset); 477 error = xlog_bread(log, i, 1, buffer, &offset);
558 if (error) 478 if (error)
559 goto out; 479 goto out;
560 } 480 }
@@ -607,7 +527,7 @@ xlog_find_verify_log_record(
607 *last_blk = i; 527 *last_blk = i;
608 528
609out: 529out:
610 xlog_put_bp(bp); 530 kmem_free(buffer);
611 return error; 531 return error;
612} 532}
613 533
@@ -629,7 +549,7 @@ xlog_find_head(
629 struct xlog *log, 549 struct xlog *log,
630 xfs_daddr_t *return_head_blk) 550 xfs_daddr_t *return_head_blk)
631{ 551{
632 xfs_buf_t *bp; 552 char *buffer;
633 char *offset; 553 char *offset;
634 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; 554 xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk;
635 int num_scan_bblks; 555 int num_scan_bblks;
@@ -659,20 +579,20 @@ xlog_find_head(
659 } 579 }
660 580
661 first_blk = 0; /* get cycle # of 1st block */ 581 first_blk = 0; /* get cycle # of 1st block */
662 bp = xlog_get_bp(log, 1); 582 buffer = xlog_alloc_buffer(log, 1);
663 if (!bp) 583 if (!buffer)
664 return -ENOMEM; 584 return -ENOMEM;
665 585
666 error = xlog_bread(log, 0, 1, bp, &offset); 586 error = xlog_bread(log, 0, 1, buffer, &offset);
667 if (error) 587 if (error)
668 goto bp_err; 588 goto out_free_buffer;
669 589
670 first_half_cycle = xlog_get_cycle(offset); 590 first_half_cycle = xlog_get_cycle(offset);
671 591
672 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ 592 last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */
673 error = xlog_bread(log, last_blk, 1, bp, &offset); 593 error = xlog_bread(log, last_blk, 1, buffer, &offset);
674 if (error) 594 if (error)
675 goto bp_err; 595 goto out_free_buffer;
676 596
677 last_half_cycle = xlog_get_cycle(offset); 597 last_half_cycle = xlog_get_cycle(offset);
678 ASSERT(last_half_cycle != 0); 598 ASSERT(last_half_cycle != 0);
@@ -740,9 +660,10 @@ xlog_find_head(
740 * ^ we want to locate this spot 660 * ^ we want to locate this spot
741 */ 661 */
742 stop_on_cycle = last_half_cycle; 662 stop_on_cycle = last_half_cycle;
743 if ((error = xlog_find_cycle_start(log, bp, first_blk, 663 error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk,
744 &head_blk, last_half_cycle))) 664 last_half_cycle);
745 goto bp_err; 665 if (error)
666 goto out_free_buffer;
746 } 667 }
747 668
748 /* 669 /*
@@ -762,7 +683,7 @@ xlog_find_head(
762 if ((error = xlog_find_verify_cycle(log, 683 if ((error = xlog_find_verify_cycle(log,
763 start_blk, num_scan_bblks, 684 start_blk, num_scan_bblks,
764 stop_on_cycle, &new_blk))) 685 stop_on_cycle, &new_blk)))
765 goto bp_err; 686 goto out_free_buffer;
766 if (new_blk != -1) 687 if (new_blk != -1)
767 head_blk = new_blk; 688 head_blk = new_blk;
768 } else { /* need to read 2 parts of log */ 689 } else { /* need to read 2 parts of log */
@@ -799,7 +720,7 @@ xlog_find_head(
799 if ((error = xlog_find_verify_cycle(log, start_blk, 720 if ((error = xlog_find_verify_cycle(log, start_blk,
800 num_scan_bblks - (int)head_blk, 721 num_scan_bblks - (int)head_blk,
801 (stop_on_cycle - 1), &new_blk))) 722 (stop_on_cycle - 1), &new_blk)))
802 goto bp_err; 723 goto out_free_buffer;
803 if (new_blk != -1) { 724 if (new_blk != -1) {
804 head_blk = new_blk; 725 head_blk = new_blk;
805 goto validate_head; 726 goto validate_head;
@@ -815,7 +736,7 @@ xlog_find_head(
815 if ((error = xlog_find_verify_cycle(log, 736 if ((error = xlog_find_verify_cycle(log,
816 start_blk, (int)head_blk, 737 start_blk, (int)head_blk,
817 stop_on_cycle, &new_blk))) 738 stop_on_cycle, &new_blk)))
818 goto bp_err; 739 goto out_free_buffer;
819 if (new_blk != -1) 740 if (new_blk != -1)
820 head_blk = new_blk; 741 head_blk = new_blk;
821 } 742 }
@@ -834,13 +755,13 @@ validate_head:
834 if (error == 1) 755 if (error == 1)
835 error = -EIO; 756 error = -EIO;
836 if (error) 757 if (error)
837 goto bp_err; 758 goto out_free_buffer;
838 } else { 759 } else {
839 start_blk = 0; 760 start_blk = 0;
840 ASSERT(head_blk <= INT_MAX); 761 ASSERT(head_blk <= INT_MAX);
841 error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); 762 error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0);
842 if (error < 0) 763 if (error < 0)
843 goto bp_err; 764 goto out_free_buffer;
844 if (error == 1) { 765 if (error == 1) {
845 /* We hit the beginning of the log during our search */ 766 /* We hit the beginning of the log during our search */
846 start_blk = log_bbnum - (num_scan_bblks - head_blk); 767 start_blk = log_bbnum - (num_scan_bblks - head_blk);
@@ -853,14 +774,14 @@ validate_head:
853 if (error == 1) 774 if (error == 1)
854 error = -EIO; 775 error = -EIO;
855 if (error) 776 if (error)
856 goto bp_err; 777 goto out_free_buffer;
857 if (new_blk != log_bbnum) 778 if (new_blk != log_bbnum)
858 head_blk = new_blk; 779 head_blk = new_blk;
859 } else if (error) 780 } else if (error)
860 goto bp_err; 781 goto out_free_buffer;
861 } 782 }
862 783
863 xlog_put_bp(bp); 784 kmem_free(buffer);
864 if (head_blk == log_bbnum) 785 if (head_blk == log_bbnum)
865 *return_head_blk = 0; 786 *return_head_blk = 0;
866 else 787 else
@@ -873,9 +794,8 @@ validate_head:
873 */ 794 */
874 return 0; 795 return 0;
875 796
876 bp_err: 797out_free_buffer:
877 xlog_put_bp(bp); 798 kmem_free(buffer);
878
879 if (error) 799 if (error)
880 xfs_warn(log->l_mp, "failed to find log head"); 800 xfs_warn(log->l_mp, "failed to find log head");
881 return error; 801 return error;
@@ -895,7 +815,7 @@ xlog_rseek_logrec_hdr(
895 xfs_daddr_t head_blk, 815 xfs_daddr_t head_blk,
896 xfs_daddr_t tail_blk, 816 xfs_daddr_t tail_blk,
897 int count, 817 int count,
898 struct xfs_buf *bp, 818 char *buffer,
899 xfs_daddr_t *rblk, 819 xfs_daddr_t *rblk,
900 struct xlog_rec_header **rhead, 820 struct xlog_rec_header **rhead,
901 bool *wrapped) 821 bool *wrapped)
@@ -914,7 +834,7 @@ xlog_rseek_logrec_hdr(
914 */ 834 */
915 end_blk = head_blk > tail_blk ? tail_blk : 0; 835 end_blk = head_blk > tail_blk ? tail_blk : 0;
916 for (i = (int) head_blk - 1; i >= end_blk; i--) { 836 for (i = (int) head_blk - 1; i >= end_blk; i--) {
917 error = xlog_bread(log, i, 1, bp, &offset); 837 error = xlog_bread(log, i, 1, buffer, &offset);
918 if (error) 838 if (error)
919 goto out_error; 839 goto out_error;
920 840
@@ -933,7 +853,7 @@ xlog_rseek_logrec_hdr(
933 */ 853 */
934 if (tail_blk >= head_blk && found != count) { 854 if (tail_blk >= head_blk && found != count) {
935 for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) { 855 for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
936 error = xlog_bread(log, i, 1, bp, &offset); 856 error = xlog_bread(log, i, 1, buffer, &offset);
937 if (error) 857 if (error)
938 goto out_error; 858 goto out_error;
939 859
@@ -969,7 +889,7 @@ xlog_seek_logrec_hdr(
969 xfs_daddr_t head_blk, 889 xfs_daddr_t head_blk,
970 xfs_daddr_t tail_blk, 890 xfs_daddr_t tail_blk,
971 int count, 891 int count,
972 struct xfs_buf *bp, 892 char *buffer,
973 xfs_daddr_t *rblk, 893 xfs_daddr_t *rblk,
974 struct xlog_rec_header **rhead, 894 struct xlog_rec_header **rhead,
975 bool *wrapped) 895 bool *wrapped)
@@ -988,7 +908,7 @@ xlog_seek_logrec_hdr(
988 */ 908 */
989 end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1; 909 end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
990 for (i = (int) tail_blk; i <= end_blk; i++) { 910 for (i = (int) tail_blk; i <= end_blk; i++) {
991 error = xlog_bread(log, i, 1, bp, &offset); 911 error = xlog_bread(log, i, 1, buffer, &offset);
992 if (error) 912 if (error)
993 goto out_error; 913 goto out_error;
994 914
@@ -1006,7 +926,7 @@ xlog_seek_logrec_hdr(
1006 */ 926 */
1007 if (tail_blk > head_blk && found != count) { 927 if (tail_blk > head_blk && found != count) {
1008 for (i = 0; i < (int) head_blk; i++) { 928 for (i = 0; i < (int) head_blk; i++) {
1009 error = xlog_bread(log, i, 1, bp, &offset); 929 error = xlog_bread(log, i, 1, buffer, &offset);
1010 if (error) 930 if (error)
1011 goto out_error; 931 goto out_error;
1012 932
@@ -1069,22 +989,22 @@ xlog_verify_tail(
1069 int hsize) 989 int hsize)
1070{ 990{
1071 struct xlog_rec_header *thead; 991 struct xlog_rec_header *thead;
1072 struct xfs_buf *bp; 992 char *buffer;
1073 xfs_daddr_t first_bad; 993 xfs_daddr_t first_bad;
1074 int error = 0; 994 int error = 0;
1075 bool wrapped; 995 bool wrapped;
1076 xfs_daddr_t tmp_tail; 996 xfs_daddr_t tmp_tail;
1077 xfs_daddr_t orig_tail = *tail_blk; 997 xfs_daddr_t orig_tail = *tail_blk;
1078 998
1079 bp = xlog_get_bp(log, 1); 999 buffer = xlog_alloc_buffer(log, 1);
1080 if (!bp) 1000 if (!buffer)
1081 return -ENOMEM; 1001 return -ENOMEM;
1082 1002
1083 /* 1003 /*
1084 * Make sure the tail points to a record (returns positive count on 1004 * Make sure the tail points to a record (returns positive count on
1085 * success). 1005 * success).
1086 */ 1006 */
1087 error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp, 1007 error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer,
1088 &tmp_tail, &thead, &wrapped); 1008 &tmp_tail, &thead, &wrapped);
1089 if (error < 0) 1009 if (error < 0)
1090 goto out; 1010 goto out;
@@ -1113,8 +1033,8 @@ xlog_verify_tail(
1113 break; 1033 break;
1114 1034
1115 /* skip to the next record; returns positive count on success */ 1035 /* skip to the next record; returns positive count on success */
1116 error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp, 1036 error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2,
1117 &tmp_tail, &thead, &wrapped); 1037 buffer, &tmp_tail, &thead, &wrapped);
1118 if (error < 0) 1038 if (error < 0)
1119 goto out; 1039 goto out;
1120 1040
@@ -1129,7 +1049,7 @@ xlog_verify_tail(
1129 "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", 1049 "Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
1130 orig_tail, *tail_blk); 1050 orig_tail, *tail_blk);
1131out: 1051out:
1132 xlog_put_bp(bp); 1052 kmem_free(buffer);
1133 return error; 1053 return error;
1134} 1054}
1135 1055
@@ -1151,13 +1071,13 @@ xlog_verify_head(
1151 struct xlog *log, 1071 struct xlog *log,
1152 xfs_daddr_t *head_blk, /* in/out: unverified head */ 1072 xfs_daddr_t *head_blk, /* in/out: unverified head */
1153 xfs_daddr_t *tail_blk, /* out: tail block */ 1073 xfs_daddr_t *tail_blk, /* out: tail block */
1154 struct xfs_buf *bp, 1074 char *buffer,
1155 xfs_daddr_t *rhead_blk, /* start blk of last record */ 1075 xfs_daddr_t *rhead_blk, /* start blk of last record */
1156 struct xlog_rec_header **rhead, /* ptr to last record */ 1076 struct xlog_rec_header **rhead, /* ptr to last record */
1157 bool *wrapped) /* last rec. wraps phys. log */ 1077 bool *wrapped) /* last rec. wraps phys. log */
1158{ 1078{
1159 struct xlog_rec_header *tmp_rhead; 1079 struct xlog_rec_header *tmp_rhead;
1160 struct xfs_buf *tmp_bp; 1080 char *tmp_buffer;
1161 xfs_daddr_t first_bad; 1081 xfs_daddr_t first_bad;
1162 xfs_daddr_t tmp_rhead_blk; 1082 xfs_daddr_t tmp_rhead_blk;
1163 int found; 1083 int found;
@@ -1168,15 +1088,15 @@ xlog_verify_head(
1168 * Check the head of the log for torn writes. Search backwards from the 1088 * Check the head of the log for torn writes. Search backwards from the
1169 * head until we hit the tail or the maximum number of log record I/Os 1089 * head until we hit the tail or the maximum number of log record I/Os
1170 * that could have been in flight at one time. Use a temporary buffer so 1090 * that could have been in flight at one time. Use a temporary buffer so
1171 * we don't trash the rhead/bp pointers from the caller. 1091 * we don't trash the rhead/buffer pointers from the caller.
1172 */ 1092 */
1173 tmp_bp = xlog_get_bp(log, 1); 1093 tmp_buffer = xlog_alloc_buffer(log, 1);
1174 if (!tmp_bp) 1094 if (!tmp_buffer)
1175 return -ENOMEM; 1095 return -ENOMEM;
1176 error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, 1096 error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
1177 XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk, 1097 XLOG_MAX_ICLOGS, tmp_buffer,
1178 &tmp_rhead, &tmp_wrapped); 1098 &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped);
1179 xlog_put_bp(tmp_bp); 1099 kmem_free(tmp_buffer);
1180 if (error < 0) 1100 if (error < 0)
1181 return error; 1101 return error;
1182 1102
@@ -1205,8 +1125,8 @@ xlog_verify_head(
1205 * (i.e., the records with invalid CRC) if the cycle number 1125 * (i.e., the records with invalid CRC) if the cycle number
1206 * matches the the current cycle. 1126 * matches the the current cycle.
1207 */ 1127 */
1208 found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp, 1128 found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1,
1209 rhead_blk, rhead, wrapped); 1129 buffer, rhead_blk, rhead, wrapped);
1210 if (found < 0) 1130 if (found < 0)
1211 return found; 1131 return found;
1212 if (found == 0) /* XXX: right thing to do here? */ 1132 if (found == 0) /* XXX: right thing to do here? */
@@ -1266,7 +1186,7 @@ xlog_check_unmount_rec(
1266 xfs_daddr_t *tail_blk, 1186 xfs_daddr_t *tail_blk,
1267 struct xlog_rec_header *rhead, 1187 struct xlog_rec_header *rhead,
1268 xfs_daddr_t rhead_blk, 1188 xfs_daddr_t rhead_blk,
1269 struct xfs_buf *bp, 1189 char *buffer,
1270 bool *clean) 1190 bool *clean)
1271{ 1191{
1272 struct xlog_op_header *op_head; 1192 struct xlog_op_header *op_head;
@@ -1309,7 +1229,7 @@ xlog_check_unmount_rec(
1309 if (*head_blk == after_umount_blk && 1229 if (*head_blk == after_umount_blk &&
1310 be32_to_cpu(rhead->h_num_logops) == 1) { 1230 be32_to_cpu(rhead->h_num_logops) == 1) {
1311 umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks); 1231 umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks);
1312 error = xlog_bread(log, umount_data_blk, 1, bp, &offset); 1232 error = xlog_bread(log, umount_data_blk, 1, buffer, &offset);
1313 if (error) 1233 if (error)
1314 return error; 1234 return error;
1315 1235
@@ -1388,7 +1308,7 @@ xlog_find_tail(
1388{ 1308{
1389 xlog_rec_header_t *rhead; 1309 xlog_rec_header_t *rhead;
1390 char *offset = NULL; 1310 char *offset = NULL;
1391 xfs_buf_t *bp; 1311 char *buffer;
1392 int error; 1312 int error;
1393 xfs_daddr_t rhead_blk; 1313 xfs_daddr_t rhead_blk;
1394 xfs_lsn_t tail_lsn; 1314 xfs_lsn_t tail_lsn;
@@ -1402,11 +1322,11 @@ xlog_find_tail(
1402 return error; 1322 return error;
1403 ASSERT(*head_blk < INT_MAX); 1323 ASSERT(*head_blk < INT_MAX);
1404 1324
1405 bp = xlog_get_bp(log, 1); 1325 buffer = xlog_alloc_buffer(log, 1);
1406 if (!bp) 1326 if (!buffer)
1407 return -ENOMEM; 1327 return -ENOMEM;
1408 if (*head_blk == 0) { /* special case */ 1328 if (*head_blk == 0) { /* special case */
1409 error = xlog_bread(log, 0, 1, bp, &offset); 1329 error = xlog_bread(log, 0, 1, buffer, &offset);
1410 if (error) 1330 if (error)
1411 goto done; 1331 goto done;
1412 1332
@@ -1422,7 +1342,7 @@ xlog_find_tail(
1422 * block. This wraps all the way back around to the head so something is 1342 * block. This wraps all the way back around to the head so something is
1423 * seriously wrong if we can't find it. 1343 * seriously wrong if we can't find it.
1424 */ 1344 */
1425 error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, 1345 error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer,
1426 &rhead_blk, &rhead, &wrapped); 1346 &rhead_blk, &rhead, &wrapped);
1427 if (error < 0) 1347 if (error < 0)
1428 return error; 1348 return error;
@@ -1443,7 +1363,7 @@ xlog_find_tail(
1443 * state to determine whether recovery is necessary. 1363 * state to determine whether recovery is necessary.
1444 */ 1364 */
1445 error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, 1365 error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead,
1446 rhead_blk, bp, &clean); 1366 rhead_blk, buffer, &clean);
1447 if (error) 1367 if (error)
1448 goto done; 1368 goto done;
1449 1369
@@ -1460,7 +1380,7 @@ xlog_find_tail(
1460 if (!clean) { 1380 if (!clean) {
1461 xfs_daddr_t orig_head = *head_blk; 1381 xfs_daddr_t orig_head = *head_blk;
1462 1382
1463 error = xlog_verify_head(log, head_blk, tail_blk, bp, 1383 error = xlog_verify_head(log, head_blk, tail_blk, buffer,
1464 &rhead_blk, &rhead, &wrapped); 1384 &rhead_blk, &rhead, &wrapped);
1465 if (error) 1385 if (error)
1466 goto done; 1386 goto done;
@@ -1471,7 +1391,7 @@ xlog_find_tail(
1471 wrapped); 1391 wrapped);
1472 tail_lsn = atomic64_read(&log->l_tail_lsn); 1392 tail_lsn = atomic64_read(&log->l_tail_lsn);
1473 error = xlog_check_unmount_rec(log, head_blk, tail_blk, 1393 error = xlog_check_unmount_rec(log, head_blk, tail_blk,
1474 rhead, rhead_blk, bp, 1394 rhead, rhead_blk, buffer,
1475 &clean); 1395 &clean);
1476 if (error) 1396 if (error)
1477 goto done; 1397 goto done;
@@ -1505,11 +1425,11 @@ xlog_find_tail(
1505 * But... if the -device- itself is readonly, just skip this. 1425 * But... if the -device- itself is readonly, just skip this.
1506 * We can't recover this device anyway, so it won't matter. 1426 * We can't recover this device anyway, so it won't matter.
1507 */ 1427 */
1508 if (!xfs_readonly_buftarg(log->l_mp->m_logdev_targp)) 1428 if (!xfs_readonly_buftarg(log->l_targ))
1509 error = xlog_clear_stale_blocks(log, tail_lsn); 1429 error = xlog_clear_stale_blocks(log, tail_lsn);
1510 1430
1511done: 1431done:
1512 xlog_put_bp(bp); 1432 kmem_free(buffer);
1513 1433
1514 if (error) 1434 if (error)
1515 xfs_warn(log->l_mp, "failed to locate log tail"); 1435 xfs_warn(log->l_mp, "failed to locate log tail");
@@ -1537,7 +1457,7 @@ xlog_find_zeroed(
1537 struct xlog *log, 1457 struct xlog *log,
1538 xfs_daddr_t *blk_no) 1458 xfs_daddr_t *blk_no)
1539{ 1459{
1540 xfs_buf_t *bp; 1460 char *buffer;
1541 char *offset; 1461 char *offset;
1542 uint first_cycle, last_cycle; 1462 uint first_cycle, last_cycle;
1543 xfs_daddr_t new_blk, last_blk, start_blk; 1463 xfs_daddr_t new_blk, last_blk, start_blk;
@@ -1547,35 +1467,36 @@ xlog_find_zeroed(
1547 *blk_no = 0; 1467 *blk_no = 0;
1548 1468
1549 /* check totally zeroed log */ 1469 /* check totally zeroed log */
1550 bp = xlog_get_bp(log, 1); 1470 buffer = xlog_alloc_buffer(log, 1);
1551 if (!bp) 1471 if (!buffer)
1552 return -ENOMEM; 1472 return -ENOMEM;
1553 error = xlog_bread(log, 0, 1, bp, &offset); 1473 error = xlog_bread(log, 0, 1, buffer, &offset);
1554 if (error) 1474 if (error)
1555 goto bp_err; 1475 goto out_free_buffer;
1556 1476
1557 first_cycle = xlog_get_cycle(offset); 1477 first_cycle = xlog_get_cycle(offset);
1558 if (first_cycle == 0) { /* completely zeroed log */ 1478 if (first_cycle == 0) { /* completely zeroed log */
1559 *blk_no = 0; 1479 *blk_no = 0;
1560 xlog_put_bp(bp); 1480 kmem_free(buffer);
1561 return 1; 1481 return 1;
1562 } 1482 }
1563 1483
1564 /* check partially zeroed log */ 1484 /* check partially zeroed log */
1565 error = xlog_bread(log, log_bbnum-1, 1, bp, &offset); 1485 error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset);
1566 if (error) 1486 if (error)
1567 goto bp_err; 1487 goto out_free_buffer;
1568 1488
1569 last_cycle = xlog_get_cycle(offset); 1489 last_cycle = xlog_get_cycle(offset);
1570 if (last_cycle != 0) { /* log completely written to */ 1490 if (last_cycle != 0) { /* log completely written to */
1571 xlog_put_bp(bp); 1491 kmem_free(buffer);
1572 return 0; 1492 return 0;
1573 } 1493 }
1574 1494
1575 /* we have a partially zeroed log */ 1495 /* we have a partially zeroed log */
1576 last_blk = log_bbnum-1; 1496 last_blk = log_bbnum-1;
1577 if ((error = xlog_find_cycle_start(log, bp, 0, &last_blk, 0))) 1497 error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0);
1578 goto bp_err; 1498 if (error)
1499 goto out_free_buffer;
1579 1500
1580 /* 1501 /*
1581 * Validate the answer. Because there is no way to guarantee that 1502 * Validate the answer. Because there is no way to guarantee that
@@ -1598,7 +1519,7 @@ xlog_find_zeroed(
1598 */ 1519 */
1599 if ((error = xlog_find_verify_cycle(log, start_blk, 1520 if ((error = xlog_find_verify_cycle(log, start_blk,
1600 (int)num_scan_bblks, 0, &new_blk))) 1521 (int)num_scan_bblks, 0, &new_blk)))
1601 goto bp_err; 1522 goto out_free_buffer;
1602 if (new_blk != -1) 1523 if (new_blk != -1)
1603 last_blk = new_blk; 1524 last_blk = new_blk;
1604 1525
@@ -1610,11 +1531,11 @@ xlog_find_zeroed(
1610 if (error == 1) 1531 if (error == 1)
1611 error = -EIO; 1532 error = -EIO;
1612 if (error) 1533 if (error)
1613 goto bp_err; 1534 goto out_free_buffer;
1614 1535
1615 *blk_no = last_blk; 1536 *blk_no = last_blk;
1616bp_err: 1537out_free_buffer:
1617 xlog_put_bp(bp); 1538 kmem_free(buffer);
1618 if (error) 1539 if (error)
1619 return error; 1540 return error;
1620 return 1; 1541 return 1;
@@ -1657,7 +1578,7 @@ xlog_write_log_records(
1657 int tail_block) 1578 int tail_block)
1658{ 1579{
1659 char *offset; 1580 char *offset;
1660 xfs_buf_t *bp; 1581 char *buffer;
1661 int balign, ealign; 1582 int balign, ealign;
1662 int sectbb = log->l_sectBBsize; 1583 int sectbb = log->l_sectBBsize;
1663 int end_block = start_block + blocks; 1584 int end_block = start_block + blocks;
@@ -1674,7 +1595,7 @@ xlog_write_log_records(
1674 bufblks = 1 << ffs(blocks); 1595 bufblks = 1 << ffs(blocks);
1675 while (bufblks > log->l_logBBsize) 1596 while (bufblks > log->l_logBBsize)
1676 bufblks >>= 1; 1597 bufblks >>= 1;
1677 while (!(bp = xlog_get_bp(log, bufblks))) { 1598 while (!(buffer = xlog_alloc_buffer(log, bufblks))) {
1678 bufblks >>= 1; 1599 bufblks >>= 1;
1679 if (bufblks < sectbb) 1600 if (bufblks < sectbb)
1680 return -ENOMEM; 1601 return -ENOMEM;
@@ -1686,9 +1607,9 @@ xlog_write_log_records(
1686 */ 1607 */
1687 balign = round_down(start_block, sectbb); 1608 balign = round_down(start_block, sectbb);
1688 if (balign != start_block) { 1609 if (balign != start_block) {
1689 error = xlog_bread_noalign(log, start_block, 1, bp); 1610 error = xlog_bread_noalign(log, start_block, 1, buffer);
1690 if (error) 1611 if (error)
1691 goto out_put_bp; 1612 goto out_free_buffer;
1692 1613
1693 j = start_block - balign; 1614 j = start_block - balign;
1694 } 1615 }
@@ -1705,29 +1626,28 @@ xlog_write_log_records(
1705 */ 1626 */
1706 ealign = round_down(end_block, sectbb); 1627 ealign = round_down(end_block, sectbb);
1707 if (j == 0 && (start_block + endcount > ealign)) { 1628 if (j == 0 && (start_block + endcount > ealign)) {
1708 offset = bp->b_addr + BBTOB(ealign - start_block); 1629 error = xlog_bread_noalign(log, ealign, sectbb,
1709 error = xlog_bread_offset(log, ealign, sectbb, 1630 buffer + BBTOB(ealign - start_block));
1710 bp, offset);
1711 if (error) 1631 if (error)
1712 break; 1632 break;
1713 1633
1714 } 1634 }
1715 1635
1716 offset = xlog_align(log, start_block, endcount, bp); 1636 offset = buffer + xlog_align(log, start_block);
1717 for (; j < endcount; j++) { 1637 for (; j < endcount; j++) {
1718 xlog_add_record(log, offset, cycle, i+j, 1638 xlog_add_record(log, offset, cycle, i+j,
1719 tail_cycle, tail_block); 1639 tail_cycle, tail_block);
1720 offset += BBSIZE; 1640 offset += BBSIZE;
1721 } 1641 }
1722 error = xlog_bwrite(log, start_block, endcount, bp); 1642 error = xlog_bwrite(log, start_block, endcount, buffer);
1723 if (error) 1643 if (error)
1724 break; 1644 break;
1725 start_block += endcount; 1645 start_block += endcount;
1726 j = 0; 1646 j = 0;
1727 } 1647 }
1728 1648
1729 out_put_bp: 1649out_free_buffer:
1730 xlog_put_bp(bp); 1650 kmem_free(buffer);
1731 return error; 1651 return error;
1732} 1652}
1733 1653
@@ -2162,7 +2082,7 @@ xlog_recover_do_inode_buffer(
2162 if (xfs_sb_version_hascrc(&mp->m_sb)) 2082 if (xfs_sb_version_hascrc(&mp->m_sb))
2163 bp->b_ops = &xfs_inode_buf_ops; 2083 bp->b_ops = &xfs_inode_buf_ops;
2164 2084
2165 inodes_per_buf = BBTOB(bp->b_io_length) >> mp->m_sb.sb_inodelog; 2085 inodes_per_buf = BBTOB(bp->b_length) >> mp->m_sb.sb_inodelog;
2166 for (i = 0; i < inodes_per_buf; i++) { 2086 for (i = 0; i < inodes_per_buf; i++) {
2167 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) + 2087 next_unlinked_offset = (i * mp->m_sb.sb_inodesize) +
2168 offsetof(xfs_dinode_t, di_next_unlinked); 2088 offsetof(xfs_dinode_t, di_next_unlinked);
@@ -2204,8 +2124,7 @@ xlog_recover_do_inode_buffer(
2204 2124
2205 ASSERT(item->ri_buf[item_index].i_addr != NULL); 2125 ASSERT(item->ri_buf[item_index].i_addr != NULL);
2206 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0); 2126 ASSERT((item->ri_buf[item_index].i_len % XFS_BLF_CHUNK) == 0);
2207 ASSERT((reg_buf_offset + reg_buf_bytes) <= 2127 ASSERT((reg_buf_offset + reg_buf_bytes) <= BBTOB(bp->b_length));
2208 BBTOB(bp->b_io_length));
2209 2128
2210 /* 2129 /*
2211 * The current logged region contains a copy of the 2130 * The current logged region contains a copy of the
@@ -2670,7 +2589,7 @@ xlog_recover_do_reg_buffer(
2670 ASSERT(nbits > 0); 2589 ASSERT(nbits > 0);
2671 ASSERT(item->ri_buf[i].i_addr != NULL); 2590 ASSERT(item->ri_buf[i].i_addr != NULL);
2672 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0); 2591 ASSERT(item->ri_buf[i].i_len % XFS_BLF_CHUNK == 0);
2673 ASSERT(BBTOB(bp->b_io_length) >= 2592 ASSERT(BBTOB(bp->b_length) >=
2674 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT)); 2593 ((uint)bit << XFS_BLF_SHIFT) + (nbits << XFS_BLF_SHIFT));
2675 2594
2676 /* 2595 /*
@@ -2882,23 +2801,22 @@ xlog_recover_buffer_pass2(
2882 * 2801 *
2883 * Also make sure that only inode buffers with good sizes stay in 2802 * Also make sure that only inode buffers with good sizes stay in
2884 * the buffer cache. The kernel moves inodes in buffers of 1 block 2803 * the buffer cache. The kernel moves inodes in buffers of 1 block
2885 * or mp->m_inode_cluster_size bytes, whichever is bigger. The inode 2804 * or inode_cluster_size bytes, whichever is bigger. The inode
2886 * buffers in the log can be a different size if the log was generated 2805 * buffers in the log can be a different size if the log was generated
2887 * by an older kernel using unclustered inode buffers or a newer kernel 2806 * by an older kernel using unclustered inode buffers or a newer kernel
L11184 2888 * running with a different inode cluster size. Regardless, if the 2807 * running with a different inode cluster size. Regardless, if the
L11185 2889 * inode buffer size isn't max(blocksize, mp->m_inode_cluster_size) 2808 * inode buffer size isn't max(blocksize, inode_cluster_size)
2890 * for *our* value of mp->m_inode_cluster_size, then we need to keep 2809 * for *our* value of inode_cluster_size, then we need to keep
2891 * the buffer out of the buffer cache so that the buffer won't 2810 * the buffer out of the buffer cache so that the buffer won't
2892 * overlap with future reads of those inodes. 2811 * overlap with future reads of those inodes.
2893 */ 2812 */
2894 if (XFS_DINODE_MAGIC == 2813 if (XFS_DINODE_MAGIC ==
2895 be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) && 2814 be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
2896 (BBTOB(bp->b_io_length) != max(log->l_mp->m_sb.sb_blocksize, 2815 (BBTOB(bp->b_length) != M_IGEO(log->l_mp)->inode_cluster_size)) {
2897 (uint32_t)log->l_mp->m_inode_cluster_size))) {
2898 xfs_buf_stale(bp); 2816 xfs_buf_stale(bp);
2899 error = xfs_bwrite(bp); 2817 error = xfs_bwrite(bp);
2900 } else { 2818 } else {
2901 ASSERT(bp->b_target->bt_mount == mp); 2819 ASSERT(bp->b_mount == mp);
2902 bp->b_iodone = xlog_recover_iodone; 2820 bp->b_iodone = xlog_recover_iodone;
2903 xfs_buf_delwri_queue(bp, buffer_list); 2821 xfs_buf_delwri_queue(bp, buffer_list);
2904 } 2822 }
@@ -3260,7 +3178,7 @@ out_owner_change:
3260 /* re-generate the checksum. */ 3178 /* re-generate the checksum. */
3261 xfs_dinode_calc_crc(log->l_mp, dip); 3179 xfs_dinode_calc_crc(log->l_mp, dip);
3262 3180
3263 ASSERT(bp->b_target->bt_mount == mp); 3181 ASSERT(bp->b_mount == mp);
3264 bp->b_iodone = xlog_recover_iodone; 3182 bp->b_iodone = xlog_recover_iodone;
3265 xfs_buf_delwri_queue(bp, buffer_list); 3183 xfs_buf_delwri_queue(bp, buffer_list);
3266 3184
@@ -3399,7 +3317,7 @@ xlog_recover_dquot_pass2(
3399 } 3317 }
3400 3318
3401 ASSERT(dq_f->qlf_size == 2); 3319 ASSERT(dq_f->qlf_size == 2);
3402 ASSERT(bp->b_target->bt_mount == mp); 3320 ASSERT(bp->b_mount == mp);
3403 bp->b_iodone = xlog_recover_iodone; 3321 bp->b_iodone = xlog_recover_iodone;
3404 xfs_buf_delwri_queue(bp, buffer_list); 3322 xfs_buf_delwri_queue(bp, buffer_list);
3405 3323
@@ -3463,7 +3381,7 @@ xlog_recover_efd_pass2(
3463{ 3381{
3464 xfs_efd_log_format_t *efd_formatp; 3382 xfs_efd_log_format_t *efd_formatp;
3465 xfs_efi_log_item_t *efip = NULL; 3383 xfs_efi_log_item_t *efip = NULL;
3466 xfs_log_item_t *lip; 3384 struct xfs_log_item *lip;
3467 uint64_t efi_id; 3385 uint64_t efi_id;
3468 struct xfs_ail_cursor cur; 3386 struct xfs_ail_cursor cur;
3469 struct xfs_ail *ailp = log->l_ailp; 3387 struct xfs_ail *ailp = log->l_ailp;
@@ -3849,6 +3767,7 @@ xlog_recover_do_icreate_pass2(
3849{ 3767{
3850 struct xfs_mount *mp = log->l_mp; 3768 struct xfs_mount *mp = log->l_mp;
3851 struct xfs_icreate_log *icl; 3769 struct xfs_icreate_log *icl;
3770 struct xfs_ino_geometry *igeo = M_IGEO(mp);
3852 xfs_agnumber_t agno; 3771 xfs_agnumber_t agno;
3853 xfs_agblock_t agbno; 3772 xfs_agblock_t agbno;
3854 unsigned int count; 3773 unsigned int count;
@@ -3898,10 +3817,10 @@ xlog_recover_do_icreate_pass2(
3898 3817
3899 /* 3818 /*
3900 * The inode chunk is either full or sparse and we only support 3819 * The inode chunk is either full or sparse and we only support
3901 * m_ialloc_min_blks sized sparse allocations at this time. 3820 * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
3902 */ 3821 */
3903 if (length != mp->m_ialloc_blks && 3822 if (length != igeo->ialloc_blks &&
3904 length != mp->m_ialloc_min_blks) { 3823 length != igeo->ialloc_min_blks) {
3905 xfs_warn(log->l_mp, 3824 xfs_warn(log->l_mp,
3906 "%s: unsupported chunk length", __FUNCTION__); 3825 "%s: unsupported chunk length", __FUNCTION__);
3907 return -EINVAL; 3826 return -EINVAL;
@@ -3921,13 +3840,13 @@ xlog_recover_do_icreate_pass2(
3921 * buffers for cancellation so we don't overwrite anything written after 3840 * buffers for cancellation so we don't overwrite anything written after
3922 * a cancellation. 3841 * a cancellation.
3923 */ 3842 */
3924 bb_per_cluster = XFS_FSB_TO_BB(mp, mp->m_blocks_per_cluster); 3843 bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
3925 nbufs = length / mp->m_blocks_per_cluster; 3844 nbufs = length / igeo->blocks_per_cluster;
3926 for (i = 0, cancel_count = 0; i < nbufs; i++) { 3845 for (i = 0, cancel_count = 0; i < nbufs; i++) {
3927 xfs_daddr_t daddr; 3846 xfs_daddr_t daddr;
3928 3847
3929 daddr = XFS_AGB_TO_DADDR(mp, agno, 3848 daddr = XFS_AGB_TO_DADDR(mp, agno,
3930 agbno + i * mp->m_blocks_per_cluster); 3849 agbno + i * igeo->blocks_per_cluster);
3931 if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0)) 3850 if (xlog_check_buffer_cancelled(log, daddr, bb_per_cluster, 0))
3932 cancel_count++; 3851 cancel_count++;
3933 } 3852 }
@@ -4956,12 +4875,11 @@ out:
4956 * A cancel occurs when the mount has failed and we're bailing out. 4875 * A cancel occurs when the mount has failed and we're bailing out.
4957 * Release all pending log intent items so they don't pin the AIL. 4876 * Release all pending log intent items so they don't pin the AIL.
4958 */ 4877 */
4959STATIC int 4878STATIC void
4960xlog_recover_cancel_intents( 4879xlog_recover_cancel_intents(
4961 struct xlog *log) 4880 struct xlog *log)
4962{ 4881{
4963 struct xfs_log_item *lip; 4882 struct xfs_log_item *lip;
4964 int error = 0;
4965 struct xfs_ail_cursor cur; 4883 struct xfs_ail_cursor cur;
4966 struct xfs_ail *ailp; 4884 struct xfs_ail *ailp;
4967 4885
@@ -5001,7 +4919,6 @@ xlog_recover_cancel_intents(
5001 4919
5002 xfs_trans_ail_cursor_done(&cur); 4920 xfs_trans_ail_cursor_done(&cur);
5003 spin_unlock(&ailp->ail_lock); 4921 spin_unlock(&ailp->ail_lock);
5004 return error;
5005} 4922}
5006 4923
5007/* 4924/*
@@ -5307,7 +5224,7 @@ xlog_do_recovery_pass(
5307 xfs_daddr_t blk_no, rblk_no; 5224 xfs_daddr_t blk_no, rblk_no;
5308 xfs_daddr_t rhead_blk; 5225 xfs_daddr_t rhead_blk;
5309 char *offset; 5226 char *offset;
5310 xfs_buf_t *hbp, *dbp; 5227 char *hbp, *dbp;
5311 int error = 0, h_size, h_len; 5228 int error = 0, h_size, h_len;
5312 int error2 = 0; 5229 int error2 = 0;
5313 int bblks, split_bblks; 5230 int bblks, split_bblks;
@@ -5332,7 +5249,7 @@ xlog_do_recovery_pass(
5332 * iclog header and extract the header size from it. Get a 5249 * iclog header and extract the header size from it. Get a
5333 * new hbp that is the correct size. 5250 * new hbp that is the correct size.
5334 */ 5251 */
5335 hbp = xlog_get_bp(log, 1); 5252 hbp = xlog_alloc_buffer(log, 1);
5336 if (!hbp) 5253 if (!hbp)
5337 return -ENOMEM; 5254 return -ENOMEM;
5338 5255
@@ -5374,23 +5291,23 @@ xlog_do_recovery_pass(
5374 hblks = h_size / XLOG_HEADER_CYCLE_SIZE; 5291 hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
5375 if (h_size % XLOG_HEADER_CYCLE_SIZE) 5292 if (h_size % XLOG_HEADER_CYCLE_SIZE)
5376 hblks++; 5293 hblks++;
5377 xlog_put_bp(hbp); 5294 kmem_free(hbp);
5378 hbp = xlog_get_bp(log, hblks); 5295 hbp = xlog_alloc_buffer(log, hblks);
5379 } else { 5296 } else {
5380 hblks = 1; 5297 hblks = 1;
5381 } 5298 }
5382 } else { 5299 } else {
5383 ASSERT(log->l_sectBBsize == 1); 5300 ASSERT(log->l_sectBBsize == 1);
5384 hblks = 1; 5301 hblks = 1;
5385 hbp = xlog_get_bp(log, 1); 5302 hbp = xlog_alloc_buffer(log, 1);
5386 h_size = XLOG_BIG_RECORD_BSIZE; 5303 h_size = XLOG_BIG_RECORD_BSIZE;
5387 } 5304 }
5388 5305
5389 if (!hbp) 5306 if (!hbp)
5390 return -ENOMEM; 5307 return -ENOMEM;
5391 dbp = xlog_get_bp(log, BTOBB(h_size)); 5308 dbp = xlog_alloc_buffer(log, BTOBB(h_size));
5392 if (!dbp) { 5309 if (!dbp) {
5393 xlog_put_bp(hbp); 5310 kmem_free(hbp);
5394 return -ENOMEM; 5311 return -ENOMEM;
5395 } 5312 }
5396 5313
@@ -5405,7 +5322,7 @@ xlog_do_recovery_pass(
5405 /* 5322 /*
5406 * Check for header wrapping around physical end-of-log 5323 * Check for header wrapping around physical end-of-log
5407 */ 5324 */
5408 offset = hbp->b_addr; 5325 offset = hbp;
5409 split_hblks = 0; 5326 split_hblks = 0;
5410 wrapped_hblks = 0; 5327 wrapped_hblks = 0;
5411 if (blk_no + hblks <= log->l_logBBsize) { 5328 if (blk_no + hblks <= log->l_logBBsize) {
@@ -5441,8 +5358,8 @@ xlog_do_recovery_pass(
5441 * - order is important. 5358 * - order is important.
5442 */ 5359 */
5443 wrapped_hblks = hblks - split_hblks; 5360 wrapped_hblks = hblks - split_hblks;
5444 error = xlog_bread_offset(log, 0, 5361 error = xlog_bread_noalign(log, 0,
5445 wrapped_hblks, hbp, 5362 wrapped_hblks,
5446 offset + BBTOB(split_hblks)); 5363 offset + BBTOB(split_hblks));
5447 if (error) 5364 if (error)
5448 goto bread_err2; 5365 goto bread_err2;
@@ -5473,7 +5390,7 @@ xlog_do_recovery_pass(
5473 } else { 5390 } else {
5474 /* This log record is split across the 5391 /* This log record is split across the
5475 * physical end of log */ 5392 * physical end of log */
5476 offset = dbp->b_addr; 5393 offset = dbp;
5477 split_bblks = 0; 5394 split_bblks = 0;
5478 if (blk_no != log->l_logBBsize) { 5395 if (blk_no != log->l_logBBsize) {
5479 /* some data is before the physical 5396 /* some data is before the physical
@@ -5502,8 +5419,8 @@ xlog_do_recovery_pass(
5502 * _first_, then the log start (LR header end) 5419 * _first_, then the log start (LR header end)
5503 * - order is important. 5420 * - order is important.
5504 */ 5421 */
5505 error = xlog_bread_offset(log, 0, 5422 error = xlog_bread_noalign(log, 0,
5506 bblks - split_bblks, dbp, 5423 bblks - split_bblks,
5507 offset + BBTOB(split_bblks)); 5424 offset + BBTOB(split_bblks));
5508 if (error) 5425 if (error)
5509 goto bread_err2; 5426 goto bread_err2;
@@ -5551,9 +5468,9 @@ xlog_do_recovery_pass(
5551 } 5468 }
5552 5469
5553 bread_err2: 5470 bread_err2:
5554 xlog_put_bp(dbp); 5471 kmem_free(dbp);
5555 bread_err1: 5472 bread_err1:
5556 xlog_put_bp(hbp); 5473 kmem_free(hbp);
5557 5474
5558 /* 5475 /*
5559 * Submit buffers that have been added from the last record processed, 5476 * Submit buffers that have been added from the last record processed,
@@ -5687,7 +5604,7 @@ xlog_do_recover(
5687 * Now that we've finished replaying all buffer and inode 5604 * Now that we've finished replaying all buffer and inode
5688 * updates, re-read in the superblock and reverify it. 5605 * updates, re-read in the superblock and reverify it.
5689 */ 5606 */
5690 bp = xfs_getsb(mp, 0); 5607 bp = xfs_getsb(mp);
5691 bp->b_flags &= ~(XBF_DONE | XBF_ASYNC); 5608 bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
5692 ASSERT(!(bp->b_flags & XBF_WRITE)); 5609 ASSERT(!(bp->b_flags & XBF_WRITE));
5693 bp->b_flags |= XBF_READ; 5610 bp->b_flags |= XBF_READ;
@@ -5860,16 +5777,12 @@ xlog_recover_finish(
5860 return 0; 5777 return 0;
5861} 5778}
5862 5779
5863int 5780void
5864xlog_recover_cancel( 5781xlog_recover_cancel(
5865 struct xlog *log) 5782 struct xlog *log)
5866{ 5783{
5867 int error = 0;
5868
5869 if (log->l_flags & XLOG_RECOVERY_NEEDED) 5784 if (log->l_flags & XLOG_RECOVERY_NEEDED)
5870 error = xlog_recover_cancel_intents(log); 5785 xlog_recover_cancel_intents(log);
5871
5872 return error;
5873} 5786}
5874 5787
5875#if defined(DEBUG) 5788#if defined(DEBUG)
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 6b736ea58d35..9804efe525a9 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -6,8 +6,8 @@
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_error.h" 8#include "xfs_error.h"
9#include "xfs_shared.h"
9#include "xfs_format.h" 10#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13 13
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 6b2bfe81dc51..322da6909290 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -12,9 +12,6 @@
12#include "xfs_bit.h" 12#include "xfs_bit.h"
13#include "xfs_sb.h" 13#include "xfs_sb.h"
14#include "xfs_mount.h" 14#include "xfs_mount.h"
15#include "xfs_defer.h"
16#include "xfs_da_format.h"
17#include "xfs_da_btree.h"
18#include "xfs_inode.h" 15#include "xfs_inode.h"
19#include "xfs_dir2.h" 16#include "xfs_dir2.h"
20#include "xfs_ialloc.h" 17#include "xfs_ialloc.h"
@@ -27,7 +24,6 @@
27#include "xfs_error.h" 24#include "xfs_error.h"
28#include "xfs_quota.h" 25#include "xfs_quota.h"
29#include "xfs_fsops.h" 26#include "xfs_fsops.h"
30#include "xfs_trace.h"
31#include "xfs_icache.h" 27#include "xfs_icache.h"
32#include "xfs_sysfs.h" 28#include "xfs_sysfs.h"
33#include "xfs_rmap_btree.h" 29#include "xfs_rmap_btree.h"
@@ -430,30 +426,6 @@ xfs_update_alignment(xfs_mount_t *mp)
430} 426}
431 427
432/* 428/*
433 * Set the maximum inode count for this filesystem
434 */
435STATIC void
436xfs_set_maxicount(xfs_mount_t *mp)
437{
438 xfs_sb_t *sbp = &(mp->m_sb);
439 uint64_t icount;
440
441 if (sbp->sb_imax_pct) {
442 /*
443 * Make sure the maximum inode count is a multiple
444 * of the units we allocate inodes in.
445 */
446 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
447 do_div(icount, 100);
448 do_div(icount, mp->m_ialloc_blks);
449 mp->m_maxicount = (icount * mp->m_ialloc_blks) <<
450 sbp->sb_inopblog;
451 } else {
452 mp->m_maxicount = 0;
453 }
454}
455
456/*
457 * Set the default minimum read and write sizes unless 429 * Set the default minimum read and write sizes unless
458 * already specified in a mount option. 430 * already specified in a mount option.
459 * We use smaller I/O sizes when the file system 431 * We use smaller I/O sizes when the file system
@@ -509,29 +481,6 @@ xfs_set_low_space_thresholds(
509 } 481 }
510} 482}
511 483
512
513/*
514 * Set whether we're using inode alignment.
515 */
516STATIC void
517xfs_set_inoalignment(xfs_mount_t *mp)
518{
519 if (xfs_sb_version_hasalign(&mp->m_sb) &&
520 mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp))
521 mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1;
522 else
523 mp->m_inoalign_mask = 0;
524 /*
525 * If we are using stripe alignment, check whether
526 * the stripe unit is a multiple of the inode alignment
527 */
528 if (mp->m_dalign && mp->m_inoalign_mask &&
529 !(mp->m_dalign & mp->m_inoalign_mask))
530 mp->m_sinoalign = mp->m_dalign;
531 else
532 mp->m_sinoalign = 0;
533}
534
535/* 484/*
536 * Check that the data (and log if separate) is an ok size. 485 * Check that the data (and log if separate) is an ok size.
537 */ 486 */
@@ -683,6 +632,7 @@ xfs_mountfs(
683{ 632{
684 struct xfs_sb *sbp = &(mp->m_sb); 633 struct xfs_sb *sbp = &(mp->m_sb);
685 struct xfs_inode *rip; 634 struct xfs_inode *rip;
635 struct xfs_ino_geometry *igeo = M_IGEO(mp);
686 uint64_t resblks; 636 uint64_t resblks;
687 uint quotamount = 0; 637 uint quotamount = 0;
688 uint quotaflags = 0; 638 uint quotaflags = 0;
@@ -749,12 +699,10 @@ xfs_mountfs(
749 xfs_alloc_compute_maxlevels(mp); 699 xfs_alloc_compute_maxlevels(mp);
750 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK); 700 xfs_bmap_compute_maxlevels(mp, XFS_DATA_FORK);
751 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK); 701 xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
752 xfs_ialloc_compute_maxlevels(mp); 702 xfs_ialloc_setup_geometry(mp);
753 xfs_rmapbt_compute_maxlevels(mp); 703 xfs_rmapbt_compute_maxlevels(mp);
754 xfs_refcountbt_compute_maxlevels(mp); 704 xfs_refcountbt_compute_maxlevels(mp);
755 705
756 xfs_set_maxicount(mp);
757
758 /* enable fail_at_unmount as default */ 706 /* enable fail_at_unmount as default */
759 mp->m_fail_unmount = true; 707 mp->m_fail_unmount = true;
760 708
@@ -788,50 +736,22 @@ xfs_mountfs(
788 xfs_set_low_space_thresholds(mp); 736 xfs_set_low_space_thresholds(mp);
789 737
790 /* 738 /*
791 * Set the inode cluster size.
792 * This may still be overridden by the file system
793 * block size if it is larger than the chosen cluster size.
794 *
795 * For v5 filesystems, scale the cluster size with the inode size to
796 * keep a constant ratio of inode per cluster buffer, but only if mkfs
797 * has set the inode alignment value appropriately for larger cluster
798 * sizes.
799 */
800 mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE;
801 if (xfs_sb_version_hascrc(&mp->m_sb)) {
802 int new_size = mp->m_inode_cluster_size;
803
804 new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
805 if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
806 mp->m_inode_cluster_size = new_size;
807 }
808 mp->m_blocks_per_cluster = xfs_icluster_size_fsb(mp);
809 mp->m_inodes_per_cluster = XFS_FSB_TO_INO(mp, mp->m_blocks_per_cluster);
810 mp->m_cluster_align = xfs_ialloc_cluster_alignment(mp);
811 mp->m_cluster_align_inodes = XFS_FSB_TO_INO(mp, mp->m_cluster_align);
812
813 /*
814 * If enabled, sparse inode chunk alignment is expected to match the 739 * If enabled, sparse inode chunk alignment is expected to match the
815 * cluster size. Full inode chunk alignment must match the chunk size, 740 * cluster size. Full inode chunk alignment must match the chunk size,
816 * but that is checked on sb read verification... 741 * but that is checked on sb read verification...
817 */ 742 */
818 if (xfs_sb_version_hassparseinodes(&mp->m_sb) && 743 if (xfs_sb_version_hassparseinodes(&mp->m_sb) &&
819 mp->m_sb.sb_spino_align != 744 mp->m_sb.sb_spino_align !=
820 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) { 745 XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw)) {
821 xfs_warn(mp, 746 xfs_warn(mp,
822 "Sparse inode block alignment (%u) must match cluster size (%llu).", 747 "Sparse inode block alignment (%u) must match cluster size (%llu).",
823 mp->m_sb.sb_spino_align, 748 mp->m_sb.sb_spino_align,
824 XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)); 749 XFS_B_TO_FSBT(mp, igeo->inode_cluster_size_raw));
825 error = -EINVAL; 750 error = -EINVAL;
826 goto out_remove_uuid; 751 goto out_remove_uuid;
827 } 752 }
828 753
829 /* 754 /*
830 * Set inode alignment fields
831 */
832 xfs_set_inoalignment(mp);
833
834 /*
835 * Check that the data (and log if separate) is an ok size. 755 * Check that the data (and log if separate) is an ok size.
836 */ 756 */
837 error = xfs_check_sizes(mp); 757 error = xfs_check_sizes(mp);
@@ -1385,24 +1305,14 @@ xfs_mod_frextents(
1385 * xfs_getsb() is called to obtain the buffer for the superblock. 1305 * xfs_getsb() is called to obtain the buffer for the superblock.
1386 * The buffer is returned locked and read in from disk. 1306 * The buffer is returned locked and read in from disk.
1387 * The buffer should be released with a call to xfs_brelse(). 1307 * The buffer should be released with a call to xfs_brelse().
1388 *
1389 * If the flags parameter is BUF_TRYLOCK, then we'll only return
1390 * the superblock buffer if it can be locked without sleeping.
1391 * If it can't then we'll return NULL.
1392 */ 1308 */
1393struct xfs_buf * 1309struct xfs_buf *
1394xfs_getsb( 1310xfs_getsb(
1395 struct xfs_mount *mp, 1311 struct xfs_mount *mp)
1396 int flags)
1397{ 1312{
1398 struct xfs_buf *bp = mp->m_sb_bp; 1313 struct xfs_buf *bp = mp->m_sb_bp;
1399 1314
1400 if (!xfs_buf_trylock(bp)) { 1315 xfs_buf_lock(bp);
1401 if (flags & XBF_TRYLOCK)
1402 return NULL;
1403 xfs_buf_lock(bp);
1404 }
1405
1406 xfs_buf_hold(bp); 1316 xfs_buf_hold(bp);
1407 ASSERT(bp->b_flags & XBF_DONE); 1317 ASSERT(bp->b_flags & XBF_DONE);
1408 return bp; 1318 return bp;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index c81a5cd7c228..4adb6837439a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -105,6 +105,7 @@ typedef struct xfs_mount {
105 struct xfs_da_geometry *m_dir_geo; /* directory block geometry */ 105 struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
106 struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */ 106 struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */
107 struct xlog *m_log; /* log specific stuff */ 107 struct xlog *m_log; /* log specific stuff */
108 struct xfs_ino_geometry m_ino_geo; /* inode geometry */
108 int m_logbufs; /* number of log buffers */ 109 int m_logbufs; /* number of log buffers */
109 int m_logbsize; /* size of each log buffer */ 110 int m_logbsize; /* size of each log buffer */
110 uint m_rsumlevels; /* rt summary levels */ 111 uint m_rsumlevels; /* rt summary levels */
@@ -126,12 +127,6 @@ typedef struct xfs_mount {
126 uint8_t m_blkbit_log; /* blocklog + NBBY */ 127 uint8_t m_blkbit_log; /* blocklog + NBBY */
127 uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ 128 uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
128 uint8_t m_agno_log; /* log #ag's */ 129 uint8_t m_agno_log; /* log #ag's */
129 uint8_t m_agino_log; /* #bits for agino in inum */
130 uint m_inode_cluster_size;/* min inode buf size */
131 unsigned int m_inodes_per_cluster;
132 unsigned int m_blocks_per_cluster;
133 unsigned int m_cluster_align;
134 unsigned int m_cluster_align_inodes;
135 uint m_blockmask; /* sb_blocksize-1 */ 130 uint m_blockmask; /* sb_blocksize-1 */
136 uint m_blockwsize; /* sb_blocksize in words */ 131 uint m_blockwsize; /* sb_blocksize in words */
137 uint m_blockwmask; /* blockwsize-1 */ 132 uint m_blockwmask; /* blockwsize-1 */
@@ -139,15 +134,12 @@ typedef struct xfs_mount {
139 uint m_alloc_mnr[2]; /* min alloc btree records */ 134 uint m_alloc_mnr[2]; /* min alloc btree records */
140 uint m_bmap_dmxr[2]; /* max bmap btree records */ 135 uint m_bmap_dmxr[2]; /* max bmap btree records */
141 uint m_bmap_dmnr[2]; /* min bmap btree records */ 136 uint m_bmap_dmnr[2]; /* min bmap btree records */
142 uint m_inobt_mxr[2]; /* max inobt btree records */
143 uint m_inobt_mnr[2]; /* min inobt btree records */
144 uint m_rmap_mxr[2]; /* max rmap btree records */ 137 uint m_rmap_mxr[2]; /* max rmap btree records */
145 uint m_rmap_mnr[2]; /* min rmap btree records */ 138 uint m_rmap_mnr[2]; /* min rmap btree records */
146 uint m_refc_mxr[2]; /* max refc btree records */ 139 uint m_refc_mxr[2]; /* max refc btree records */
147 uint m_refc_mnr[2]; /* min refc btree records */ 140 uint m_refc_mnr[2]; /* min refc btree records */
148 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ 141 uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */
149 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ 142 uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */
150 uint m_in_maxlevels; /* max inobt btree levels. */
151 uint m_rmap_maxlevels; /* max rmap btree levels */ 143 uint m_rmap_maxlevels; /* max rmap btree levels */
152 uint m_refc_maxlevels; /* max refcount btree level */ 144 uint m_refc_maxlevels; /* max refcount btree level */
153 xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ 145 xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */
@@ -159,20 +151,13 @@ typedef struct xfs_mount {
159 int m_fixedfsid[2]; /* unchanged for life of FS */ 151 int m_fixedfsid[2]; /* unchanged for life of FS */
160 uint64_t m_flags; /* global mount flags */ 152 uint64_t m_flags; /* global mount flags */
161 bool m_finobt_nores; /* no per-AG finobt resv. */ 153 bool m_finobt_nores; /* no per-AG finobt resv. */
162 int m_ialloc_inos; /* inodes in inode allocation */
163 int m_ialloc_blks; /* blocks in inode allocation */
164 int m_ialloc_min_blks;/* min blocks in sparse inode
165 * allocation */
166 int m_inoalign_mask;/* mask sb_inoalignmt if used */
167 uint m_qflags; /* quota status flags */ 154 uint m_qflags; /* quota status flags */
168 struct xfs_trans_resv m_resv; /* precomputed res values */ 155 struct xfs_trans_resv m_resv; /* precomputed res values */
169 uint64_t m_maxicount; /* maximum inode count */
170 uint64_t m_resblks; /* total reserved blocks */ 156 uint64_t m_resblks; /* total reserved blocks */
171 uint64_t m_resblks_avail;/* available reserved blocks */ 157 uint64_t m_resblks_avail;/* available reserved blocks */
172 uint64_t m_resblks_save; /* reserved blks @ remount,ro */ 158 uint64_t m_resblks_save; /* reserved blks @ remount,ro */
173 int m_dalign; /* stripe unit */ 159 int m_dalign; /* stripe unit */
174 int m_swidth; /* stripe width */ 160 int m_swidth; /* stripe width */
175 int m_sinoalign; /* stripe unit inode alignment */
176 uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ 161 uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
177 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ 162 const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
178 const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ 163 const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
@@ -198,7 +183,6 @@ typedef struct xfs_mount {
198 struct workqueue_struct *m_unwritten_workqueue; 183 struct workqueue_struct *m_unwritten_workqueue;
199 struct workqueue_struct *m_cil_workqueue; 184 struct workqueue_struct *m_cil_workqueue;
200 struct workqueue_struct *m_reclaim_workqueue; 185 struct workqueue_struct *m_reclaim_workqueue;
201 struct workqueue_struct *m_log_workqueue;
202 struct workqueue_struct *m_eofblocks_workqueue; 186 struct workqueue_struct *m_eofblocks_workqueue;
203 struct workqueue_struct *m_sync_workqueue; 187 struct workqueue_struct *m_sync_workqueue;
204 188
@@ -226,6 +210,8 @@ typedef struct xfs_mount {
226#endif 210#endif
227} xfs_mount_t; 211} xfs_mount_t;
228 212
213#define M_IGEO(mp) (&(mp)->m_ino_geo)
214
229/* 215/*
230 * Flags for m_flags. 216 * Flags for m_flags.
231 */ 217 */
@@ -465,7 +451,7 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
465 bool reserved); 451 bool reserved);
466extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta); 452extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
467 453
468extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 454extern struct xfs_buf *xfs_getsb(xfs_mount_t *);
469extern int xfs_readsb(xfs_mount_t *, int); 455extern int xfs_readsb(xfs_mount_t *, int);
470extern void xfs_freesb(xfs_mount_t *); 456extern void xfs_freesb(xfs_mount_t *);
471extern bool xfs_fs_writable(struct xfs_mount *mp, int level); 457extern bool xfs_fs_writable(struct xfs_mount *mp, int level);
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index c8ba98fae30a..b6701b4f59a9 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -146,6 +146,11 @@ xfs_check_ondisk_structs(void)
146 XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic, 0); 146 XFS_CHECK_OFFSET(struct xfs_dir3_data_hdr, hdr.magic, 0);
147 XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0); 147 XFS_CHECK_OFFSET(struct xfs_dir3_free, hdr.hdr.magic, 0);
148 XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0); 148 XFS_CHECK_OFFSET(struct xfs_attr3_leafblock, hdr.info.hdr, 0);
149
150 XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat, 192);
151 XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers, 24);
152 XFS_CHECK_STRUCT_SIZE(struct xfs_bulkstat_req, 64);
153 XFS_CHECK_STRUCT_SIZE(struct xfs_inumbers_req, 64);
149} 154}
150 155
151#endif /* __XFS_ONDISK_H */ 156#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index bde2c9f56a46..0c954cad7449 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -2,23 +2,16 @@
2/* 2/*
3 * Copyright (c) 2014 Christoph Hellwig. 3 * Copyright (c) 2014 Christoph Hellwig.
4 */ 4 */
5#include <linux/iomap.h>
6#include "xfs.h" 5#include "xfs.h"
6#include "xfs_shared.h"
7#include "xfs_format.h" 7#include "xfs_format.h"
8#include "xfs_log_format.h" 8#include "xfs_log_format.h"
9#include "xfs_trans_resv.h" 9#include "xfs_trans_resv.h"
10#include "xfs_sb.h"
11#include "xfs_mount.h" 10#include "xfs_mount.h"
12#include "xfs_inode.h" 11#include "xfs_inode.h"
13#include "xfs_trans.h" 12#include "xfs_trans.h"
14#include "xfs_log.h"
15#include "xfs_bmap.h" 13#include "xfs_bmap.h"
16#include "xfs_bmap_util.h"
17#include "xfs_error.h"
18#include "xfs_iomap.h" 14#include "xfs_iomap.h"
19#include "xfs_shared.h"
20#include "xfs_bit.h"
21#include "xfs_pnfs.h"
22 15
23/* 16/*
24 * Ensure that we do not have any outstanding pNFS layouts that can be used by 17 * Ensure that we do not have any outstanding pNFS layouts that can be used by
diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c
new file mode 100644
index 000000000000..4bcc3e61056c
--- /dev/null
+++ b/fs/xfs/xfs_pwork.c
@@ -0,0 +1,136 @@
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * Copyright (C) 2019 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_trace.h"
14#include "xfs_sysctl.h"
15#include "xfs_pwork.h"
16#include <linux/nmi.h>
17
18/*
19 * Parallel Work Queue
20 * ===================
21 *
22 * Abstract away the details of running a large and "obviously" parallelizable
23 * task across multiple CPUs. Callers initialize the pwork control object with
24 * a desired level of parallelization and a work function. Next, they embed
25 * struct xfs_pwork in whatever structure they use to pass work context to a
26 * worker thread and queue that pwork. The work function will be passed the
27 * pwork item when it is run (from process context) and any returned error will
28 * be recorded in xfs_pwork_ctl.error. Work functions should check for errors
29 * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not
30 * stop workqueue item processing.
31 *
32 * This is the rough equivalent of the xfsprogs workqueue code, though we can't
33 * reuse that name here.
34 */
35
36/* Invoke our caller's function. */
37static void
38xfs_pwork_work(
39 struct work_struct *work)
40{
41 struct xfs_pwork *pwork;
42 struct xfs_pwork_ctl *pctl;
43 int error;
44
45 pwork = container_of(work, struct xfs_pwork, work);
46 pctl = pwork->pctl;
47 error = pctl->work_fn(pctl->mp, pwork);
48 if (error && !pctl->error)
49 pctl->error = error;
50 if (atomic_dec_and_test(&pctl->nr_work))
51 wake_up(&pctl->poll_wait);
52}
53
54/*
55 * Set up control data for parallel work. @work_fn is the function that will
56 * be called. @tag will be written into the kernel threads. @nr_threads is
57 * the level of parallelism desired, or 0 for no limit.
58 */
59int
60xfs_pwork_init(
61 struct xfs_mount *mp,
62 struct xfs_pwork_ctl *pctl,
63 xfs_pwork_work_fn work_fn,
64 const char *tag,
65 unsigned int nr_threads)
66{
67#ifdef DEBUG
68 if (xfs_globals.pwork_threads >= 0)
69 nr_threads = xfs_globals.pwork_threads;
70#endif
71 trace_xfs_pwork_init(mp, nr_threads, current->pid);
72
73 pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag,
74 current->pid);
75 if (!pctl->wq)
76 return -ENOMEM;
77 pctl->work_fn = work_fn;
78 pctl->error = 0;
79 pctl->mp = mp;
80 atomic_set(&pctl->nr_work, 0);
81 init_waitqueue_head(&pctl->poll_wait);
82
83 return 0;
84}
85
86/* Queue some parallel work. */
87void
88xfs_pwork_queue(
89 struct xfs_pwork_ctl *pctl,
90 struct xfs_pwork *pwork)
91{
92 INIT_WORK(&pwork->work, xfs_pwork_work);
93 pwork->pctl = pctl;
94 atomic_inc(&pctl->nr_work);
95 queue_work(pctl->wq, &pwork->work);
96}
97
98/* Wait for the work to finish and tear down the control structure. */
99int
100xfs_pwork_destroy(
101 struct xfs_pwork_ctl *pctl)
102{
103 destroy_workqueue(pctl->wq);
104 pctl->wq = NULL;
105 return pctl->error;
106}
107
108/*
109 * Wait for the work to finish by polling completion status and touch the soft
110 * lockup watchdog. This is for callers such as mount which hold locks.
111 */
112void
113xfs_pwork_poll(
114 struct xfs_pwork_ctl *pctl)
115{
116 while (wait_event_timeout(pctl->poll_wait,
117 atomic_read(&pctl->nr_work) == 0, HZ) == 0)
118 touch_softlockup_watchdog();
119}
120
121/*
122 * Return the amount of parallelism that the data device can handle, or 0 for
123 * no limit.
124 */
125unsigned int
126xfs_pwork_guess_datadev_parallelism(
127 struct xfs_mount *mp)
128{
129 struct xfs_buftarg *btp = mp->m_ddev_targp;
130
131 /*
132 * For now we'll go with the most conservative setting possible,
133 * which is two threads for an SSD and 1 thread everywhere else.
134 */
135 return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 2 : 1;
136}
diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h
new file mode 100644
index 000000000000..8133124cf3bb
--- /dev/null
+++ b/fs/xfs/xfs_pwork.h
@@ -0,0 +1,61 @@
1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/*
3 * Copyright (C) 2019 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#ifndef __XFS_PWORK_H__
7#define __XFS_PWORK_H__
8
9struct xfs_pwork;
10struct xfs_mount;
11
12typedef int (*xfs_pwork_work_fn)(struct xfs_mount *mp, struct xfs_pwork *pwork);
13
14/*
15 * Parallel work coordination structure.
16 */
17struct xfs_pwork_ctl {
18 struct workqueue_struct *wq;
19 struct xfs_mount *mp;
20 xfs_pwork_work_fn work_fn;
21 struct wait_queue_head poll_wait;
22 atomic_t nr_work;
23 int error;
24};
25
26/*
27 * Embed this parallel work control item inside your own work structure,
28 * then queue work with it.
29 */
30struct xfs_pwork {
31 struct work_struct work;
32 struct xfs_pwork_ctl *pctl;
33};
34
35#define XFS_PWORK_SINGLE_THREADED { .pctl = NULL }
36
37/* Have we been told to abort? */
38static inline bool
39xfs_pwork_ctl_want_abort(
40 struct xfs_pwork_ctl *pctl)
41{
42 return pctl && pctl->error;
43}
44
45/* Have we been told to abort? */
46static inline bool
47xfs_pwork_want_abort(
48 struct xfs_pwork *pwork)
49{
50 return xfs_pwork_ctl_want_abort(pwork->pctl);
51}
52
53int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl,
54 xfs_pwork_work_fn work_fn, const char *tag,
55 unsigned int nr_threads);
56void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork);
57int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl);
58void xfs_pwork_poll(struct xfs_pwork_ctl *pctl);
59unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp);
60
61#endif /* __XFS_PWORK_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index aa6b6db3db0e..5e7a37f0cf84 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -13,19 +13,15 @@
13#include "xfs_sb.h" 13#include "xfs_sb.h"
14#include "xfs_mount.h" 14#include "xfs_mount.h"
15#include "xfs_inode.h" 15#include "xfs_inode.h"
16#include "xfs_ialloc.h" 16#include "xfs_iwalk.h"
17#include "xfs_itable.h"
18#include "xfs_quota.h" 17#include "xfs_quota.h"
19#include "xfs_error.h"
20#include "xfs_bmap.h" 18#include "xfs_bmap.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_bmap_util.h" 19#include "xfs_bmap_util.h"
23#include "xfs_trans.h" 20#include "xfs_trans.h"
24#include "xfs_trans_space.h" 21#include "xfs_trans_space.h"
25#include "xfs_qm.h" 22#include "xfs_qm.h"
26#include "xfs_trace.h" 23#include "xfs_trace.h"
27#include "xfs_icache.h" 24#include "xfs_icache.h"
28#include "xfs_cksum.h"
29 25
30/* 26/*
31 * The global quota manager. There is only one of these for the entire 27 * The global quota manager. There is only one of these for the entire
@@ -1118,17 +1114,15 @@ xfs_qm_quotacheck_dqadjust(
1118/* ARGSUSED */ 1114/* ARGSUSED */
1119STATIC int 1115STATIC int
1120xfs_qm_dqusage_adjust( 1116xfs_qm_dqusage_adjust(
1121 xfs_mount_t *mp, /* mount point for filesystem */ 1117 struct xfs_mount *mp,
1122 xfs_ino_t ino, /* inode number to get data for */ 1118 struct xfs_trans *tp,
1123 void __user *buffer, /* not used */ 1119 xfs_ino_t ino,
1124 int ubsize, /* not used */ 1120 void *data)
1125 int *ubused, /* not used */
1126 int *res) /* result code value */
1127{ 1121{
1128 xfs_inode_t *ip; 1122 struct xfs_inode *ip;
1129 xfs_qcnt_t nblks; 1123 xfs_qcnt_t nblks;
1130 xfs_filblks_t rtblks = 0; /* total rt blks */ 1124 xfs_filblks_t rtblks = 0; /* total rt blks */
1131 int error; 1125 int error;
1132 1126
1133 ASSERT(XFS_IS_QUOTA_RUNNING(mp)); 1127 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1134 1128
@@ -1136,20 +1130,18 @@ xfs_qm_dqusage_adjust(
1136 * rootino must have its resources accounted for, not so with the quota 1130 * rootino must have its resources accounted for, not so with the quota
1137 * inodes. 1131 * inodes.
1138 */ 1132 */
1139 if (xfs_is_quota_inode(&mp->m_sb, ino)) { 1133 if (xfs_is_quota_inode(&mp->m_sb, ino))
1140 *res = BULKSTAT_RV_NOTHING; 1134 return 0;
1141 return -EINVAL;
1142 }
1143 1135
1144 /* 1136 /*
1145 * We don't _need_ to take the ilock EXCL here because quotacheck runs 1137 * We don't _need_ to take the ilock EXCL here because quotacheck runs
1146 * at mount time and therefore nobody will be racing chown/chproj. 1138 * at mount time and therefore nobody will be racing chown/chproj.
1147 */ 1139 */
1148 error = xfs_iget(mp, NULL, ino, XFS_IGET_DONTCACHE, 0, &ip); 1140 error = xfs_iget(mp, tp, ino, XFS_IGET_DONTCACHE, 0, &ip);
1149 if (error) { 1141 if (error == -EINVAL || error == -ENOENT)
1150 *res = BULKSTAT_RV_NOTHING; 1142 return 0;
1143 if (error)
1151 return error; 1144 return error;
1152 }
1153 1145
1154 ASSERT(ip->i_delayed_blks == 0); 1146 ASSERT(ip->i_delayed_blks == 0);
1155 1147
@@ -1157,7 +1149,7 @@ xfs_qm_dqusage_adjust(
1157 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 1149 struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1158 1150
1159 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 1151 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1160 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); 1152 error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
1161 if (error) 1153 if (error)
1162 goto error0; 1154 goto error0;
1163 } 1155 }
@@ -1200,13 +1192,8 @@ xfs_qm_dqusage_adjust(
1200 goto error0; 1192 goto error0;
1201 } 1193 }
1202 1194
1203 xfs_irele(ip);
1204 *res = BULKSTAT_RV_DIDONE;
1205 return 0;
1206
1207error0: 1195error0:
1208 xfs_irele(ip); 1196 xfs_irele(ip);
1209 *res = BULKSTAT_RV_GIVEUP;
1210 return error; 1197 return error;
1211} 1198}
1212 1199
@@ -1270,18 +1257,13 @@ STATIC int
1270xfs_qm_quotacheck( 1257xfs_qm_quotacheck(
1271 xfs_mount_t *mp) 1258 xfs_mount_t *mp)
1272{ 1259{
1273 int done, count, error, error2; 1260 int error, error2;
1274 xfs_ino_t lastino;
1275 size_t structsz;
1276 uint flags; 1261 uint flags;
1277 LIST_HEAD (buffer_list); 1262 LIST_HEAD (buffer_list);
1278 struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip; 1263 struct xfs_inode *uip = mp->m_quotainfo->qi_uquotaip;
1279 struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip; 1264 struct xfs_inode *gip = mp->m_quotainfo->qi_gquotaip;
1280 struct xfs_inode *pip = mp->m_quotainfo->qi_pquotaip; 1265 struct xfs_inode *pip = mp->m_quotainfo->qi_pquotaip;
1281 1266
1282 count = INT_MAX;
1283 structsz = 1;
1284 lastino = 0;
1285 flags = 0; 1267 flags = 0;
1286 1268
1287 ASSERT(uip || gip || pip); 1269 ASSERT(uip || gip || pip);
@@ -1318,18 +1300,10 @@ xfs_qm_quotacheck(
1318 flags |= XFS_PQUOTA_CHKD; 1300 flags |= XFS_PQUOTA_CHKD;
1319 } 1301 }
1320 1302
1321 do { 1303 error = xfs_iwalk_threaded(mp, 0, 0, xfs_qm_dqusage_adjust, 0, true,
1322 /* 1304 NULL);
1323 * Iterate thru all the inodes in the file system, 1305 if (error)
1324 * adjusting the corresponding dquot counters in core. 1306 goto error_return;
1325 */
1326 error = xfs_bulkstat(mp, &lastino, &count,
1327 xfs_qm_dqusage_adjust,
1328 structsz, NULL, &done);
1329 if (error)
1330 break;
1331
1332 } while (!done);
1333 1307
1334 /* 1308 /*
1335 * We've made all the changes that we need to make incore. Flush them 1309 * We've made all the changes that we need to make incore. Flush them
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 3091e4bc04ef..5d72e88598b4 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -5,13 +5,13 @@
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_fs.h" 7#include "xfs_fs.h"
8#include "xfs_shared.h"
8#include "xfs_format.h" 9#include "xfs_format.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
10#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
11#include "xfs_quota.h" 12#include "xfs_quota.h"
12#include "xfs_mount.h" 13#include "xfs_mount.h"
13#include "xfs_inode.h" 14#include "xfs_inode.h"
14#include "xfs_error.h"
15#include "xfs_trans.h" 15#include "xfs_trans.h"
16#include "xfs_qm.h" 16#include "xfs_qm.h"
17 17
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index b3190890f096..da7ad0383037 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -4,7 +4,6 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6 6
7#include <linux/capability.h>
8 7
9#include "xfs.h" 8#include "xfs.h"
10#include "xfs_fs.h" 9#include "xfs_fs.h"
@@ -12,17 +11,13 @@
12#include "xfs_format.h" 11#include "xfs_format.h"
13#include "xfs_log_format.h" 12#include "xfs_log_format.h"
14#include "xfs_trans_resv.h" 13#include "xfs_trans_resv.h"
15#include "xfs_bit.h"
16#include "xfs_sb.h" 14#include "xfs_sb.h"
17#include "xfs_mount.h" 15#include "xfs_mount.h"
18#include "xfs_inode.h" 16#include "xfs_inode.h"
19#include "xfs_trans.h" 17#include "xfs_trans.h"
20#include "xfs_error.h"
21#include "xfs_quota.h" 18#include "xfs_quota.h"
22#include "xfs_qm.h" 19#include "xfs_qm.h"
23#include "xfs_trace.h"
24#include "xfs_icache.h" 20#include "xfs_icache.h"
25#include "xfs_defer.h"
26 21
27STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint); 22STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
28STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *, 23STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index a7c0c657dfaf..cd6c7210a373 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -4,6 +4,7 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include "xfs_shared.h"
7#include "xfs_format.h" 8#include "xfs_format.h"
8#include "xfs_log_format.h" 9#include "xfs_log_format.h"
9#include "xfs_trans_resv.h" 10#include "xfs_trans_resv.h"
@@ -11,10 +12,8 @@
11#include "xfs_inode.h" 12#include "xfs_inode.h"
12#include "xfs_quota.h" 13#include "xfs_quota.h"
13#include "xfs_trans.h" 14#include "xfs_trans.h"
14#include "xfs_trace.h"
15#include "xfs_icache.h" 15#include "xfs_icache.h"
16#include "xfs_qm.h" 16#include "xfs_qm.h"
17#include <linux/quota.h>
18 17
19 18
20static void 19static void
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index fce38b56b962..d8288aa0670a 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -14,7 +14,6 @@
14#include "xfs_defer.h" 14#include "xfs_defer.h"
15#include "xfs_trans.h" 15#include "xfs_trans.h"
16#include "xfs_trans_priv.h" 16#include "xfs_trans_priv.h"
17#include "xfs_buf_item.h"
18#include "xfs_refcount_item.h" 17#include "xfs_refcount_item.h"
19#include "xfs_log.h" 18#include "xfs_log.h"
20#include "xfs_refcount.h" 19#include "xfs_refcount.h"
@@ -95,15 +94,6 @@ xfs_cui_item_format(
95} 94}
96 95
97/* 96/*
98 * Pinning has no meaning for an cui item, so just return.
99 */
100STATIC void
101xfs_cui_item_pin(
102 struct xfs_log_item *lip)
103{
104}
105
106/*
107 * The unpin operation is the last place an CUI is manipulated in the log. It is 97 * The unpin operation is the last place an CUI is manipulated in the log. It is
108 * either inserted in the AIL or aborted in the event of a log I/O error. In 98 * either inserted in the AIL or aborted in the event of a log I/O error. In
109 * either case, the CUI transaction has been successfully committed to make it 99 * either case, the CUI transaction has been successfully committed to make it
@@ -122,71 +112,22 @@ xfs_cui_item_unpin(
122} 112}
123 113
124/* 114/*
125 * CUI items have no locking or pushing. However, since CUIs are pulled from
126 * the AIL when their corresponding CUDs are committed to disk, their situation
127 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
128 * will eventually flush the log. This should help in getting the CUI out of
129 * the AIL.
130 */
131STATIC uint
132xfs_cui_item_push(
133 struct xfs_log_item *lip,
134 struct list_head *buffer_list)
135{
136 return XFS_ITEM_PINNED;
137}
138
139/*
140 * The CUI has been either committed or aborted if the transaction has been 115 * The CUI has been either committed or aborted if the transaction has been
141 * cancelled. If the transaction was cancelled, an CUD isn't going to be 116 * cancelled. If the transaction was cancelled, an CUD isn't going to be
142 * constructed and thus we free the CUI here directly. 117 * constructed and thus we free the CUI here directly.
143 */ 118 */
144STATIC void 119STATIC void
145xfs_cui_item_unlock( 120xfs_cui_item_release(
146 struct xfs_log_item *lip) 121 struct xfs_log_item *lip)
147{ 122{
148 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 123 xfs_cui_release(CUI_ITEM(lip));
149 xfs_cui_release(CUI_ITEM(lip));
150} 124}
151 125
152/*
153 * The CUI is logged only once and cannot be moved in the log, so simply return
154 * the lsn at which it's been logged.
155 */
156STATIC xfs_lsn_t
157xfs_cui_item_committed(
158 struct xfs_log_item *lip,
159 xfs_lsn_t lsn)
160{
161 return lsn;
162}
163
164/*
165 * The CUI dependency tracking op doesn't do squat. It can't because
166 * it doesn't know where the free extent is coming from. The dependency
167 * tracking has to be handled by the "enclosing" metadata object. For
168 * example, for inodes, the inode is locked throughout the extent freeing
169 * so the dependency should be recorded there.
170 */
171STATIC void
172xfs_cui_item_committing(
173 struct xfs_log_item *lip,
174 xfs_lsn_t lsn)
175{
176}
177
178/*
179 * This is the ops vector shared by all cui log items.
180 */
181static const struct xfs_item_ops xfs_cui_item_ops = { 126static const struct xfs_item_ops xfs_cui_item_ops = {
182 .iop_size = xfs_cui_item_size, 127 .iop_size = xfs_cui_item_size,
183 .iop_format = xfs_cui_item_format, 128 .iop_format = xfs_cui_item_format,
184 .iop_pin = xfs_cui_item_pin,
185 .iop_unpin = xfs_cui_item_unpin, 129 .iop_unpin = xfs_cui_item_unpin,
186 .iop_unlock = xfs_cui_item_unlock, 130 .iop_release = xfs_cui_item_release,
187 .iop_committed = xfs_cui_item_committed,
188 .iop_push = xfs_cui_item_push,
189 .iop_committing = xfs_cui_item_committing,
190}; 131};
191 132
192/* 133/*
@@ -254,126 +195,250 @@ xfs_cud_item_format(
254} 195}
255 196
256/* 197/*
257 * Pinning has no meaning for an cud item, so just return. 198 * The CUD is either committed or aborted if the transaction is cancelled. If
199 * the transaction is cancelled, drop our reference to the CUI and free the
200 * CUD.
258 */ 201 */
259STATIC void 202STATIC void
260xfs_cud_item_pin( 203xfs_cud_item_release(
261 struct xfs_log_item *lip) 204 struct xfs_log_item *lip)
262{ 205{
206 struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
207
208 xfs_cui_release(cudp->cud_cuip);
209 kmem_zone_free(xfs_cud_zone, cudp);
263} 210}
264 211
265/* 212static const struct xfs_item_ops xfs_cud_item_ops = {
266 * Since pinning has no meaning for an cud item, unpinning does 213 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
267 * not either. 214 .iop_size = xfs_cud_item_size,
268 */ 215 .iop_format = xfs_cud_item_format,
269STATIC void 216 .iop_release = xfs_cud_item_release,
270xfs_cud_item_unpin( 217};
271 struct xfs_log_item *lip, 218
272 int remove) 219static struct xfs_cud_log_item *
220xfs_trans_get_cud(
221 struct xfs_trans *tp,
222 struct xfs_cui_log_item *cuip)
273{ 223{
224 struct xfs_cud_log_item *cudp;
225
226 cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
227 xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
228 &xfs_cud_item_ops);
229 cudp->cud_cuip = cuip;
230 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id;
231
232 xfs_trans_add_item(tp, &cudp->cud_item);
233 return cudp;
274} 234}
275 235
276/* 236/*
277 * There isn't much you can do to push on an cud item. It is simply stuck 237 * Finish an refcount update and log it to the CUD. Note that the
278 * waiting for the log to be flushed to disk. 238 * transaction is marked dirty regardless of whether the refcount
239 * update succeeds or fails to support the CUI/CUD lifecycle rules.
279 */ 240 */
280STATIC uint 241static int
281xfs_cud_item_push( 242xfs_trans_log_finish_refcount_update(
282 struct xfs_log_item *lip, 243 struct xfs_trans *tp,
283 struct list_head *buffer_list) 244 struct xfs_cud_log_item *cudp,
245 enum xfs_refcount_intent_type type,
246 xfs_fsblock_t startblock,
247 xfs_extlen_t blockcount,
248 xfs_fsblock_t *new_fsb,
249 xfs_extlen_t *new_len,
250 struct xfs_btree_cur **pcur)
284{ 251{
285 return XFS_ITEM_PINNED; 252 int error;
253
254 error = xfs_refcount_finish_one(tp, type, startblock,
255 blockcount, new_fsb, new_len, pcur);
256
257 /*
258 * Mark the transaction dirty, even on error. This ensures the
259 * transaction is aborted, which:
260 *
261 * 1.) releases the CUI and frees the CUD
262 * 2.) shuts down the filesystem
263 */
264 tp->t_flags |= XFS_TRANS_DIRTY;
265 set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
266
267 return error;
286} 268}
287 269
288/* 270/* Sort refcount intents by AG. */
289 * The CUD is either committed or aborted if the transaction is cancelled. If 271static int
290 * the transaction is cancelled, drop our reference to the CUI and free the 272xfs_refcount_update_diff_items(
291 * CUD. 273 void *priv,
292 */ 274 struct list_head *a,
293STATIC void 275 struct list_head *b)
294xfs_cud_item_unlock(
295 struct xfs_log_item *lip)
296{ 276{
297 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 277 struct xfs_mount *mp = priv;
278 struct xfs_refcount_intent *ra;
279 struct xfs_refcount_intent *rb;
280
281 ra = container_of(a, struct xfs_refcount_intent, ri_list);
282 rb = container_of(b, struct xfs_refcount_intent, ri_list);
283 return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
284 XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
285}
298 286
299 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 287/* Get an CUI. */
300 xfs_cui_release(cudp->cud_cuip); 288STATIC void *
301 kmem_zone_free(xfs_cud_zone, cudp); 289xfs_refcount_update_create_intent(
290 struct xfs_trans *tp,
291 unsigned int count)
292{
293 struct xfs_cui_log_item *cuip;
294
295 ASSERT(tp != NULL);
296 ASSERT(count > 0);
297
298 cuip = xfs_cui_init(tp->t_mountp, count);
299 ASSERT(cuip != NULL);
300
301 /*
302 * Get a log_item_desc to point at the new item.
303 */
304 xfs_trans_add_item(tp, &cuip->cui_item);
305 return cuip;
306}
307
308/* Set the phys extent flags for this reverse mapping. */
309static void
310xfs_trans_set_refcount_flags(
311 struct xfs_phys_extent *refc,
312 enum xfs_refcount_intent_type type)
313{
314 refc->pe_flags = 0;
315 switch (type) {
316 case XFS_REFCOUNT_INCREASE:
317 case XFS_REFCOUNT_DECREASE:
318 case XFS_REFCOUNT_ALLOC_COW:
319 case XFS_REFCOUNT_FREE_COW:
320 refc->pe_flags |= type;
321 break;
322 default:
323 ASSERT(0);
302 } 324 }
303} 325}
304 326
305/* 327/* Log refcount updates in the intent item. */
306 * When the cud item is committed to disk, all we need to do is delete our 328STATIC void
307 * reference to our partner cui item and then free ourselves. Since we're 329xfs_refcount_update_log_item(
308 * freeing ourselves we must return -1 to keep the transaction code from 330 struct xfs_trans *tp,
309 * further referencing this item. 331 void *intent,
310 */ 332 struct list_head *item)
311STATIC xfs_lsn_t
312xfs_cud_item_committed(
313 struct xfs_log_item *lip,
314 xfs_lsn_t lsn)
315{ 333{
316 struct xfs_cud_log_item *cudp = CUD_ITEM(lip); 334 struct xfs_cui_log_item *cuip = intent;
335 struct xfs_refcount_intent *refc;
336 uint next_extent;
337 struct xfs_phys_extent *ext;
338
339 refc = container_of(item, struct xfs_refcount_intent, ri_list);
340
341 tp->t_flags |= XFS_TRANS_DIRTY;
342 set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
317 343
318 /* 344 /*
319 * Drop the CUI reference regardless of whether the CUD has been 345 * atomic_inc_return gives us the value after the increment;
320 * aborted. Once the CUD transaction is constructed, it is the sole 346 * we want to use it as an array index so we need to subtract 1 from
321 * responsibility of the CUD to release the CUI (even if the CUI is 347 * it.
322 * aborted due to log I/O error).
323 */ 348 */
324 xfs_cui_release(cudp->cud_cuip); 349 next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
325 kmem_zone_free(xfs_cud_zone, cudp); 350 ASSERT(next_extent < cuip->cui_format.cui_nextents);
351 ext = &cuip->cui_format.cui_extents[next_extent];
352 ext->pe_startblock = refc->ri_startblock;
353 ext->pe_len = refc->ri_blockcount;
354 xfs_trans_set_refcount_flags(ext, refc->ri_type);
355}
326 356
327 return (xfs_lsn_t)-1; 357/* Get an CUD so we can process all the deferred refcount updates. */
358STATIC void *
359xfs_refcount_update_create_done(
360 struct xfs_trans *tp,
361 void *intent,
362 unsigned int count)
363{
364 return xfs_trans_get_cud(tp, intent);
328} 365}
329 366
330/* 367/* Process a deferred refcount update. */
331 * The CUD dependency tracking op doesn't do squat. It can't because 368STATIC int
332 * it doesn't know where the free extent is coming from. The dependency 369xfs_refcount_update_finish_item(
333 * tracking has to be handled by the "enclosing" metadata object. For 370 struct xfs_trans *tp,
334 * example, for inodes, the inode is locked throughout the extent freeing 371 struct list_head *item,
335 * so the dependency should be recorded there. 372 void *done_item,
336 */ 373 void **state)
337STATIC void
338xfs_cud_item_committing(
339 struct xfs_log_item *lip,
340 xfs_lsn_t lsn)
341{ 374{
375 struct xfs_refcount_intent *refc;
376 xfs_fsblock_t new_fsb;
377 xfs_extlen_t new_aglen;
378 int error;
379
380 refc = container_of(item, struct xfs_refcount_intent, ri_list);
381 error = xfs_trans_log_finish_refcount_update(tp, done_item,
382 refc->ri_type,
383 refc->ri_startblock,
384 refc->ri_blockcount,
385 &new_fsb, &new_aglen,
386 (struct xfs_btree_cur **)state);
387 /* Did we run out of reservation? Requeue what we didn't finish. */
388 if (!error && new_aglen > 0) {
389 ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
390 refc->ri_type == XFS_REFCOUNT_DECREASE);
391 refc->ri_startblock = new_fsb;
392 refc->ri_blockcount = new_aglen;
393 return -EAGAIN;
394 }
395 kmem_free(refc);
396 return error;
342} 397}
343 398
344/* 399/* Clean up after processing deferred refcounts. */
345 * This is the ops vector shared by all cud log items. 400STATIC void
346 */ 401xfs_refcount_update_finish_cleanup(
347static const struct xfs_item_ops xfs_cud_item_ops = { 402 struct xfs_trans *tp,
348 .iop_size = xfs_cud_item_size, 403 void *state,
349 .iop_format = xfs_cud_item_format, 404 int error)
350 .iop_pin = xfs_cud_item_pin, 405{
351 .iop_unpin = xfs_cud_item_unpin, 406 struct xfs_btree_cur *rcur = state;
352 .iop_unlock = xfs_cud_item_unlock,
353 .iop_committed = xfs_cud_item_committed,
354 .iop_push = xfs_cud_item_push,
355 .iop_committing = xfs_cud_item_committing,
356};
357 407
358/* 408 xfs_refcount_finish_one_cleanup(tp, rcur, error);
359 * Allocate and initialize an cud item with the given number of extents. 409}
360 */
361struct xfs_cud_log_item *
362xfs_cud_init(
363 struct xfs_mount *mp,
364 struct xfs_cui_log_item *cuip)
365 410
411/* Abort all pending CUIs. */
412STATIC void
413xfs_refcount_update_abort_intent(
414 void *intent)
366{ 415{
367 struct xfs_cud_log_item *cudp; 416 xfs_cui_release(intent);
417}
368 418
369 cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP); 419/* Cancel a deferred refcount update. */
370 xfs_log_item_init(mp, &cudp->cud_item, XFS_LI_CUD, &xfs_cud_item_ops); 420STATIC void
371 cudp->cud_cuip = cuip; 421xfs_refcount_update_cancel_item(
372 cudp->cud_format.cud_cui_id = cuip->cui_format.cui_id; 422 struct list_head *item)
423{
424 struct xfs_refcount_intent *refc;
373 425
374 return cudp; 426 refc = container_of(item, struct xfs_refcount_intent, ri_list);
427 kmem_free(refc);
375} 428}
376 429
430const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
431 .max_items = XFS_CUI_MAX_FAST_EXTENTS,
432 .diff_items = xfs_refcount_update_diff_items,
433 .create_intent = xfs_refcount_update_create_intent,
434 .abort_intent = xfs_refcount_update_abort_intent,
435 .log_item = xfs_refcount_update_log_item,
436 .create_done = xfs_refcount_update_create_done,
437 .finish_item = xfs_refcount_update_finish_item,
438 .finish_cleanup = xfs_refcount_update_finish_cleanup,
439 .cancel_item = xfs_refcount_update_cancel_item,
440};
441
377/* 442/*
378 * Process a refcount update intent item that was recovered from the log. 443 * Process a refcount update intent item that was recovered from the log.
379 * We need to update the refcountbt. 444 * We need to update the refcountbt.
diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h
index 3896dcc2368f..e47530f30489 100644
--- a/fs/xfs/xfs_refcount_item.h
+++ b/fs/xfs/xfs_refcount_item.h
@@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_cui_zone;
78extern struct kmem_zone *xfs_cud_zone; 78extern struct kmem_zone *xfs_cud_zone;
79 79
80struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint); 80struct xfs_cui_log_item *xfs_cui_init(struct xfs_mount *, uint);
81struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *,
82 struct xfs_cui_log_item *);
83void xfs_cui_item_free(struct xfs_cui_log_item *); 81void xfs_cui_item_free(struct xfs_cui_log_item *);
84void xfs_cui_release(struct xfs_cui_log_item *); 82void xfs_cui_release(struct xfs_cui_log_item *);
85int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip); 83int xfs_cui_recover(struct xfs_trans *parent_tp, struct xfs_cui_log_item *cuip);
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 680ae7662a78..c4ec7afd1170 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -11,21 +11,12 @@
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_defer.h" 13#include "xfs_defer.h"
14#include "xfs_da_format.h"
15#include "xfs_da_btree.h"
16#include "xfs_inode.h" 14#include "xfs_inode.h"
17#include "xfs_trans.h" 15#include "xfs_trans.h"
18#include "xfs_inode_item.h"
19#include "xfs_bmap.h" 16#include "xfs_bmap.h"
20#include "xfs_bmap_util.h" 17#include "xfs_bmap_util.h"
21#include "xfs_error.h"
22#include "xfs_dir2.h"
23#include "xfs_dir2_priv.h"
24#include "xfs_ioctl.h"
25#include "xfs_trace.h" 18#include "xfs_trace.h"
26#include "xfs_log.h"
27#include "xfs_icache.h" 19#include "xfs_icache.h"
28#include "xfs_pnfs.h"
29#include "xfs_btree.h" 20#include "xfs_btree.h"
30#include "xfs_refcount_btree.h" 21#include "xfs_refcount_btree.h"
31#include "xfs_refcount.h" 22#include "xfs_refcount.h"
@@ -33,11 +24,9 @@
33#include "xfs_trans_space.h" 24#include "xfs_trans_space.h"
34#include "xfs_bit.h" 25#include "xfs_bit.h"
35#include "xfs_alloc.h" 26#include "xfs_alloc.h"
36#include "xfs_quota_defs.h"
37#include "xfs_quota.h" 27#include "xfs_quota.h"
38#include "xfs_reflink.h" 28#include "xfs_reflink.h"
39#include "xfs_iomap.h" 29#include "xfs_iomap.h"
40#include "xfs_rmap_btree.h"
41#include "xfs_sb.h" 30#include "xfs_sb.h"
42#include "xfs_ag_resv.h" 31#include "xfs_ag_resv.h"
43 32
@@ -572,7 +561,7 @@ xfs_reflink_cancel_cow_range(
572 561
573 /* Start a rolling transaction to remove the mappings */ 562 /* Start a rolling transaction to remove the mappings */
574 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 563 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
575 0, 0, XFS_TRANS_NOFS, &tp); 564 0, 0, 0, &tp);
576 if (error) 565 if (error)
577 goto out; 566 goto out;
578 567
@@ -631,7 +620,7 @@ xfs_reflink_end_cow_extent(
631 620
632 resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); 621 resblks = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
633 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 622 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0,
634 XFS_TRANS_RESERVE | XFS_TRANS_NOFS, &tp); 623 XFS_TRANS_RESERVE, &tp);
635 if (error) 624 if (error)
636 return error; 625 return error;
637 626
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 127dc9c32a54..77ed557b6127 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -14,7 +14,6 @@
14#include "xfs_defer.h" 14#include "xfs_defer.h"
15#include "xfs_trans.h" 15#include "xfs_trans.h"
16#include "xfs_trans_priv.h" 16#include "xfs_trans_priv.h"
17#include "xfs_buf_item.h"
18#include "xfs_rmap_item.h" 17#include "xfs_rmap_item.h"
19#include "xfs_log.h" 18#include "xfs_log.h"
20#include "xfs_rmap.h" 19#include "xfs_rmap.h"
@@ -94,15 +93,6 @@ xfs_rui_item_format(
94} 93}
95 94
96/* 95/*
97 * Pinning has no meaning for an rui item, so just return.
98 */
99STATIC void
100xfs_rui_item_pin(
101 struct xfs_log_item *lip)
102{
103}
104
105/*
106 * The unpin operation is the last place an RUI is manipulated in the log. It is 96 * The unpin operation is the last place an RUI is manipulated in the log. It is
107 * either inserted in the AIL or aborted in the event of a log I/O error. In 97 * either inserted in the AIL or aborted in the event of a log I/O error. In
108 * either case, the RUI transaction has been successfully committed to make it 98 * either case, the RUI transaction has been successfully committed to make it
@@ -121,71 +111,22 @@ xfs_rui_item_unpin(
121} 111}
122 112
123/* 113/*
124 * RUI items have no locking or pushing. However, since RUIs are pulled from
125 * the AIL when their corresponding RUDs are committed to disk, their situation
126 * is very similar to being pinned. Return XFS_ITEM_PINNED so that the caller
127 * will eventually flush the log. This should help in getting the RUI out of
128 * the AIL.
129 */
130STATIC uint
131xfs_rui_item_push(
132 struct xfs_log_item *lip,
133 struct list_head *buffer_list)
134{
135 return XFS_ITEM_PINNED;
136}
137
138/*
139 * The RUI has been either committed or aborted if the transaction has been 114 * The RUI has been either committed or aborted if the transaction has been
140 * cancelled. If the transaction was cancelled, an RUD isn't going to be 115 * cancelled. If the transaction was cancelled, an RUD isn't going to be
141 * constructed and thus we free the RUI here directly. 116 * constructed and thus we free the RUI here directly.
142 */ 117 */
143STATIC void 118STATIC void
144xfs_rui_item_unlock( 119xfs_rui_item_release(
145 struct xfs_log_item *lip) 120 struct xfs_log_item *lip)
146{ 121{
147 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) 122 xfs_rui_release(RUI_ITEM(lip));
148 xfs_rui_release(RUI_ITEM(lip));
149} 123}
150 124
151/*
152 * The RUI is logged only once and cannot be moved in the log, so simply return
153 * the lsn at which it's been logged.
154 */
155STATIC xfs_lsn_t
156xfs_rui_item_committed(
157 struct xfs_log_item *lip,
158 xfs_lsn_t lsn)
159{
160 return lsn;
161}
162
163/*
164 * The RUI dependency tracking op doesn't do squat. It can't because
165 * it doesn't know where the free extent is coming from. The dependency
166 * tracking has to be handled by the "enclosing" metadata object. For
167 * example, for inodes, the inode is locked throughout the extent freeing
168 * so the dependency should be recorded there.
169 */
170STATIC void
171xfs_rui_item_committing(
172 struct xfs_log_item *lip,
173 xfs_lsn_t lsn)
174{
175}
176
177/*
178 * This is the ops vector shared by all rui log items.
179 */
180static const struct xfs_item_ops xfs_rui_item_ops = { 125static const struct xfs_item_ops xfs_rui_item_ops = {
181 .iop_size = xfs_rui_item_size, 126 .iop_size = xfs_rui_item_size,
182 .iop_format = xfs_rui_item_format, 127 .iop_format = xfs_rui_item_format,
183 .iop_pin = xfs_rui_item_pin,
184 .iop_unpin = xfs_rui_item_unpin, 128 .iop_unpin = xfs_rui_item_unpin,
185 .iop_unlock = xfs_rui_item_unlock, 129 .iop_release = xfs_rui_item_release,
186 .iop_committed = xfs_rui_item_committed,
187 .iop_push = xfs_rui_item_push,
188 .iop_committing = xfs_rui_item_committing,
189}; 130};
190 131
191/* 132/*
@@ -275,126 +216,271 @@ xfs_rud_item_format(
275} 216}
276 217
277/* 218/*
278 * Pinning has no meaning for an rud item, so just return. 219 * The RUD is either committed or aborted if the transaction is cancelled. If
220 * the transaction is cancelled, drop our reference to the RUI and free the
221 * RUD.
279 */ 222 */
280STATIC void 223STATIC void
281xfs_rud_item_pin( 224xfs_rud_item_release(
282 struct xfs_log_item *lip) 225 struct xfs_log_item *lip)
283{ 226{
227 struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
228
229 xfs_rui_release(rudp->rud_ruip);
230 kmem_zone_free(xfs_rud_zone, rudp);
284} 231}
285 232
286/* 233static const struct xfs_item_ops xfs_rud_item_ops = {
287 * Since pinning has no meaning for an rud item, unpinning does 234 .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
288 * not either. 235 .iop_size = xfs_rud_item_size,
289 */ 236 .iop_format = xfs_rud_item_format,
290STATIC void 237 .iop_release = xfs_rud_item_release,
291xfs_rud_item_unpin( 238};
292 struct xfs_log_item *lip, 239
293 int remove) 240static struct xfs_rud_log_item *
241xfs_trans_get_rud(
242 struct xfs_trans *tp,
243 struct xfs_rui_log_item *ruip)
294{ 244{
245 struct xfs_rud_log_item *rudp;
246
247 rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
248 xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD,
249 &xfs_rud_item_ops);
250 rudp->rud_ruip = ruip;
251 rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
252
253 xfs_trans_add_item(tp, &rudp->rud_item);
254 return rudp;
295} 255}
296 256
297/* 257/* Set the map extent flags for this reverse mapping. */
298 * There isn't much you can do to push on an rud item. It is simply stuck 258static void
299 * waiting for the log to be flushed to disk. 259xfs_trans_set_rmap_flags(
300 */ 260 struct xfs_map_extent *rmap,
301STATIC uint 261 enum xfs_rmap_intent_type type,
302xfs_rud_item_push( 262 int whichfork,
303 struct xfs_log_item *lip, 263 xfs_exntst_t state)
304 struct list_head *buffer_list)
305{ 264{
306 return XFS_ITEM_PINNED; 265 rmap->me_flags = 0;
266 if (state == XFS_EXT_UNWRITTEN)
267 rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
268 if (whichfork == XFS_ATTR_FORK)
269 rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
270 switch (type) {
271 case XFS_RMAP_MAP:
272 rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
273 break;
274 case XFS_RMAP_MAP_SHARED:
275 rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
276 break;
277 case XFS_RMAP_UNMAP:
278 rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
279 break;
280 case XFS_RMAP_UNMAP_SHARED:
281 rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
282 break;
283 case XFS_RMAP_CONVERT:
284 rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
285 break;
286 case XFS_RMAP_CONVERT_SHARED:
287 rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
288 break;
289 case XFS_RMAP_ALLOC:
290 rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
291 break;
292 case XFS_RMAP_FREE:
293 rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
294 break;
295 default:
296 ASSERT(0);
297 }
307} 298}
308 299
309/* 300/*
310 * The RUD is either committed or aborted if the transaction is cancelled. If 301 * Finish an rmap update and log it to the RUD. Note that the transaction is
311 * the transaction is cancelled, drop our reference to the RUI and free the 302 * marked dirty regardless of whether the rmap update succeeds or fails to
312 * RUD. 303 * support the RUI/RUD lifecycle rules.
313 */ 304 */
314STATIC void 305static int
315xfs_rud_item_unlock( 306xfs_trans_log_finish_rmap_update(
316 struct xfs_log_item *lip) 307 struct xfs_trans *tp,
308 struct xfs_rud_log_item *rudp,
309 enum xfs_rmap_intent_type type,
310 uint64_t owner,
311 int whichfork,
312 xfs_fileoff_t startoff,
313 xfs_fsblock_t startblock,
314 xfs_filblks_t blockcount,
315 xfs_exntst_t state,
316 struct xfs_btree_cur **pcur)
317{ 317{
318 struct xfs_rud_log_item *rudp = RUD_ITEM(lip); 318 int error;
319 319
320 if (test_bit(XFS_LI_ABORTED, &lip->li_flags)) { 320 error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
321 xfs_rui_release(rudp->rud_ruip); 321 startblock, blockcount, state, pcur);
322 kmem_zone_free(xfs_rud_zone, rudp); 322
323 } 323 /*
324 * Mark the transaction dirty, even on error. This ensures the
325 * transaction is aborted, which:
326 *
327 * 1.) releases the RUI and frees the RUD
328 * 2.) shuts down the filesystem
329 */
330 tp->t_flags |= XFS_TRANS_DIRTY;
331 set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
332
333 return error;
324} 334}
325 335
326/* 336/* Sort rmap intents by AG. */
327 * When the rud item is committed to disk, all we need to do is delete our 337static int
328 * reference to our partner rui item and then free ourselves. Since we're 338xfs_rmap_update_diff_items(
329 * freeing ourselves we must return -1 to keep the transaction code from 339 void *priv,
330 * further referencing this item. 340 struct list_head *a,
331 */ 341 struct list_head *b)
332STATIC xfs_lsn_t
333xfs_rud_item_committed(
334 struct xfs_log_item *lip,
335 xfs_lsn_t lsn)
336{ 342{
337 struct xfs_rud_log_item *rudp = RUD_ITEM(lip); 343 struct xfs_mount *mp = priv;
344 struct xfs_rmap_intent *ra;
345 struct xfs_rmap_intent *rb;
346
347 ra = container_of(a, struct xfs_rmap_intent, ri_list);
348 rb = container_of(b, struct xfs_rmap_intent, ri_list);
349 return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
350 XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
351}
352
353/* Get an RUI. */
354STATIC void *
355xfs_rmap_update_create_intent(
356 struct xfs_trans *tp,
357 unsigned int count)
358{
359 struct xfs_rui_log_item *ruip;
360
361 ASSERT(tp != NULL);
362 ASSERT(count > 0);
363
364 ruip = xfs_rui_init(tp->t_mountp, count);
365 ASSERT(ruip != NULL);
338 366
339 /* 367 /*
340 * Drop the RUI reference regardless of whether the RUD has been 368 * Get a log_item_desc to point at the new item.
341 * aborted. Once the RUD transaction is constructed, it is the sole
342 * responsibility of the RUD to release the RUI (even if the RUI is
343 * aborted due to log I/O error).
344 */ 369 */
345 xfs_rui_release(rudp->rud_ruip); 370 xfs_trans_add_item(tp, &ruip->rui_item);
346 kmem_zone_free(xfs_rud_zone, rudp); 371 return ruip;
347
348 return (xfs_lsn_t)-1;
349} 372}
350 373
351/* 374/* Log rmap updates in the intent item. */
352 * The RUD dependency tracking op doesn't do squat. It can't because
353 * it doesn't know where the free extent is coming from. The dependency
354 * tracking has to be handled by the "enclosing" metadata object. For
355 * example, for inodes, the inode is locked throughout the extent freeing
356 * so the dependency should be recorded there.
357 */
358STATIC void 375STATIC void
359xfs_rud_item_committing( 376xfs_rmap_update_log_item(
360 struct xfs_log_item *lip, 377 struct xfs_trans *tp,
361 xfs_lsn_t lsn) 378 void *intent,
379 struct list_head *item)
362{ 380{
381 struct xfs_rui_log_item *ruip = intent;
382 struct xfs_rmap_intent *rmap;
383 uint next_extent;
384 struct xfs_map_extent *map;
385
386 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
387
388 tp->t_flags |= XFS_TRANS_DIRTY;
389 set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
390
391 /*
392 * atomic_inc_return gives us the value after the increment;
393 * we want to use it as an array index so we need to subtract 1 from
394 * it.
395 */
396 next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
397 ASSERT(next_extent < ruip->rui_format.rui_nextents);
398 map = &ruip->rui_format.rui_extents[next_extent];
399 map->me_owner = rmap->ri_owner;
400 map->me_startblock = rmap->ri_bmap.br_startblock;
401 map->me_startoff = rmap->ri_bmap.br_startoff;
402 map->me_len = rmap->ri_bmap.br_blockcount;
403 xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
404 rmap->ri_bmap.br_state);
363} 405}
364 406
365/* 407/* Get an RUD so we can process all the deferred rmap updates. */
366 * This is the ops vector shared by all rud log items. 408STATIC void *
367 */ 409xfs_rmap_update_create_done(
368static const struct xfs_item_ops xfs_rud_item_ops = { 410 struct xfs_trans *tp,
369 .iop_size = xfs_rud_item_size, 411 void *intent,
370 .iop_format = xfs_rud_item_format, 412 unsigned int count)
371 .iop_pin = xfs_rud_item_pin, 413{
372 .iop_unpin = xfs_rud_item_unpin, 414 return xfs_trans_get_rud(tp, intent);
373 .iop_unlock = xfs_rud_item_unlock, 415}
374 .iop_committed = xfs_rud_item_committed,
375 .iop_push = xfs_rud_item_push,
376 .iop_committing = xfs_rud_item_committing,
377};
378 416
379/* 417/* Process a deferred rmap update. */
380 * Allocate and initialize an rud item with the given number of extents. 418STATIC int
381 */ 419xfs_rmap_update_finish_item(
382struct xfs_rud_log_item * 420 struct xfs_trans *tp,
383xfs_rud_init( 421 struct list_head *item,
384 struct xfs_mount *mp, 422 void *done_item,
385 struct xfs_rui_log_item *ruip) 423 void **state)
424{
425 struct xfs_rmap_intent *rmap;
426 int error;
427
428 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
429 error = xfs_trans_log_finish_rmap_update(tp, done_item,
430 rmap->ri_type,
431 rmap->ri_owner, rmap->ri_whichfork,
432 rmap->ri_bmap.br_startoff,
433 rmap->ri_bmap.br_startblock,
434 rmap->ri_bmap.br_blockcount,
435 rmap->ri_bmap.br_state,
436 (struct xfs_btree_cur **)state);
437 kmem_free(rmap);
438 return error;
439}
440
441/* Clean up after processing deferred rmaps. */
442STATIC void
443xfs_rmap_update_finish_cleanup(
444 struct xfs_trans *tp,
445 void *state,
446 int error)
447{
448 struct xfs_btree_cur *rcur = state;
449
450 xfs_rmap_finish_one_cleanup(tp, rcur, error);
451}
386 452
453/* Abort all pending RUIs. */
454STATIC void
455xfs_rmap_update_abort_intent(
456 void *intent)
387{ 457{
388 struct xfs_rud_log_item *rudp; 458 xfs_rui_release(intent);
459}
389 460
390 rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP); 461/* Cancel a deferred rmap update. */
391 xfs_log_item_init(mp, &rudp->rud_item, XFS_LI_RUD, &xfs_rud_item_ops); 462STATIC void
392 rudp->rud_ruip = ruip; 463xfs_rmap_update_cancel_item(
393 rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id; 464 struct list_head *item)
465{
466 struct xfs_rmap_intent *rmap;
394 467
395 return rudp; 468 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
469 kmem_free(rmap);
396} 470}
397 471
472const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
473 .max_items = XFS_RUI_MAX_FAST_EXTENTS,
474 .diff_items = xfs_rmap_update_diff_items,
475 .create_intent = xfs_rmap_update_create_intent,
476 .abort_intent = xfs_rmap_update_abort_intent,
477 .log_item = xfs_rmap_update_log_item,
478 .create_done = xfs_rmap_update_create_done,
479 .finish_item = xfs_rmap_update_finish_item,
480 .finish_cleanup = xfs_rmap_update_finish_cleanup,
481 .cancel_item = xfs_rmap_update_cancel_item,
482};
483
398/* 484/*
399 * Process an rmap update intent item that was recovered from the log. 485 * Process an rmap update intent item that was recovered from the log.
400 * We need to update the rmapbt. 486 * We need to update the rmapbt.
diff --git a/fs/xfs/xfs_rmap_item.h b/fs/xfs/xfs_rmap_item.h
index 7e482baa27f5..8708e4a5aa5c 100644
--- a/fs/xfs/xfs_rmap_item.h
+++ b/fs/xfs/xfs_rmap_item.h
@@ -78,8 +78,6 @@ extern struct kmem_zone *xfs_rui_zone;
78extern struct kmem_zone *xfs_rud_zone; 78extern struct kmem_zone *xfs_rud_zone;
79 79
80struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint); 80struct xfs_rui_log_item *xfs_rui_init(struct xfs_mount *, uint);
81struct xfs_rud_log_item *xfs_rud_init(struct xfs_mount *,
82 struct xfs_rui_log_item *);
83int xfs_rui_copy_format(struct xfs_log_iovec *buf, 81int xfs_rui_copy_format(struct xfs_log_iovec *buf,
84 struct xfs_rui_log_format *dst_rui_fmt); 82 struct xfs_rui_log_format *dst_rui_fmt);
85void xfs_rui_item_free(struct xfs_rui_log_item *); 83void xfs_rui_item_free(struct xfs_rui_log_item *);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ac0fcdad0c4e..5fa4db3c3e32 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -11,17 +11,11 @@
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_bit.h" 12#include "xfs_bit.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_defer.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_bmap.h" 15#include "xfs_bmap.h"
17#include "xfs_bmap_util.h"
18#include "xfs_bmap_btree.h" 16#include "xfs_bmap_btree.h"
19#include "xfs_alloc.h"
20#include "xfs_error.h"
21#include "xfs_trans.h" 17#include "xfs_trans.h"
22#include "xfs_trans_space.h" 18#include "xfs_trans_space.h"
23#include "xfs_trace.h"
24#include "xfs_buf.h"
25#include "xfs_icache.h" 19#include "xfs_icache.h"
26#include "xfs_rtalloc.h" 20#include "xfs_rtalloc.h"
27 21
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index cc509743facd..113883c4f202 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -4,7 +4,6 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include <linux/proc_fs.h>
8 7
9struct xstats xfsstats; 8struct xstats xfsstats;
10 9
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index a14d11d78bd8..f9450235533c 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -11,18 +11,15 @@
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_sb.h" 12#include "xfs_sb.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_da_format.h"
15#include "xfs_inode.h" 14#include "xfs_inode.h"
16#include "xfs_btree.h" 15#include "xfs_btree.h"
17#include "xfs_bmap.h" 16#include "xfs_bmap.h"
18#include "xfs_alloc.h" 17#include "xfs_alloc.h"
19#include "xfs_error.h"
20#include "xfs_fsops.h" 18#include "xfs_fsops.h"
21#include "xfs_trans.h" 19#include "xfs_trans.h"
22#include "xfs_buf_item.h" 20#include "xfs_buf_item.h"
23#include "xfs_log.h" 21#include "xfs_log.h"
24#include "xfs_log_priv.h" 22#include "xfs_log_priv.h"
25#include "xfs_da_btree.h"
26#include "xfs_dir2.h" 23#include "xfs_dir2.h"
27#include "xfs_extfree_item.h" 24#include "xfs_extfree_item.h"
28#include "xfs_mru_cache.h" 25#include "xfs_mru_cache.h"
@@ -38,18 +35,8 @@
38#include "xfs_refcount_item.h" 35#include "xfs_refcount_item.h"
39#include "xfs_bmap_item.h" 36#include "xfs_bmap_item.h"
40#include "xfs_reflink.h" 37#include "xfs_reflink.h"
41#include "xfs_defer.h"
42 38
43#include <linux/namei.h>
44#include <linux/dax.h>
45#include <linux/init.h>
46#include <linux/slab.h>
47#include <linux/magic.h> 39#include <linux/magic.h>
48#include <linux/mount.h>
49#include <linux/mempool.h>
50#include <linux/writeback.h>
51#include <linux/kthread.h>
52#include <linux/freezer.h>
53#include <linux/parser.h> 40#include <linux/parser.h>
54 41
55static const struct super_operations xfs_super_operations; 42static const struct super_operations xfs_super_operations;
@@ -582,7 +569,7 @@ xfs_set_inode_alloc(
582 * Calculate how much should be reserved for inodes to meet 569 * Calculate how much should be reserved for inodes to meet
583 * the max inode percentage. Used only for inode32. 570 * the max inode percentage. Used only for inode32.
584 */ 571 */
585 if (mp->m_maxicount) { 572 if (M_IGEO(mp)->maxicount) {
586 uint64_t icount; 573 uint64_t icount;
587 574
588 icount = sbp->sb_dblocks * sbp->sb_imax_pct; 575 icount = sbp->sb_dblocks * sbp->sb_imax_pct;
@@ -840,16 +827,10 @@ xfs_init_mount_workqueues(
840 if (!mp->m_reclaim_workqueue) 827 if (!mp->m_reclaim_workqueue)
841 goto out_destroy_cil; 828 goto out_destroy_cil;
842 829
843 mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
844 WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
845 mp->m_fsname);
846 if (!mp->m_log_workqueue)
847 goto out_destroy_reclaim;
848
849 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", 830 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
850 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); 831 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
851 if (!mp->m_eofblocks_workqueue) 832 if (!mp->m_eofblocks_workqueue)
852 goto out_destroy_log; 833 goto out_destroy_reclaim;
853 834
854 mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0, 835 mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
855 mp->m_fsname); 836 mp->m_fsname);
@@ -860,8 +841,6 @@ xfs_init_mount_workqueues(
860 841
861out_destroy_eofb: 842out_destroy_eofb:
862 destroy_workqueue(mp->m_eofblocks_workqueue); 843 destroy_workqueue(mp->m_eofblocks_workqueue);
863out_destroy_log:
864 destroy_workqueue(mp->m_log_workqueue);
865out_destroy_reclaim: 844out_destroy_reclaim:
866 destroy_workqueue(mp->m_reclaim_workqueue); 845 destroy_workqueue(mp->m_reclaim_workqueue);
867out_destroy_cil: 846out_destroy_cil:
@@ -880,7 +859,6 @@ xfs_destroy_mount_workqueues(
880{ 859{
881 destroy_workqueue(mp->m_sync_workqueue); 860 destroy_workqueue(mp->m_sync_workqueue);
882 destroy_workqueue(mp->m_eofblocks_workqueue); 861 destroy_workqueue(mp->m_eofblocks_workqueue);
883 destroy_workqueue(mp->m_log_workqueue);
884 destroy_workqueue(mp->m_reclaim_workqueue); 862 destroy_workqueue(mp->m_reclaim_workqueue);
885 destroy_workqueue(mp->m_cil_workqueue); 863 destroy_workqueue(mp->m_cil_workqueue);
886 destroy_workqueue(mp->m_unwritten_workqueue); 864 destroy_workqueue(mp->m_unwritten_workqueue);
@@ -1131,10 +1109,10 @@ xfs_fs_statfs(
1131 1109
1132 fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree); 1110 fakeinos = XFS_FSB_TO_INO(mp, statp->f_bfree);
1133 statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER); 1111 statp->f_files = min(icount + fakeinos, (uint64_t)XFS_MAXINUMBER);
1134 if (mp->m_maxicount) 1112 if (M_IGEO(mp)->maxicount)
1135 statp->f_files = min_t(typeof(statp->f_files), 1113 statp->f_files = min_t(typeof(statp->f_files),
1136 statp->f_files, 1114 statp->f_files,
1137 mp->m_maxicount); 1115 M_IGEO(mp)->maxicount);
1138 1116
1139 /* If sb_icount overshot maxicount, report actual allocation */ 1117 /* If sb_icount overshot maxicount, report actual allocation */
1140 statp->f_files = max_t(typeof(statp->f_files), 1118 statp->f_files = max_t(typeof(statp->f_files),
@@ -1685,6 +1663,8 @@ xfs_fs_fill_super(
1685 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits); 1663 sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1686 sb->s_max_links = XFS_MAXLINK; 1664 sb->s_max_links = XFS_MAXLINK;
1687 sb->s_time_gran = 1; 1665 sb->s_time_gran = 1;
1666 sb->s_iflags |= SB_I_CGROUPWB;
1667
1688 set_posix_acl_flag(sb); 1668 set_posix_acl_flag(sb);
1689 1669
1690 /* version 5 superblocks support inode version counters. */ 1670 /* version 5 superblocks support inode version counters. */
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 21cb49a43d7c..763e43d22dee 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -38,6 +38,18 @@ extern void xfs_qm_exit(void);
38# define XFS_SCRUB_STRING 38# define XFS_SCRUB_STRING
39#endif 39#endif
40 40
41#ifdef CONFIG_XFS_ONLINE_REPAIR
42# define XFS_REPAIR_STRING "repair, "
43#else
44# define XFS_REPAIR_STRING
45#endif
46
47#ifdef CONFIG_XFS_WARN
48# define XFS_WARN_STRING "verbose warnings, "
49#else
50# define XFS_WARN_STRING
51#endif
52
41#ifdef DEBUG 53#ifdef DEBUG
42# define XFS_DBG_STRING "debug" 54# define XFS_DBG_STRING "debug"
43#else 55#else
@@ -49,6 +61,8 @@ extern void xfs_qm_exit(void);
49 XFS_SECURITY_STRING \ 61 XFS_SECURITY_STRING \
50 XFS_REALTIME_STRING \ 62 XFS_REALTIME_STRING \
51 XFS_SCRUB_STRING \ 63 XFS_SCRUB_STRING \
64 XFS_REPAIR_STRING \
65 XFS_WARN_STRING \
52 XFS_DBG_STRING /* DBG must be last */ 66 XFS_DBG_STRING /* DBG must be last */
53 67
54struct xfs_inode; 68struct xfs_inode;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index b2c1177c717f..ed66fd2de327 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -12,23 +12,14 @@
12#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
13#include "xfs_bit.h" 13#include "xfs_bit.h"
14#include "xfs_mount.h" 14#include "xfs_mount.h"
15#include "xfs_da_format.h"
16#include "xfs_da_btree.h"
17#include "xfs_defer.h"
18#include "xfs_dir2.h" 15#include "xfs_dir2.h"
19#include "xfs_inode.h" 16#include "xfs_inode.h"
20#include "xfs_ialloc.h"
21#include "xfs_alloc.h"
22#include "xfs_bmap.h" 17#include "xfs_bmap.h"
23#include "xfs_bmap_btree.h" 18#include "xfs_bmap_btree.h"
24#include "xfs_bmap_util.h"
25#include "xfs_error.h"
26#include "xfs_quota.h" 19#include "xfs_quota.h"
27#include "xfs_trans_space.h" 20#include "xfs_trans_space.h"
28#include "xfs_trace.h" 21#include "xfs_trace.h"
29#include "xfs_symlink.h"
30#include "xfs_trans.h" 22#include "xfs_trans.h"
31#include "xfs_log.h"
32 23
33/* ----- Kernel only functions below ----- */ 24/* ----- Kernel only functions below ----- */
34int 25int
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 0cc034dfb786..31b3bdbd2eba 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -4,10 +4,7 @@
4 * All Rights Reserved. 4 * All Rights Reserved.
5 */ 5 */
6#include "xfs.h" 6#include "xfs.h"
7#include <linux/sysctl.h>
8#include <linux/proc_fs.h>
9#include "xfs_error.h" 7#include "xfs_error.h"
10#include "xfs_stats.h"
11 8
12static struct ctl_table_header *xfs_table_header; 9static struct ctl_table_header *xfs_table_header;
13 10
diff --git a/fs/xfs/xfs_sysctl.h b/fs/xfs/xfs_sysctl.h
index ad7f9be13087..8abf4640f1d5 100644
--- a/fs/xfs/xfs_sysctl.h
+++ b/fs/xfs/xfs_sysctl.h
@@ -82,6 +82,9 @@ enum {
82extern xfs_param_t xfs_params; 82extern xfs_param_t xfs_params;
83 83
84struct xfs_globals { 84struct xfs_globals {
85#ifdef DEBUG
86 int pwork_threads; /* parallel workqueue threads */
87#endif
85 int log_recovery_delay; /* log recovery delay (secs) */ 88 int log_recovery_delay; /* log recovery delay (secs) */
86 int mount_delay; /* mount setup delay (secs) */ 89 int mount_delay; /* mount setup delay (secs) */
87 bool bug_on_assert; /* BUG() the kernel on assert failure */ 90 bool bug_on_assert; /* BUG() the kernel on assert failure */
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index cabda13f3c64..ddd0bf7a4740 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -10,9 +10,7 @@
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_sysfs.h" 12#include "xfs_sysfs.h"
13#include "xfs_log.h"
14#include "xfs_log_priv.h" 13#include "xfs_log_priv.h"
15#include "xfs_stats.h"
16#include "xfs_mount.h" 14#include "xfs_mount.h"
17 15
18struct xfs_sysfs_attr { 16struct xfs_sysfs_attr {
@@ -206,11 +204,51 @@ always_cow_show(
206} 204}
207XFS_SYSFS_ATTR_RW(always_cow); 205XFS_SYSFS_ATTR_RW(always_cow);
208 206
207#ifdef DEBUG
208/*
209 * Override how many threads the parallel work queue is allowed to create.
210 * This has to be a debug-only global (instead of an errortag) because one of
211 * the main users of parallel workqueues is mount time quotacheck.
212 */
213STATIC ssize_t
214pwork_threads_store(
215 struct kobject *kobject,
216 const char *buf,
217 size_t count)
218{
219 int ret;
220 int val;
221
222 ret = kstrtoint(buf, 0, &val);
223 if (ret)
224 return ret;
225
226 if (val < -1 || val > num_possible_cpus())
227 return -EINVAL;
228
229 xfs_globals.pwork_threads = val;
230
231 return count;
232}
233
234STATIC ssize_t
235pwork_threads_show(
236 struct kobject *kobject,
237 char *buf)
238{
239 return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.pwork_threads);
240}
241XFS_SYSFS_ATTR_RW(pwork_threads);
242#endif /* DEBUG */
243
209static struct attribute *xfs_dbg_attrs[] = { 244static struct attribute *xfs_dbg_attrs[] = {
210 ATTR_LIST(bug_on_assert), 245 ATTR_LIST(bug_on_assert),
211 ATTR_LIST(log_recovery_delay), 246 ATTR_LIST(log_recovery_delay),
212 ATTR_LIST(mount_delay), 247 ATTR_LIST(mount_delay),
213 ATTR_LIST(always_cow), 248 ATTR_LIST(always_cow),
249#ifdef DEBUG
250 ATTR_LIST(pwork_threads),
251#endif
214 NULL, 252 NULL,
215}; 253};
216 254
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index cb6489c22cad..bc85b89f88ca 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -15,24 +15,16 @@
15#include "xfs_inode.h" 15#include "xfs_inode.h"
16#include "xfs_btree.h" 16#include "xfs_btree.h"
17#include "xfs_da_btree.h" 17#include "xfs_da_btree.h"
18#include "xfs_ialloc.h"
19#include "xfs_itable.h"
20#include "xfs_alloc.h" 18#include "xfs_alloc.h"
21#include "xfs_bmap.h" 19#include "xfs_bmap.h"
22#include "xfs_attr.h" 20#include "xfs_attr.h"
23#include "xfs_attr_leaf.h"
24#include "xfs_trans.h" 21#include "xfs_trans.h"
25#include "xfs_log.h"
26#include "xfs_log_priv.h" 22#include "xfs_log_priv.h"
27#include "xfs_buf_item.h" 23#include "xfs_buf_item.h"
28#include "xfs_quota.h" 24#include "xfs_quota.h"
29#include "xfs_iomap.h"
30#include "xfs_aops.h"
31#include "xfs_dquot_item.h" 25#include "xfs_dquot_item.h"
32#include "xfs_dquot.h" 26#include "xfs_dquot.h"
33#include "xfs_log_recover.h" 27#include "xfs_log_recover.h"
34#include "xfs_inode_item.h"
35#include "xfs_bmap_btree.h"
36#include "xfs_filestream.h" 28#include "xfs_filestream.h"
37#include "xfs_fsmap.h" 29#include "xfs_fsmap.h"
38 30
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2464ea351f83..8094b1920eef 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -475,7 +475,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_ordered);
475DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin); 475DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
476DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin); 476DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
477DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale); 477DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
478DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock); 478DEFINE_BUF_ITEM_EVENT(xfs_buf_item_release);
479DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed); 479DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
480DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push); 480DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
481DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf); 481DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
@@ -3360,6 +3360,7 @@ DEFINE_TRANS_EVENT(xfs_trans_dup);
3360DEFINE_TRANS_EVENT(xfs_trans_free); 3360DEFINE_TRANS_EVENT(xfs_trans_free);
3361DEFINE_TRANS_EVENT(xfs_trans_roll); 3361DEFINE_TRANS_EVENT(xfs_trans_roll);
3362DEFINE_TRANS_EVENT(xfs_trans_add_item); 3362DEFINE_TRANS_EVENT(xfs_trans_add_item);
3363DEFINE_TRANS_EVENT(xfs_trans_commit_items);
3363DEFINE_TRANS_EVENT(xfs_trans_free_items); 3364DEFINE_TRANS_EVENT(xfs_trans_free_items);
3364 3365
3365TRACE_EVENT(xfs_iunlink_update_bucket, 3366TRACE_EVENT(xfs_iunlink_update_bucket,
@@ -3516,6 +3517,64 @@ DEFINE_EVENT(xfs_inode_corrupt_class, name, \
3516DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick); 3517DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_sick);
3517DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy); 3518DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
3518 3519
3520TRACE_EVENT(xfs_iwalk_ag,
3521 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
3522 xfs_agino_t startino),
3523 TP_ARGS(mp, agno, startino),
3524 TP_STRUCT__entry(
3525 __field(dev_t, dev)
3526 __field(xfs_agnumber_t, agno)
3527 __field(xfs_agino_t, startino)
3528 ),
3529 TP_fast_assign(
3530 __entry->dev = mp->m_super->s_dev;
3531 __entry->agno = agno;
3532 __entry->startino = startino;
3533 ),
3534 TP_printk("dev %d:%d agno %d startino %u",
3535 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
3536 __entry->startino)
3537)
3538
3539TRACE_EVENT(xfs_iwalk_ag_rec,
3540 TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
3541 struct xfs_inobt_rec_incore *irec),
3542 TP_ARGS(mp, agno, irec),
3543 TP_STRUCT__entry(
3544 __field(dev_t, dev)
3545 __field(xfs_agnumber_t, agno)
3546 __field(xfs_agino_t, startino)
3547 __field(uint64_t, freemask)
3548 ),
3549 TP_fast_assign(
3550 __entry->dev = mp->m_super->s_dev;
3551 __entry->agno = agno;
3552 __entry->startino = irec->ir_startino;
3553 __entry->freemask = irec->ir_free;
3554 ),
3555 TP_printk("dev %d:%d agno %d startino %u freemask 0x%llx",
3556 MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
3557 __entry->startino, __entry->freemask)
3558)
3559
3560TRACE_EVENT(xfs_pwork_init,
3561 TP_PROTO(struct xfs_mount *mp, unsigned int nr_threads, pid_t pid),
3562 TP_ARGS(mp, nr_threads, pid),
3563 TP_STRUCT__entry(
3564 __field(dev_t, dev)
3565 __field(unsigned int, nr_threads)
3566 __field(pid_t, pid)
3567 ),
3568 TP_fast_assign(
3569 __entry->dev = mp->m_super->s_dev;
3570 __entry->nr_threads = nr_threads;
3571 __entry->pid = pid;
3572 ),
3573 TP_printk("dev %d:%d nr_threads %u pid %u",
3574 MAJOR(__entry->dev), MINOR(__entry->dev),
3575 __entry->nr_threads, __entry->pid)
3576)
3577
3519#endif /* _TRACE_XFS_H */ 3578#endif /* _TRACE_XFS_H */
3520 3579
3521#undef TRACE_INCLUDE_PATH 3580#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 912b42f5fe4a..d42a68d8313b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -11,7 +11,6 @@
11#include "xfs_log_format.h" 11#include "xfs_log_format.h"
12#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
13#include "xfs_mount.h" 13#include "xfs_mount.h"
14#include "xfs_inode.h"
15#include "xfs_extent_busy.h" 14#include "xfs_extent_busy.h"
16#include "xfs_quota.h" 15#include "xfs_quota.h"
17#include "xfs_trans.h" 16#include "xfs_trans.h"
@@ -264,9 +263,7 @@ xfs_trans_alloc(
264 * GFP_NOFS allocation context so that we avoid lockdep false positives 263 * GFP_NOFS allocation context so that we avoid lockdep false positives
265 * by doing GFP_KERNEL allocations inside sb_start_intwrite(). 264 * by doing GFP_KERNEL allocations inside sb_start_intwrite().
266 */ 265 */
267 tp = kmem_zone_zalloc(xfs_trans_zone, 266 tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
268 (flags & XFS_TRANS_NOFS) ? KM_NOFS : KM_SLEEP);
269
270 if (!(flags & XFS_TRANS_NO_WRITECOUNT)) 267 if (!(flags & XFS_TRANS_NO_WRITECOUNT))
271 sb_start_intwrite(mp->m_super); 268 sb_start_intwrite(mp->m_super);
272 269
@@ -452,7 +449,7 @@ xfs_trans_apply_sb_deltas(
452 xfs_buf_t *bp; 449 xfs_buf_t *bp;
453 int whole = 0; 450 int whole = 0;
454 451
455 bp = xfs_trans_getsb(tp, tp->t_mountp, 0); 452 bp = xfs_trans_getsb(tp, tp->t_mountp);
456 sbp = XFS_BUF_TO_SBP(bp); 453 sbp = XFS_BUF_TO_SBP(bp);
457 454
458 /* 455 /*
@@ -767,10 +764,9 @@ xfs_trans_del_item(
767} 764}
768 765
769/* Detach and unlock all of the items in a transaction */ 766/* Detach and unlock all of the items in a transaction */
770void 767static void
771xfs_trans_free_items( 768xfs_trans_free_items(
772 struct xfs_trans *tp, 769 struct xfs_trans *tp,
773 xfs_lsn_t commit_lsn,
774 bool abort) 770 bool abort)
775{ 771{
776 struct xfs_log_item *lip, *next; 772 struct xfs_log_item *lip, *next;
@@ -779,11 +775,10 @@ xfs_trans_free_items(
779 775
780 list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) { 776 list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
781 xfs_trans_del_item(lip); 777 xfs_trans_del_item(lip);
782 if (commit_lsn != NULLCOMMITLSN)
783 lip->li_ops->iop_committing(lip, commit_lsn);
784 if (abort) 778 if (abort)
785 set_bit(XFS_LI_ABORTED, &lip->li_flags); 779 set_bit(XFS_LI_ABORTED, &lip->li_flags);
786 lip->li_ops->iop_unlock(lip); 780 if (lip->li_ops->iop_release)
781 lip->li_ops->iop_release(lip);
787 } 782 }
788} 783}
789 784
@@ -804,7 +799,8 @@ xfs_log_item_batch_insert(
804 for (i = 0; i < nr_items; i++) { 799 for (i = 0; i < nr_items; i++) {
805 struct xfs_log_item *lip = log_items[i]; 800 struct xfs_log_item *lip = log_items[i];
806 801
807 lip->li_ops->iop_unpin(lip, 0); 802 if (lip->li_ops->iop_unpin)
803 lip->li_ops->iop_unpin(lip, 0);
808 } 804 }
809} 805}
810 806
@@ -815,7 +811,7 @@ xfs_log_item_batch_insert(
815 * 811 *
816 * If we are called with the aborted flag set, it is because a log write during 812 * If we are called with the aborted flag set, it is because a log write during
817 * a CIL checkpoint commit has failed. In this case, all the items in the 813 * a CIL checkpoint commit has failed. In this case, all the items in the
818 * checkpoint have already gone through iop_commited and iop_unlock, which 814 * checkpoint have already gone through iop_committed and iop_committing, which
819 * means that checkpoint commit abort handling is treated exactly the same 815 * means that checkpoint commit abort handling is treated exactly the same
820 * as an iclog write error even though we haven't started any IO yet. Hence in 816 * as an iclog write error even though we haven't started any IO yet. Hence in
821 * this case all we need to do is iop_committed processing, followed by an 817 * this case all we need to do is iop_committed processing, followed by an
@@ -833,7 +829,7 @@ xfs_trans_committed_bulk(
833 struct xfs_ail *ailp, 829 struct xfs_ail *ailp,
834 struct xfs_log_vec *log_vector, 830 struct xfs_log_vec *log_vector,
835 xfs_lsn_t commit_lsn, 831 xfs_lsn_t commit_lsn,
836 int aborted) 832 bool aborted)
837{ 833{
838#define LOG_ITEM_BATCH_SIZE 32 834#define LOG_ITEM_BATCH_SIZE 32
839 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; 835 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
@@ -852,7 +848,16 @@ xfs_trans_committed_bulk(
852 848
853 if (aborted) 849 if (aborted)
854 set_bit(XFS_LI_ABORTED, &lip->li_flags); 850 set_bit(XFS_LI_ABORTED, &lip->li_flags);
855 item_lsn = lip->li_ops->iop_committed(lip, commit_lsn); 851
852 if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
853 lip->li_ops->iop_release(lip);
854 continue;
855 }
856
857 if (lip->li_ops->iop_committed)
858 item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
859 else
860 item_lsn = commit_lsn;
856 861
857 /* item_lsn of -1 means the item needs no further processing */ 862 /* item_lsn of -1 means the item needs no further processing */
858 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) 863 if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
@@ -864,7 +869,8 @@ xfs_trans_committed_bulk(
864 */ 869 */
865 if (aborted) { 870 if (aborted) {
866 ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount)); 871 ASSERT(XFS_FORCED_SHUTDOWN(ailp->ail_mount));
867 lip->li_ops->iop_unpin(lip, 1); 872 if (lip->li_ops->iop_unpin)
873 lip->li_ops->iop_unpin(lip, 1);
868 continue; 874 continue;
869 } 875 }
870 876
@@ -882,7 +888,8 @@ xfs_trans_committed_bulk(
882 xfs_trans_ail_update(ailp, lip, item_lsn); 888 xfs_trans_ail_update(ailp, lip, item_lsn);
883 else 889 else
884 spin_unlock(&ailp->ail_lock); 890 spin_unlock(&ailp->ail_lock);
885 lip->li_ops->iop_unpin(lip, 0); 891 if (lip->li_ops->iop_unpin)
892 lip->li_ops->iop_unpin(lip, 0);
886 continue; 893 continue;
887 } 894 }
888 895
@@ -998,7 +1005,7 @@ out_unreserve:
998 tp->t_ticket = NULL; 1005 tp->t_ticket = NULL;
999 } 1006 }
1000 current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 1007 current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
1001 xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); 1008 xfs_trans_free_items(tp, !!error);
1002 xfs_trans_free(tp); 1009 xfs_trans_free(tp);
1003 1010
1004 XFS_STATS_INC(mp, xs_trans_empty); 1011 XFS_STATS_INC(mp, xs_trans_empty);
@@ -1060,7 +1067,7 @@ xfs_trans_cancel(
1060 /* mark this thread as no longer being in a transaction */ 1067 /* mark this thread as no longer being in a transaction */
1061 current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 1068 current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
1062 1069
1063 xfs_trans_free_items(tp, NULLCOMMITLSN, dirty); 1070 xfs_trans_free_items(tp, dirty);
1064 xfs_trans_free(tp); 1071 xfs_trans_free(tp);
1065} 1072}
1066 1073
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index c6e1c5704a8c..64d7f171ebd3 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -27,7 +27,7 @@ struct xfs_cud_log_item;
27struct xfs_bui_log_item; 27struct xfs_bui_log_item;
28struct xfs_bud_log_item; 28struct xfs_bud_log_item;
29 29
30typedef struct xfs_log_item { 30struct xfs_log_item {
31 struct list_head li_ail; /* AIL pointers */ 31 struct list_head li_ail; /* AIL pointers */
32 struct list_head li_trans; /* transaction list */ 32 struct list_head li_trans; /* transaction list */
33 xfs_lsn_t li_lsn; /* last on-disk lsn */ 33 xfs_lsn_t li_lsn; /* last on-disk lsn */
@@ -48,7 +48,7 @@ typedef struct xfs_log_item {
48 struct xfs_log_vec *li_lv; /* active log vector */ 48 struct xfs_log_vec *li_lv; /* active log vector */
49 struct xfs_log_vec *li_lv_shadow; /* standby vector */ 49 struct xfs_log_vec *li_lv_shadow; /* standby vector */
50 xfs_lsn_t li_seq; /* CIL commit seq */ 50 xfs_lsn_t li_seq; /* CIL commit seq */
51} xfs_log_item_t; 51};
52 52
53/* 53/*
54 * li_flags use the (set/test/clear)_bit atomic interfaces because updates can 54 * li_flags use the (set/test/clear)_bit atomic interfaces because updates can
@@ -67,17 +67,24 @@ typedef struct xfs_log_item {
67 { (1 << XFS_LI_DIRTY), "DIRTY" } 67 { (1 << XFS_LI_DIRTY), "DIRTY" }
68 68
69struct xfs_item_ops { 69struct xfs_item_ops {
70 void (*iop_size)(xfs_log_item_t *, int *, int *); 70 unsigned flags;
71 void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *); 71 void (*iop_size)(struct xfs_log_item *, int *, int *);
72 void (*iop_pin)(xfs_log_item_t *); 72 void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *);
73 void (*iop_unpin)(xfs_log_item_t *, int remove); 73 void (*iop_pin)(struct xfs_log_item *);
74 void (*iop_unpin)(struct xfs_log_item *, int remove);
74 uint (*iop_push)(struct xfs_log_item *, struct list_head *); 75 uint (*iop_push)(struct xfs_log_item *, struct list_head *);
75 void (*iop_unlock)(xfs_log_item_t *); 76 void (*iop_committing)(struct xfs_log_item *, xfs_lsn_t commit_lsn);
76 xfs_lsn_t (*iop_committed)(xfs_log_item_t *, xfs_lsn_t); 77 void (*iop_release)(struct xfs_log_item *);
77 void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t); 78 xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t);
78 void (*iop_error)(xfs_log_item_t *, xfs_buf_t *); 79 void (*iop_error)(struct xfs_log_item *, xfs_buf_t *);
79}; 80};
80 81
82/*
83 * Release the log item as soon as committed. This is for items just logging
84 * intents that never need to be written back in place.
85 */
86#define XFS_ITEM_RELEASE_WHEN_COMMITTED (1 << 0)
87
81void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, 88void xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item,
82 int type, const struct xfs_item_ops *ops); 89 int type, const struct xfs_item_ops *ops);
83 90
@@ -203,7 +210,7 @@ xfs_trans_read_buf(
203 flags, bpp, ops); 210 flags, bpp, ops);
204} 211}
205 212
206struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int); 213struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *);
207 214
208void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *); 215void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
209void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *); 216void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
@@ -223,14 +230,6 @@ void xfs_trans_dirty_buf(struct xfs_trans *, struct xfs_buf *);
223bool xfs_trans_buf_is_dirty(struct xfs_buf *bp); 230bool xfs_trans_buf_is_dirty(struct xfs_buf *bp);
224void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint); 231void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
225 232
226struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *,
227 struct xfs_efi_log_item *,
228 uint);
229int xfs_trans_free_extent(struct xfs_trans *,
230 struct xfs_efd_log_item *, xfs_fsblock_t,
231 xfs_extlen_t,
232 const struct xfs_owner_info *,
233 bool);
234int xfs_trans_commit(struct xfs_trans *); 233int xfs_trans_commit(struct xfs_trans *);
235int xfs_trans_roll(struct xfs_trans **); 234int xfs_trans_roll(struct xfs_trans **);
236int xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *); 235int xfs_trans_roll_inode(struct xfs_trans **, struct xfs_inode *);
@@ -245,37 +244,4 @@ void xfs_trans_buf_copy_type(struct xfs_buf *dst_bp,
245 244
246extern kmem_zone_t *xfs_trans_zone; 245extern kmem_zone_t *xfs_trans_zone;
247 246
248/* rmap updates */
249enum xfs_rmap_intent_type;
250
251struct xfs_rud_log_item *xfs_trans_get_rud(struct xfs_trans *tp,
252 struct xfs_rui_log_item *ruip);
253int xfs_trans_log_finish_rmap_update(struct xfs_trans *tp,
254 struct xfs_rud_log_item *rudp, enum xfs_rmap_intent_type type,
255 uint64_t owner, int whichfork, xfs_fileoff_t startoff,
256 xfs_fsblock_t startblock, xfs_filblks_t blockcount,
257 xfs_exntst_t state, struct xfs_btree_cur **pcur);
258
259/* refcount updates */
260enum xfs_refcount_intent_type;
261
262struct xfs_cud_log_item *xfs_trans_get_cud(struct xfs_trans *tp,
263 struct xfs_cui_log_item *cuip);
264int xfs_trans_log_finish_refcount_update(struct xfs_trans *tp,
265 struct xfs_cud_log_item *cudp,
266 enum xfs_refcount_intent_type type, xfs_fsblock_t startblock,
267 xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb,
268 xfs_extlen_t *new_len, struct xfs_btree_cur **pcur);
269
270/* mapping updates */
271enum xfs_bmap_intent_type;
272
273struct xfs_bud_log_item *xfs_trans_get_bud(struct xfs_trans *tp,
274 struct xfs_bui_log_item *buip);
275int xfs_trans_log_finish_bmap_update(struct xfs_trans *tp,
276 struct xfs_bud_log_item *rudp, enum xfs_bmap_intent_type type,
277 struct xfs_inode *ip, int whichfork, xfs_fileoff_t startoff,
278 xfs_fsblock_t startblock, xfs_filblks_t *blockcount,
279 xfs_exntst_t state);
280
281#endif /* __XFS_TRANS_H__ */ 247#endif /* __XFS_TRANS_H__ */
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index d3a4e89bf4a0..6ccfd75d3c24 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -6,6 +6,7 @@
6 */ 6 */
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_fs.h" 8#include "xfs_fs.h"
9#include "xfs_shared.h"
9#include "xfs_format.h" 10#include "xfs_format.h"
10#include "xfs_log_format.h" 11#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 12#include "xfs_trans_resv.h"
@@ -74,29 +75,29 @@ xfs_ail_check(
74 * Return a pointer to the last item in the AIL. If the AIL is empty, then 75 * Return a pointer to the last item in the AIL. If the AIL is empty, then
75 * return NULL. 76 * return NULL.
76 */ 77 */
77static xfs_log_item_t * 78static struct xfs_log_item *
78xfs_ail_max( 79xfs_ail_max(
79 struct xfs_ail *ailp) 80 struct xfs_ail *ailp)
80{ 81{
81 if (list_empty(&ailp->ail_head)) 82 if (list_empty(&ailp->ail_head))
82 return NULL; 83 return NULL;
83 84
84 return list_entry(ailp->ail_head.prev, xfs_log_item_t, li_ail); 85 return list_entry(ailp->ail_head.prev, struct xfs_log_item, li_ail);
85} 86}
86 87
87/* 88/*
88 * Return a pointer to the item which follows the given item in the AIL. If 89 * Return a pointer to the item which follows the given item in the AIL. If
89 * the given item is the last item in the list, then return NULL. 90 * the given item is the last item in the list, then return NULL.
90 */ 91 */
91static xfs_log_item_t * 92static struct xfs_log_item *
92xfs_ail_next( 93xfs_ail_next(
93 struct xfs_ail *ailp, 94 struct xfs_ail *ailp,
94 xfs_log_item_t *lip) 95 struct xfs_log_item *lip)
95{ 96{
96 if (lip->li_ail.next == &ailp->ail_head) 97 if (lip->li_ail.next == &ailp->ail_head)
97 return NULL; 98 return NULL;
98 99
99 return list_first_entry(&lip->li_ail, xfs_log_item_t, li_ail); 100 return list_first_entry(&lip->li_ail, struct xfs_log_item, li_ail);
100} 101}
101 102
102/* 103/*
@@ -109,10 +110,10 @@ xfs_ail_next(
109 */ 110 */
110xfs_lsn_t 111xfs_lsn_t
111xfs_ail_min_lsn( 112xfs_ail_min_lsn(
112 struct xfs_ail *ailp) 113 struct xfs_ail *ailp)
113{ 114{
114 xfs_lsn_t lsn = 0; 115 xfs_lsn_t lsn = 0;
115 xfs_log_item_t *lip; 116 struct xfs_log_item *lip;
116 117
117 spin_lock(&ailp->ail_lock); 118 spin_lock(&ailp->ail_lock);
118 lip = xfs_ail_min(ailp); 119 lip = xfs_ail_min(ailp);
@@ -128,10 +129,10 @@ xfs_ail_min_lsn(
128 */ 129 */
129static xfs_lsn_t 130static xfs_lsn_t
130xfs_ail_max_lsn( 131xfs_ail_max_lsn(
131 struct xfs_ail *ailp) 132 struct xfs_ail *ailp)
132{ 133{
133 xfs_lsn_t lsn = 0; 134 xfs_lsn_t lsn = 0;
134 xfs_log_item_t *lip; 135 struct xfs_log_item *lip;
135 136
136 spin_lock(&ailp->ail_lock); 137 spin_lock(&ailp->ail_lock);
137 lip = xfs_ail_max(ailp); 138 lip = xfs_ail_max(ailp);
@@ -216,13 +217,13 @@ xfs_trans_ail_cursor_clear(
216 * ascending traversal. Pass a @lsn of zero to initialise the cursor to the 217 * ascending traversal. Pass a @lsn of zero to initialise the cursor to the
217 * first item in the AIL. Returns NULL if the list is empty. 218 * first item in the AIL. Returns NULL if the list is empty.
218 */ 219 */
219xfs_log_item_t * 220struct xfs_log_item *
220xfs_trans_ail_cursor_first( 221xfs_trans_ail_cursor_first(
221 struct xfs_ail *ailp, 222 struct xfs_ail *ailp,
222 struct xfs_ail_cursor *cur, 223 struct xfs_ail_cursor *cur,
223 xfs_lsn_t lsn) 224 xfs_lsn_t lsn)
224{ 225{
225 xfs_log_item_t *lip; 226 struct xfs_log_item *lip;
226 227
227 xfs_trans_ail_cursor_init(ailp, cur); 228 xfs_trans_ail_cursor_init(ailp, cur);
228 229
@@ -248,7 +249,7 @@ __xfs_trans_ail_cursor_last(
248 struct xfs_ail *ailp, 249 struct xfs_ail *ailp,
249 xfs_lsn_t lsn) 250 xfs_lsn_t lsn)
250{ 251{
251 xfs_log_item_t *lip; 252 struct xfs_log_item *lip;
252 253
253 list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) { 254 list_for_each_entry_reverse(lip, &ailp->ail_head, li_ail) {
254 if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0) 255 if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
@@ -327,8 +328,8 @@ xfs_ail_splice(
327 */ 328 */
328static void 329static void
329xfs_ail_delete( 330xfs_ail_delete(
330 struct xfs_ail *ailp, 331 struct xfs_ail *ailp,
331 xfs_log_item_t *lip) 332 struct xfs_log_item *lip)
332{ 333{
333 xfs_ail_check(ailp, lip); 334 xfs_ail_check(ailp, lip);
334 list_del(&lip->li_ail); 335 list_del(&lip->li_ail);
@@ -347,6 +348,14 @@ xfsaild_push_item(
347 if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN)) 348 if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
348 return XFS_ITEM_PINNED; 349 return XFS_ITEM_PINNED;
349 350
351 /*
352 * Consider the item pinned if a push callback is not defined so the
353 * caller will force the log. This should only happen for intent items
354 * as they are unpinned once the associated done item is committed to
355 * the on-disk log.
356 */
357 if (!lip->li_ops->iop_push)
358 return XFS_ITEM_PINNED;
350 return lip->li_ops->iop_push(lip, &ailp->ail_buf_list); 359 return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
351} 360}
352 361
@@ -356,7 +365,7 @@ xfsaild_push(
356{ 365{
357 xfs_mount_t *mp = ailp->ail_mount; 366 xfs_mount_t *mp = ailp->ail_mount;
358 struct xfs_ail_cursor cur; 367 struct xfs_ail_cursor cur;
359 xfs_log_item_t *lip; 368 struct xfs_log_item *lip;
360 xfs_lsn_t lsn; 369 xfs_lsn_t lsn;
361 xfs_lsn_t target; 370 xfs_lsn_t target;
362 long tout; 371 long tout;
@@ -611,10 +620,10 @@ xfsaild(
611 */ 620 */
612void 621void
613xfs_ail_push( 622xfs_ail_push(
614 struct xfs_ail *ailp, 623 struct xfs_ail *ailp,
615 xfs_lsn_t threshold_lsn) 624 xfs_lsn_t threshold_lsn)
616{ 625{
617 xfs_log_item_t *lip; 626 struct xfs_log_item *lip;
618 627
619 lip = xfs_ail_min(ailp); 628 lip = xfs_ail_min(ailp);
620 if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) || 629 if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
@@ -699,7 +708,7 @@ xfs_trans_ail_update_bulk(
699 int nr_items, 708 int nr_items,
700 xfs_lsn_t lsn) __releases(ailp->ail_lock) 709 xfs_lsn_t lsn) __releases(ailp->ail_lock)
701{ 710{
702 xfs_log_item_t *mlip; 711 struct xfs_log_item *mlip;
703 int mlip_changed = 0; 712 int mlip_changed = 0;
704 int i; 713 int i;
705 LIST_HEAD(tmp); 714 LIST_HEAD(tmp);
diff --git a/fs/xfs/xfs_trans_bmap.c b/fs/xfs/xfs_trans_bmap.c
deleted file mode 100644
index e1c7d55b32c3..000000000000
--- a/fs/xfs/xfs_trans_bmap.c
+++ /dev/null
@@ -1,232 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Copyright (C) 2016 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_defer.h"
14#include "xfs_trans.h"
15#include "xfs_trans_priv.h"
16#include "xfs_bmap_item.h"
17#include "xfs_alloc.h"
18#include "xfs_bmap.h"
19#include "xfs_inode.h"
20
21/*
22 * This routine is called to allocate a "bmap update done"
23 * log item.
24 */
25struct xfs_bud_log_item *
26xfs_trans_get_bud(
27 struct xfs_trans *tp,
28 struct xfs_bui_log_item *buip)
29{
30 struct xfs_bud_log_item *budp;
31
32 budp = xfs_bud_init(tp->t_mountp, buip);
33 xfs_trans_add_item(tp, &budp->bud_item);
34 return budp;
35}
36
37/*
38 * Finish an bmap update and log it to the BUD. Note that the
39 * transaction is marked dirty regardless of whether the bmap update
40 * succeeds or fails to support the BUI/BUD lifecycle rules.
41 */
42int
43xfs_trans_log_finish_bmap_update(
44 struct xfs_trans *tp,
45 struct xfs_bud_log_item *budp,
46 enum xfs_bmap_intent_type type,
47 struct xfs_inode *ip,
48 int whichfork,
49 xfs_fileoff_t startoff,
50 xfs_fsblock_t startblock,
51 xfs_filblks_t *blockcount,
52 xfs_exntst_t state)
53{
54 int error;
55
56 error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff,
57 startblock, blockcount, state);
58
59 /*
60 * Mark the transaction dirty, even on error. This ensures the
61 * transaction is aborted, which:
62 *
63 * 1.) releases the BUI and frees the BUD
64 * 2.) shuts down the filesystem
65 */
66 tp->t_flags |= XFS_TRANS_DIRTY;
67 set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags);
68
69 return error;
70}
71
72/* Sort bmap intents by inode. */
73static int
74xfs_bmap_update_diff_items(
75 void *priv,
76 struct list_head *a,
77 struct list_head *b)
78{
79 struct xfs_bmap_intent *ba;
80 struct xfs_bmap_intent *bb;
81
82 ba = container_of(a, struct xfs_bmap_intent, bi_list);
83 bb = container_of(b, struct xfs_bmap_intent, bi_list);
84 return ba->bi_owner->i_ino - bb->bi_owner->i_ino;
85}
86
87/* Get an BUI. */
88STATIC void *
89xfs_bmap_update_create_intent(
90 struct xfs_trans *tp,
91 unsigned int count)
92{
93 struct xfs_bui_log_item *buip;
94
95 ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS);
96 ASSERT(tp != NULL);
97
98 buip = xfs_bui_init(tp->t_mountp);
99 ASSERT(buip != NULL);
100
101 /*
102 * Get a log_item_desc to point at the new item.
103 */
104 xfs_trans_add_item(tp, &buip->bui_item);
105 return buip;
106}
107
108/* Set the map extent flags for this mapping. */
109static void
110xfs_trans_set_bmap_flags(
111 struct xfs_map_extent *bmap,
112 enum xfs_bmap_intent_type type,
113 int whichfork,
114 xfs_exntst_t state)
115{
116 bmap->me_flags = 0;
117 switch (type) {
118 case XFS_BMAP_MAP:
119 case XFS_BMAP_UNMAP:
120 bmap->me_flags = type;
121 break;
122 default:
123 ASSERT(0);
124 }
125 if (state == XFS_EXT_UNWRITTEN)
126 bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN;
127 if (whichfork == XFS_ATTR_FORK)
128 bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK;
129}
130
131/* Log bmap updates in the intent item. */
132STATIC void
133xfs_bmap_update_log_item(
134 struct xfs_trans *tp,
135 void *intent,
136 struct list_head *item)
137{
138 struct xfs_bui_log_item *buip = intent;
139 struct xfs_bmap_intent *bmap;
140 uint next_extent;
141 struct xfs_map_extent *map;
142
143 bmap = container_of(item, struct xfs_bmap_intent, bi_list);
144
145 tp->t_flags |= XFS_TRANS_DIRTY;
146 set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags);
147
148 /*
149 * atomic_inc_return gives us the value after the increment;
150 * we want to use it as an array index so we need to subtract 1 from
151 * it.
152 */
153 next_extent = atomic_inc_return(&buip->bui_next_extent) - 1;
154 ASSERT(next_extent < buip->bui_format.bui_nextents);
155 map = &buip->bui_format.bui_extents[next_extent];
156 map->me_owner = bmap->bi_owner->i_ino;
157 map->me_startblock = bmap->bi_bmap.br_startblock;
158 map->me_startoff = bmap->bi_bmap.br_startoff;
159 map->me_len = bmap->bi_bmap.br_blockcount;
160 xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork,
161 bmap->bi_bmap.br_state);
162}
163
164/* Get an BUD so we can process all the deferred rmap updates. */
165STATIC void *
166xfs_bmap_update_create_done(
167 struct xfs_trans *tp,
168 void *intent,
169 unsigned int count)
170{
171 return xfs_trans_get_bud(tp, intent);
172}
173
174/* Process a deferred rmap update. */
175STATIC int
176xfs_bmap_update_finish_item(
177 struct xfs_trans *tp,
178 struct list_head *item,
179 void *done_item,
180 void **state)
181{
182 struct xfs_bmap_intent *bmap;
183 xfs_filblks_t count;
184 int error;
185
186 bmap = container_of(item, struct xfs_bmap_intent, bi_list);
187 count = bmap->bi_bmap.br_blockcount;
188 error = xfs_trans_log_finish_bmap_update(tp, done_item,
189 bmap->bi_type,
190 bmap->bi_owner, bmap->bi_whichfork,
191 bmap->bi_bmap.br_startoff,
192 bmap->bi_bmap.br_startblock,
193 &count,
194 bmap->bi_bmap.br_state);
195 if (!error && count > 0) {
196 ASSERT(bmap->bi_type == XFS_BMAP_UNMAP);
197 bmap->bi_bmap.br_blockcount = count;
198 return -EAGAIN;
199 }
200 kmem_free(bmap);
201 return error;
202}
203
204/* Abort all pending BUIs. */
205STATIC void
206xfs_bmap_update_abort_intent(
207 void *intent)
208{
209 xfs_bui_release(intent);
210}
211
212/* Cancel a deferred rmap update. */
213STATIC void
214xfs_bmap_update_cancel_item(
215 struct list_head *item)
216{
217 struct xfs_bmap_intent *bmap;
218
219 bmap = container_of(item, struct xfs_bmap_intent, bi_list);
220 kmem_free(bmap);
221}
222
223const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
224 .max_items = XFS_BUI_MAX_FAST_EXTENTS,
225 .diff_items = xfs_bmap_update_diff_items,
226 .create_intent = xfs_bmap_update_create_intent,
227 .abort_intent = xfs_bmap_update_abort_intent,
228 .log_item = xfs_bmap_update_log_item,
229 .create_done = xfs_bmap_update_create_done,
230 .finish_item = xfs_bmap_update_finish_item,
231 .cancel_item = xfs_bmap_update_cancel_item,
232};
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 7d65ebf1e847..b5b3a78ef31c 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -10,11 +10,9 @@
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_inode.h"
14#include "xfs_trans.h" 13#include "xfs_trans.h"
15#include "xfs_buf_item.h" 14#include "xfs_buf_item.h"
16#include "xfs_trans_priv.h" 15#include "xfs_trans_priv.h"
17#include "xfs_error.h"
18#include "xfs_trace.h" 16#include "xfs_trace.h"
19 17
20/* 18/*
@@ -174,8 +172,7 @@ xfs_trans_get_buf_map(
174xfs_buf_t * 172xfs_buf_t *
175xfs_trans_getsb( 173xfs_trans_getsb(
176 xfs_trans_t *tp, 174 xfs_trans_t *tp,
177 struct xfs_mount *mp, 175 struct xfs_mount *mp)
178 int flags)
179{ 176{
180 xfs_buf_t *bp; 177 xfs_buf_t *bp;
181 struct xfs_buf_log_item *bip; 178 struct xfs_buf_log_item *bip;
@@ -185,7 +182,7 @@ xfs_trans_getsb(
185 * if tp is NULL. 182 * if tp is NULL.
186 */ 183 */
187 if (tp == NULL) 184 if (tp == NULL)
188 return xfs_getsb(mp, flags); 185 return xfs_getsb(mp);
189 186
190 /* 187 /*
191 * If the superblock buffer already has this transaction 188 * If the superblock buffer already has this transaction
@@ -203,7 +200,7 @@ xfs_trans_getsb(
203 return bp; 200 return bp;
204 } 201 }
205 202
206 bp = xfs_getsb(mp, flags); 203 bp = xfs_getsb(mp);
207 if (bp == NULL) 204 if (bp == NULL)
208 return NULL; 205 return NULL;
209 206
@@ -428,7 +425,7 @@ xfs_trans_brelse(
428 425
429/* 426/*
430 * Mark the buffer as not needing to be unlocked when the buf item's 427 * Mark the buffer as not needing to be unlocked when the buf item's
431 * iop_unlock() routine is called. The buffer must already be locked 428 * iop_committing() routine is called. The buffer must already be locked
432 * and associated with the given transaction. 429 * and associated with the given transaction.
433 */ 430 */
434/* ARGSUSED */ 431/* ARGSUSED */
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index cd664a03613f..1027c9ca6eb8 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -11,7 +11,6 @@
11#include "xfs_trans_resv.h" 11#include "xfs_trans_resv.h"
12#include "xfs_mount.h" 12#include "xfs_mount.h"
13#include "xfs_inode.h" 13#include "xfs_inode.h"
14#include "xfs_error.h"
15#include "xfs_trans.h" 14#include "xfs_trans.h"
16#include "xfs_trans_priv.h" 15#include "xfs_trans_priv.h"
17#include "xfs_quota.h" 16#include "xfs_quota.h"
@@ -29,7 +28,6 @@ xfs_trans_dqjoin(
29 xfs_trans_t *tp, 28 xfs_trans_t *tp,
30 xfs_dquot_t *dqp) 29 xfs_dquot_t *dqp)
31{ 30{
32 ASSERT(dqp->q_transp != tp);
33 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 31 ASSERT(XFS_DQ_IS_LOCKED(dqp));
34 ASSERT(dqp->q_logitem.qli_dquot == dqp); 32 ASSERT(dqp->q_logitem.qli_dquot == dqp);
35 33
@@ -37,15 +35,8 @@ xfs_trans_dqjoin(
37 * Get a log_item_desc to point at the new item. 35 * Get a log_item_desc to point at the new item.
38 */ 36 */
39 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); 37 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
40
41 /*
42 * Initialize d_transp so we can later determine if this dquot is
43 * associated with this transaction.
44 */
45 dqp->q_transp = tp;
46} 38}
47 39
48
49/* 40/*
50 * This is called to mark the dquot as needing 41 * This is called to mark the dquot as needing
51 * to be logged when the transaction is committed. The dquot must 42 * to be logged when the transaction is committed. The dquot must
@@ -61,7 +52,6 @@ xfs_trans_log_dquot(
61 xfs_trans_t *tp, 52 xfs_trans_t *tp,
62 xfs_dquot_t *dqp) 53 xfs_dquot_t *dqp)
63{ 54{
64 ASSERT(dqp->q_transp == tp);
65 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 55 ASSERT(XFS_DQ_IS_LOCKED(dqp));
66 56
67 tp->t_flags |= XFS_TRANS_DIRTY; 57 tp->t_flags |= XFS_TRANS_DIRTY;
@@ -347,7 +337,6 @@ xfs_trans_apply_dquot_deltas(
347 break; 337 break;
348 338
349 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 339 ASSERT(XFS_DQ_IS_LOCKED(dqp));
350 ASSERT(dqp->q_transp == tp);
351 340
352 /* 341 /*
353 * adjust the actual number of blocks used 342 * adjust the actual number of blocks used
diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c
deleted file mode 100644
index 8ee7a3f8bb20..000000000000
--- a/fs/xfs/xfs_trans_extfree.c
+++ /dev/null
@@ -1,286 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
4 * All Rights Reserved.
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_bit.h"
13#include "xfs_mount.h"
14#include "xfs_defer.h"
15#include "xfs_trans.h"
16#include "xfs_trans_priv.h"
17#include "xfs_extfree_item.h"
18#include "xfs_alloc.h"
19#include "xfs_bmap.h"
20#include "xfs_trace.h"
21
22/*
23 * This routine is called to allocate an "extent free done"
24 * log item that will hold nextents worth of extents. The
25 * caller must use all nextents extents, because we are not
26 * flexible about this at all.
27 */
28struct xfs_efd_log_item *
29xfs_trans_get_efd(struct xfs_trans *tp,
30 struct xfs_efi_log_item *efip,
31 uint nextents)
32{
33 struct xfs_efd_log_item *efdp;
34
35 ASSERT(tp != NULL);
36 ASSERT(nextents > 0);
37
38 efdp = xfs_efd_init(tp->t_mountp, efip, nextents);
39 ASSERT(efdp != NULL);
40
41 /*
42 * Get a log_item_desc to point at the new item.
43 */
44 xfs_trans_add_item(tp, &efdp->efd_item);
45 return efdp;
46}
47
48/*
49 * Free an extent and log it to the EFD. Note that the transaction is marked
50 * dirty regardless of whether the extent free succeeds or fails to support the
51 * EFI/EFD lifecycle rules.
52 */
53int
54xfs_trans_free_extent(
55 struct xfs_trans *tp,
56 struct xfs_efd_log_item *efdp,
57 xfs_fsblock_t start_block,
58 xfs_extlen_t ext_len,
59 const struct xfs_owner_info *oinfo,
60 bool skip_discard)
61{
62 struct xfs_mount *mp = tp->t_mountp;
63 struct xfs_extent *extp;
64 uint next_extent;
65 xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block);
66 xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp,
67 start_block);
68 int error;
69
70 trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len);
71
72 error = __xfs_free_extent(tp, start_block, ext_len,
73 oinfo, XFS_AG_RESV_NONE, skip_discard);
74 /*
75 * Mark the transaction dirty, even on error. This ensures the
76 * transaction is aborted, which:
77 *
78 * 1.) releases the EFI and frees the EFD
79 * 2.) shuts down the filesystem
80 */
81 tp->t_flags |= XFS_TRANS_DIRTY;
82 set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
83
84 next_extent = efdp->efd_next_extent;
85 ASSERT(next_extent < efdp->efd_format.efd_nextents);
86 extp = &(efdp->efd_format.efd_extents[next_extent]);
87 extp->ext_start = start_block;
88 extp->ext_len = ext_len;
89 efdp->efd_next_extent++;
90
91 return error;
92}
93
94/* Sort bmap items by AG. */
95static int
96xfs_extent_free_diff_items(
97 void *priv,
98 struct list_head *a,
99 struct list_head *b)
100{
101 struct xfs_mount *mp = priv;
102 struct xfs_extent_free_item *ra;
103 struct xfs_extent_free_item *rb;
104
105 ra = container_of(a, struct xfs_extent_free_item, xefi_list);
106 rb = container_of(b, struct xfs_extent_free_item, xefi_list);
107 return XFS_FSB_TO_AGNO(mp, ra->xefi_startblock) -
108 XFS_FSB_TO_AGNO(mp, rb->xefi_startblock);
109}
110
111/* Get an EFI. */
112STATIC void *
113xfs_extent_free_create_intent(
114 struct xfs_trans *tp,
115 unsigned int count)
116{
117 struct xfs_efi_log_item *efip;
118
119 ASSERT(tp != NULL);
120 ASSERT(count > 0);
121
122 efip = xfs_efi_init(tp->t_mountp, count);
123 ASSERT(efip != NULL);
124
125 /*
126 * Get a log_item_desc to point at the new item.
127 */
128 xfs_trans_add_item(tp, &efip->efi_item);
129 return efip;
130}
131
132/* Log a free extent to the intent item. */
133STATIC void
134xfs_extent_free_log_item(
135 struct xfs_trans *tp,
136 void *intent,
137 struct list_head *item)
138{
139 struct xfs_efi_log_item *efip = intent;
140 struct xfs_extent_free_item *free;
141 uint next_extent;
142 struct xfs_extent *extp;
143
144 free = container_of(item, struct xfs_extent_free_item, xefi_list);
145
146 tp->t_flags |= XFS_TRANS_DIRTY;
147 set_bit(XFS_LI_DIRTY, &efip->efi_item.li_flags);
148
149 /*
150 * atomic_inc_return gives us the value after the increment;
151 * we want to use it as an array index so we need to subtract 1 from
152 * it.
153 */
154 next_extent = atomic_inc_return(&efip->efi_next_extent) - 1;
155 ASSERT(next_extent < efip->efi_format.efi_nextents);
156 extp = &efip->efi_format.efi_extents[next_extent];
157 extp->ext_start = free->xefi_startblock;
158 extp->ext_len = free->xefi_blockcount;
159}
160
161/* Get an EFD so we can process all the free extents. */
162STATIC void *
163xfs_extent_free_create_done(
164 struct xfs_trans *tp,
165 void *intent,
166 unsigned int count)
167{
168 return xfs_trans_get_efd(tp, intent, count);
169}
170
171/* Process a free extent. */
172STATIC int
173xfs_extent_free_finish_item(
174 struct xfs_trans *tp,
175 struct list_head *item,
176 void *done_item,
177 void **state)
178{
179 struct xfs_extent_free_item *free;
180 int error;
181
182 free = container_of(item, struct xfs_extent_free_item, xefi_list);
183 error = xfs_trans_free_extent(tp, done_item,
184 free->xefi_startblock,
185 free->xefi_blockcount,
186 &free->xefi_oinfo, free->xefi_skip_discard);
187 kmem_free(free);
188 return error;
189}
190
191/* Abort all pending EFIs. */
192STATIC void
193xfs_extent_free_abort_intent(
194 void *intent)
195{
196 xfs_efi_release(intent);
197}
198
199/* Cancel a free extent. */
200STATIC void
201xfs_extent_free_cancel_item(
202 struct list_head *item)
203{
204 struct xfs_extent_free_item *free;
205
206 free = container_of(item, struct xfs_extent_free_item, xefi_list);
207 kmem_free(free);
208}
209
210const struct xfs_defer_op_type xfs_extent_free_defer_type = {
211 .max_items = XFS_EFI_MAX_FAST_EXTENTS,
212 .diff_items = xfs_extent_free_diff_items,
213 .create_intent = xfs_extent_free_create_intent,
214 .abort_intent = xfs_extent_free_abort_intent,
215 .log_item = xfs_extent_free_log_item,
216 .create_done = xfs_extent_free_create_done,
217 .finish_item = xfs_extent_free_finish_item,
218 .cancel_item = xfs_extent_free_cancel_item,
219};
220
221/*
222 * AGFL blocks are accounted differently in the reserve pools and are not
223 * inserted into the busy extent list.
224 */
225STATIC int
226xfs_agfl_free_finish_item(
227 struct xfs_trans *tp,
228 struct list_head *item,
229 void *done_item,
230 void **state)
231{
232 struct xfs_mount *mp = tp->t_mountp;
233 struct xfs_efd_log_item *efdp = done_item;
234 struct xfs_extent_free_item *free;
235 struct xfs_extent *extp;
236 struct xfs_buf *agbp;
237 int error;
238 xfs_agnumber_t agno;
239 xfs_agblock_t agbno;
240 uint next_extent;
241
242 free = container_of(item, struct xfs_extent_free_item, xefi_list);
243 ASSERT(free->xefi_blockcount == 1);
244 agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
245 agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
246
247 trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount);
248
249 error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
250 if (!error)
251 error = xfs_free_agfl_block(tp, agno, agbno, agbp,
252 &free->xefi_oinfo);
253
254 /*
255 * Mark the transaction dirty, even on error. This ensures the
256 * transaction is aborted, which:
257 *
258 * 1.) releases the EFI and frees the EFD
259 * 2.) shuts down the filesystem
260 */
261 tp->t_flags |= XFS_TRANS_DIRTY;
262 set_bit(XFS_LI_DIRTY, &efdp->efd_item.li_flags);
263
264 next_extent = efdp->efd_next_extent;
265 ASSERT(next_extent < efdp->efd_format.efd_nextents);
266 extp = &(efdp->efd_format.efd_extents[next_extent]);
267 extp->ext_start = free->xefi_startblock;
268 extp->ext_len = free->xefi_blockcount;
269 efdp->efd_next_extent++;
270
271 kmem_free(free);
272 return error;
273}
274
275
276/* sub-type with special handling for AGFL deferred frees */
277const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
278 .max_items = XFS_EFI_MAX_FAST_EXTENTS,
279 .diff_items = xfs_extent_free_diff_items,
280 .create_intent = xfs_extent_free_create_intent,
281 .abort_intent = xfs_extent_free_abort_intent,
282 .log_item = xfs_extent_free_log_item,
283 .create_done = xfs_extent_free_create_done,
284 .finish_item = xfs_agfl_free_finish_item,
285 .cancel_item = xfs_extent_free_cancel_item,
286};
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 542927321a61..93d14e47269d 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -8,13 +8,10 @@
8#include "xfs_shared.h" 8#include "xfs_shared.h"
9#include "xfs_format.h" 9#include "xfs_format.h"
10#include "xfs_log_format.h" 10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_inode.h" 11#include "xfs_inode.h"
14#include "xfs_trans.h" 12#include "xfs_trans.h"
15#include "xfs_trans_priv.h" 13#include "xfs_trans_priv.h"
16#include "xfs_inode_item.h" 14#include "xfs_inode_item.h"
17#include "xfs_trace.h"
18 15
19#include <linux/iversion.h> 16#include <linux/iversion.h>
20 17
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 091eae9f4e74..2e073c1c4614 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -16,12 +16,10 @@ struct xfs_log_vec;
16void xfs_trans_init(struct xfs_mount *); 16void xfs_trans_init(struct xfs_mount *);
17void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); 17void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
18void xfs_trans_del_item(struct xfs_log_item *); 18void xfs_trans_del_item(struct xfs_log_item *);
19void xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
20 bool abort);
21void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); 19void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
22 20
23void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, 21void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
24 xfs_lsn_t commit_lsn, int aborted); 22 xfs_lsn_t commit_lsn, bool aborted);
25/* 23/*
26 * AIL traversal cursor. 24 * AIL traversal cursor.
27 * 25 *
diff --git a/fs/xfs/xfs_trans_refcount.c b/fs/xfs/xfs_trans_refcount.c
deleted file mode 100644
index 8d734728dd1b..000000000000
--- a/fs/xfs/xfs_trans_refcount.c
+++ /dev/null
@@ -1,240 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Copyright (C) 2016 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_defer.h"
14#include "xfs_trans.h"
15#include "xfs_trans_priv.h"
16#include "xfs_refcount_item.h"
17#include "xfs_alloc.h"
18#include "xfs_refcount.h"
19
20/*
21 * This routine is called to allocate a "refcount update done"
22 * log item.
23 */
24struct xfs_cud_log_item *
25xfs_trans_get_cud(
26 struct xfs_trans *tp,
27 struct xfs_cui_log_item *cuip)
28{
29 struct xfs_cud_log_item *cudp;
30
31 cudp = xfs_cud_init(tp->t_mountp, cuip);
32 xfs_trans_add_item(tp, &cudp->cud_item);
33 return cudp;
34}
35
36/*
37 * Finish an refcount update and log it to the CUD. Note that the
38 * transaction is marked dirty regardless of whether the refcount
39 * update succeeds or fails to support the CUI/CUD lifecycle rules.
40 */
41int
42xfs_trans_log_finish_refcount_update(
43 struct xfs_trans *tp,
44 struct xfs_cud_log_item *cudp,
45 enum xfs_refcount_intent_type type,
46 xfs_fsblock_t startblock,
47 xfs_extlen_t blockcount,
48 xfs_fsblock_t *new_fsb,
49 xfs_extlen_t *new_len,
50 struct xfs_btree_cur **pcur)
51{
52 int error;
53
54 error = xfs_refcount_finish_one(tp, type, startblock,
55 blockcount, new_fsb, new_len, pcur);
56
57 /*
58 * Mark the transaction dirty, even on error. This ensures the
59 * transaction is aborted, which:
60 *
61 * 1.) releases the CUI and frees the CUD
62 * 2.) shuts down the filesystem
63 */
64 tp->t_flags |= XFS_TRANS_DIRTY;
65 set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags);
66
67 return error;
68}
69
70/* Sort refcount intents by AG. */
71static int
72xfs_refcount_update_diff_items(
73 void *priv,
74 struct list_head *a,
75 struct list_head *b)
76{
77 struct xfs_mount *mp = priv;
78 struct xfs_refcount_intent *ra;
79 struct xfs_refcount_intent *rb;
80
81 ra = container_of(a, struct xfs_refcount_intent, ri_list);
82 rb = container_of(b, struct xfs_refcount_intent, ri_list);
83 return XFS_FSB_TO_AGNO(mp, ra->ri_startblock) -
84 XFS_FSB_TO_AGNO(mp, rb->ri_startblock);
85}
86
87/* Get an CUI. */
88STATIC void *
89xfs_refcount_update_create_intent(
90 struct xfs_trans *tp,
91 unsigned int count)
92{
93 struct xfs_cui_log_item *cuip;
94
95 ASSERT(tp != NULL);
96 ASSERT(count > 0);
97
98 cuip = xfs_cui_init(tp->t_mountp, count);
99 ASSERT(cuip != NULL);
100
101 /*
102 * Get a log_item_desc to point at the new item.
103 */
104 xfs_trans_add_item(tp, &cuip->cui_item);
105 return cuip;
106}
107
108/* Set the phys extent flags for this reverse mapping. */
109static void
110xfs_trans_set_refcount_flags(
111 struct xfs_phys_extent *refc,
112 enum xfs_refcount_intent_type type)
113{
114 refc->pe_flags = 0;
115 switch (type) {
116 case XFS_REFCOUNT_INCREASE:
117 case XFS_REFCOUNT_DECREASE:
118 case XFS_REFCOUNT_ALLOC_COW:
119 case XFS_REFCOUNT_FREE_COW:
120 refc->pe_flags |= type;
121 break;
122 default:
123 ASSERT(0);
124 }
125}
126
127/* Log refcount updates in the intent item. */
128STATIC void
129xfs_refcount_update_log_item(
130 struct xfs_trans *tp,
131 void *intent,
132 struct list_head *item)
133{
134 struct xfs_cui_log_item *cuip = intent;
135 struct xfs_refcount_intent *refc;
136 uint next_extent;
137 struct xfs_phys_extent *ext;
138
139 refc = container_of(item, struct xfs_refcount_intent, ri_list);
140
141 tp->t_flags |= XFS_TRANS_DIRTY;
142 set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags);
143
144 /*
145 * atomic_inc_return gives us the value after the increment;
146 * we want to use it as an array index so we need to subtract 1 from
147 * it.
148 */
149 next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1;
150 ASSERT(next_extent < cuip->cui_format.cui_nextents);
151 ext = &cuip->cui_format.cui_extents[next_extent];
152 ext->pe_startblock = refc->ri_startblock;
153 ext->pe_len = refc->ri_blockcount;
154 xfs_trans_set_refcount_flags(ext, refc->ri_type);
155}
156
157/* Get an CUD so we can process all the deferred refcount updates. */
158STATIC void *
159xfs_refcount_update_create_done(
160 struct xfs_trans *tp,
161 void *intent,
162 unsigned int count)
163{
164 return xfs_trans_get_cud(tp, intent);
165}
166
167/* Process a deferred refcount update. */
168STATIC int
169xfs_refcount_update_finish_item(
170 struct xfs_trans *tp,
171 struct list_head *item,
172 void *done_item,
173 void **state)
174{
175 struct xfs_refcount_intent *refc;
176 xfs_fsblock_t new_fsb;
177 xfs_extlen_t new_aglen;
178 int error;
179
180 refc = container_of(item, struct xfs_refcount_intent, ri_list);
181 error = xfs_trans_log_finish_refcount_update(tp, done_item,
182 refc->ri_type,
183 refc->ri_startblock,
184 refc->ri_blockcount,
185 &new_fsb, &new_aglen,
186 (struct xfs_btree_cur **)state);
187 /* Did we run out of reservation? Requeue what we didn't finish. */
188 if (!error && new_aglen > 0) {
189 ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE ||
190 refc->ri_type == XFS_REFCOUNT_DECREASE);
191 refc->ri_startblock = new_fsb;
192 refc->ri_blockcount = new_aglen;
193 return -EAGAIN;
194 }
195 kmem_free(refc);
196 return error;
197}
198
199/* Clean up after processing deferred refcounts. */
200STATIC void
201xfs_refcount_update_finish_cleanup(
202 struct xfs_trans *tp,
203 void *state,
204 int error)
205{
206 struct xfs_btree_cur *rcur = state;
207
208 xfs_refcount_finish_one_cleanup(tp, rcur, error);
209}
210
211/* Abort all pending CUIs. */
212STATIC void
213xfs_refcount_update_abort_intent(
214 void *intent)
215{
216 xfs_cui_release(intent);
217}
218
219/* Cancel a deferred refcount update. */
220STATIC void
221xfs_refcount_update_cancel_item(
222 struct list_head *item)
223{
224 struct xfs_refcount_intent *refc;
225
226 refc = container_of(item, struct xfs_refcount_intent, ri_list);
227 kmem_free(refc);
228}
229
230const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
231 .max_items = XFS_CUI_MAX_FAST_EXTENTS,
232 .diff_items = xfs_refcount_update_diff_items,
233 .create_intent = xfs_refcount_update_create_intent,
234 .abort_intent = xfs_refcount_update_abort_intent,
235 .log_item = xfs_refcount_update_log_item,
236 .create_done = xfs_refcount_update_create_done,
237 .finish_item = xfs_refcount_update_finish_item,
238 .finish_cleanup = xfs_refcount_update_finish_cleanup,
239 .cancel_item = xfs_refcount_update_cancel_item,
240};
diff --git a/fs/xfs/xfs_trans_rmap.c b/fs/xfs/xfs_trans_rmap.c
deleted file mode 100644
index 5c7936b1be13..000000000000
--- a/fs/xfs/xfs_trans_rmap.c
+++ /dev/null
@@ -1,257 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * Copyright (C) 2016 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 */
6#include "xfs.h"
7#include "xfs_fs.h"
8#include "xfs_shared.h"
9#include "xfs_format.h"
10#include "xfs_log_format.h"
11#include "xfs_trans_resv.h"
12#include "xfs_mount.h"
13#include "xfs_defer.h"
14#include "xfs_trans.h"
15#include "xfs_trans_priv.h"
16#include "xfs_rmap_item.h"
17#include "xfs_alloc.h"
18#include "xfs_rmap.h"
19
20/* Set the map extent flags for this reverse mapping. */
21static void
22xfs_trans_set_rmap_flags(
23 struct xfs_map_extent *rmap,
24 enum xfs_rmap_intent_type type,
25 int whichfork,
26 xfs_exntst_t state)
27{
28 rmap->me_flags = 0;
29 if (state == XFS_EXT_UNWRITTEN)
30 rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN;
31 if (whichfork == XFS_ATTR_FORK)
32 rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK;
33 switch (type) {
34 case XFS_RMAP_MAP:
35 rmap->me_flags |= XFS_RMAP_EXTENT_MAP;
36 break;
37 case XFS_RMAP_MAP_SHARED:
38 rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED;
39 break;
40 case XFS_RMAP_UNMAP:
41 rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP;
42 break;
43 case XFS_RMAP_UNMAP_SHARED:
44 rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED;
45 break;
46 case XFS_RMAP_CONVERT:
47 rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT;
48 break;
49 case XFS_RMAP_CONVERT_SHARED:
50 rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED;
51 break;
52 case XFS_RMAP_ALLOC:
53 rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC;
54 break;
55 case XFS_RMAP_FREE:
56 rmap->me_flags |= XFS_RMAP_EXTENT_FREE;
57 break;
58 default:
59 ASSERT(0);
60 }
61}
62
63struct xfs_rud_log_item *
64xfs_trans_get_rud(
65 struct xfs_trans *tp,
66 struct xfs_rui_log_item *ruip)
67{
68 struct xfs_rud_log_item *rudp;
69
70 rudp = xfs_rud_init(tp->t_mountp, ruip);
71 xfs_trans_add_item(tp, &rudp->rud_item);
72 return rudp;
73}
74
75/*
76 * Finish an rmap update and log it to the RUD. Note that the transaction is
77 * marked dirty regardless of whether the rmap update succeeds or fails to
78 * support the RUI/RUD lifecycle rules.
79 */
80int
81xfs_trans_log_finish_rmap_update(
82 struct xfs_trans *tp,
83 struct xfs_rud_log_item *rudp,
84 enum xfs_rmap_intent_type type,
85 uint64_t owner,
86 int whichfork,
87 xfs_fileoff_t startoff,
88 xfs_fsblock_t startblock,
89 xfs_filblks_t blockcount,
90 xfs_exntst_t state,
91 struct xfs_btree_cur **pcur)
92{
93 int error;
94
95 error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff,
96 startblock, blockcount, state, pcur);
97
98 /*
99 * Mark the transaction dirty, even on error. This ensures the
100 * transaction is aborted, which:
101 *
102 * 1.) releases the RUI and frees the RUD
103 * 2.) shuts down the filesystem
104 */
105 tp->t_flags |= XFS_TRANS_DIRTY;
106 set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags);
107
108 return error;
109}
110
111/* Sort rmap intents by AG. */
112static int
113xfs_rmap_update_diff_items(
114 void *priv,
115 struct list_head *a,
116 struct list_head *b)
117{
118 struct xfs_mount *mp = priv;
119 struct xfs_rmap_intent *ra;
120 struct xfs_rmap_intent *rb;
121
122 ra = container_of(a, struct xfs_rmap_intent, ri_list);
123 rb = container_of(b, struct xfs_rmap_intent, ri_list);
124 return XFS_FSB_TO_AGNO(mp, ra->ri_bmap.br_startblock) -
125 XFS_FSB_TO_AGNO(mp, rb->ri_bmap.br_startblock);
126}
127
128/* Get an RUI. */
129STATIC void *
130xfs_rmap_update_create_intent(
131 struct xfs_trans *tp,
132 unsigned int count)
133{
134 struct xfs_rui_log_item *ruip;
135
136 ASSERT(tp != NULL);
137 ASSERT(count > 0);
138
139 ruip = xfs_rui_init(tp->t_mountp, count);
140 ASSERT(ruip != NULL);
141
142 /*
143 * Get a log_item_desc to point at the new item.
144 */
145 xfs_trans_add_item(tp, &ruip->rui_item);
146 return ruip;
147}
148
149/* Log rmap updates in the intent item. */
150STATIC void
151xfs_rmap_update_log_item(
152 struct xfs_trans *tp,
153 void *intent,
154 struct list_head *item)
155{
156 struct xfs_rui_log_item *ruip = intent;
157 struct xfs_rmap_intent *rmap;
158 uint next_extent;
159 struct xfs_map_extent *map;
160
161 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
162
163 tp->t_flags |= XFS_TRANS_DIRTY;
164 set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags);
165
166 /*
167 * atomic_inc_return gives us the value after the increment;
168 * we want to use it as an array index so we need to subtract 1 from
169 * it.
170 */
171 next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1;
172 ASSERT(next_extent < ruip->rui_format.rui_nextents);
173 map = &ruip->rui_format.rui_extents[next_extent];
174 map->me_owner = rmap->ri_owner;
175 map->me_startblock = rmap->ri_bmap.br_startblock;
176 map->me_startoff = rmap->ri_bmap.br_startoff;
177 map->me_len = rmap->ri_bmap.br_blockcount;
178 xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork,
179 rmap->ri_bmap.br_state);
180}
181
182/* Get an RUD so we can process all the deferred rmap updates. */
183STATIC void *
184xfs_rmap_update_create_done(
185 struct xfs_trans *tp,
186 void *intent,
187 unsigned int count)
188{
189 return xfs_trans_get_rud(tp, intent);
190}
191
192/* Process a deferred rmap update. */
193STATIC int
194xfs_rmap_update_finish_item(
195 struct xfs_trans *tp,
196 struct list_head *item,
197 void *done_item,
198 void **state)
199{
200 struct xfs_rmap_intent *rmap;
201 int error;
202
203 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
204 error = xfs_trans_log_finish_rmap_update(tp, done_item,
205 rmap->ri_type,
206 rmap->ri_owner, rmap->ri_whichfork,
207 rmap->ri_bmap.br_startoff,
208 rmap->ri_bmap.br_startblock,
209 rmap->ri_bmap.br_blockcount,
210 rmap->ri_bmap.br_state,
211 (struct xfs_btree_cur **)state);
212 kmem_free(rmap);
213 return error;
214}
215
216/* Clean up after processing deferred rmaps. */
217STATIC void
218xfs_rmap_update_finish_cleanup(
219 struct xfs_trans *tp,
220 void *state,
221 int error)
222{
223 struct xfs_btree_cur *rcur = state;
224
225 xfs_rmap_finish_one_cleanup(tp, rcur, error);
226}
227
228/* Abort all pending RUIs. */
229STATIC void
230xfs_rmap_update_abort_intent(
231 void *intent)
232{
233 xfs_rui_release(intent);
234}
235
236/* Cancel a deferred rmap update. */
237STATIC void
238xfs_rmap_update_cancel_item(
239 struct list_head *item)
240{
241 struct xfs_rmap_intent *rmap;
242
243 rmap = container_of(item, struct xfs_rmap_intent, ri_list);
244 kmem_free(rmap);
245}
246
247const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
248 .max_items = XFS_RUI_MAX_FAST_EXTENTS,
249 .diff_items = xfs_rmap_update_diff_items,
250 .create_intent = xfs_rmap_update_create_intent,
251 .abort_intent = xfs_rmap_update_abort_intent,
252 .log_item = xfs_rmap_update_log_item,
253 .create_done = xfs_rmap_update_create_done,
254 .finish_item = xfs_rmap_update_finish_item,
255 .finish_cleanup = xfs_rmap_update_finish_cleanup,
256 .cancel_item = xfs_rmap_update_cancel_item,
257};
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 9a63016009a1..3123b5aaad2a 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -5,15 +5,12 @@
5 */ 5 */
6 6
7#include "xfs.h" 7#include "xfs.h"
8#include "xfs_shared.h"
8#include "xfs_format.h" 9#include "xfs_format.h"
9#include "xfs_log_format.h" 10#include "xfs_log_format.h"
10#include "xfs_trans_resv.h"
11#include "xfs_mount.h"
12#include "xfs_da_format.h" 11#include "xfs_da_format.h"
13#include "xfs_inode.h" 12#include "xfs_inode.h"
14#include "xfs_attr.h" 13#include "xfs_attr.h"
15#include "xfs_attr_leaf.h"
16#include "xfs_acl.h"
17 14
18#include <linux/posix_acl_xattr.h> 15#include <linux/posix_acl_xattr.h>
19#include <linux/xattr.h> 16#include <linux/xattr.h>