aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_itable.c
diff options
context:
space:
mode:
author: Christoph Hellwig <hch@lst.de> 2010-06-23 04:11:11 -0400
committer: Dave Chinner <david@fromorbit.com> 2010-06-23 04:11:11 -0400
commit: 7dce11dbac54fce777eea0f5fb25b2694ccd7900 (patch)
tree: 90d6d109ca110bcbf47a954a8283ec250be07003 /fs/xfs/xfs_itable.c
parent: 1817176a86352f65210139d4c794ad2d19fc6b63 (diff)
xfs: always use iget in bulkstat
The non-coherent bulkstat versions that look directly at the inode buffers cause various problems with performance optimizations that make increased use of just logging inodes. This patch makes bulkstat always use iget, which should be fast enough for normal use with the radix-tree based inode cache introduced a while ago. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com>
Diffstat (limited to 'fs/xfs/xfs_itable.c')
-rw-r--r--fs/xfs/xfs_itable.c281
1 files changed, 40 insertions, 241 deletions
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index b1b801e4a28e..83d7827793e4 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -49,24 +49,41 @@ xfs_internal_inum(
49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); 49 (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino)));
50} 50}
51 51
52STATIC int 52/*
53xfs_bulkstat_one_iget( 53 * Return stat information for one inode.
54 xfs_mount_t *mp, /* mount point for filesystem */ 54 * Return 0 if ok, else errno.
55 xfs_ino_t ino, /* inode number to get data for */ 55 */
56 xfs_daddr_t bno, /* starting bno of inode cluster */ 56int
57 xfs_bstat_t *buf, /* return buffer */ 57xfs_bulkstat_one_int(
58 int *stat) /* BULKSTAT_RV_... */ 58 struct xfs_mount *mp, /* mount point for filesystem */
59 xfs_ino_t ino, /* inode to get data for */
60 void __user *buffer, /* buffer to place output in */
61 int ubsize, /* size of buffer */
62 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
63 xfs_daddr_t bno, /* starting bno of cluster */
64 int *ubused, /* bytes used by me */
65 int *stat) /* BULKSTAT_RV_... */
59{ 66{
60 xfs_icdinode_t *dic; /* dinode core info pointer */ 67 struct xfs_icdinode *dic; /* dinode core info pointer */
61 xfs_inode_t *ip; /* incore inode pointer */ 68 struct xfs_inode *ip; /* incore inode pointer */
62 struct inode *inode; 69 struct inode *inode;
63 int error; 70 struct xfs_bstat *buf; /* return buffer */
71 int error = 0; /* error value */
72
73 *stat = BULKSTAT_RV_NOTHING;
74
75 if (!buffer || xfs_internal_inum(mp, ino))
76 return XFS_ERROR(EINVAL);
77
78 buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
79 if (!buf)
80 return XFS_ERROR(ENOMEM);
64 81
65 error = xfs_iget(mp, NULL, ino, 82 error = xfs_iget(mp, NULL, ino,
66 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); 83 XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno);
67 if (error) { 84 if (error) {
68 *stat = BULKSTAT_RV_NOTHING; 85 *stat = BULKSTAT_RV_NOTHING;
69 return error; 86 goto out_free;
70 } 87 }
71 88
72 ASSERT(ip != NULL); 89 ASSERT(ip != NULL);
@@ -127,77 +144,16 @@ xfs_bulkstat_one_iget(
127 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; 144 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
128 break; 145 break;
129 } 146 }
130
131 xfs_iput(ip, XFS_ILOCK_SHARED); 147 xfs_iput(ip, XFS_ILOCK_SHARED);
132 return error;
133}
134 148
135STATIC void 149 error = formatter(buffer, ubsize, ubused, buf);
136xfs_bulkstat_one_dinode(
137 xfs_mount_t *mp, /* mount point for filesystem */
138 xfs_ino_t ino, /* inode number to get data for */
139 xfs_dinode_t *dic, /* dinode inode pointer */
140 xfs_bstat_t *buf) /* return buffer */
141{
142 /*
143 * The inode format changed when we moved the link count and
144 * made it 32 bits long. If this is an old format inode,
145 * convert it in memory to look like a new one. If it gets
146 * flushed to disk we will convert back before flushing or
147 * logging it. We zero out the new projid field and the old link
148 * count field. We'll handle clearing the pad field (the remains
149 * of the old uuid field) when we actually convert the inode to
150 * the new format. We don't change the version number so that we
151 * can distinguish this from a real new format inode.
152 */
153 if (dic->di_version == 1) {
154 buf->bs_nlink = be16_to_cpu(dic->di_onlink);
155 buf->bs_projid = 0;
156 } else {
157 buf->bs_nlink = be32_to_cpu(dic->di_nlink);
158 buf->bs_projid = be16_to_cpu(dic->di_projid);
159 }
160 150
161 buf->bs_ino = ino; 151 if (!error)
162 buf->bs_mode = be16_to_cpu(dic->di_mode); 152 *stat = BULKSTAT_RV_DIDONE;
163 buf->bs_uid = be32_to_cpu(dic->di_uid);
164 buf->bs_gid = be32_to_cpu(dic->di_gid);
165 buf->bs_size = be64_to_cpu(dic->di_size);
166 buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec);
167 buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec);
168 buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec);
169 buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec);
170 buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec);
171 buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec);
172 buf->bs_xflags = xfs_dic2xflags(dic);
173 buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog;
174 buf->bs_extents = be32_to_cpu(dic->di_nextents);
175 buf->bs_gen = be32_to_cpu(dic->di_gen);
176 memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
177 buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask);
178 buf->bs_dmstate = be16_to_cpu(dic->di_dmstate);
179 buf->bs_aextents = be16_to_cpu(dic->di_anextents);
180 buf->bs_forkoff = XFS_DFORK_BOFF(dic);
181 153
182 switch (dic->di_format) { 154 out_free:
183 case XFS_DINODE_FMT_DEV: 155 kmem_free(buf);
184 buf->bs_rdev = xfs_dinode_get_rdev(dic); 156 return error;
185 buf->bs_blksize = BLKDEV_IOSIZE;
186 buf->bs_blocks = 0;
187 break;
188 case XFS_DINODE_FMT_LOCAL:
189 case XFS_DINODE_FMT_UUID:
190 buf->bs_rdev = 0;
191 buf->bs_blksize = mp->m_sb.sb_blocksize;
192 buf->bs_blocks = 0;
193 break;
194 case XFS_DINODE_FMT_EXTENTS:
195 case XFS_DINODE_FMT_BTREE:
196 buf->bs_rdev = 0;
197 buf->bs_blksize = mp->m_sb.sb_blocksize;
198 buf->bs_blocks = be64_to_cpu(dic->di_nblocks);
199 break;
200 }
201} 157}
202 158
203/* Return 0 on success or positive error */ 159/* Return 0 on success or positive error */
@@ -217,118 +173,19 @@ xfs_bulkstat_one_fmt(
217 return 0; 173 return 0;
218} 174}
219 175
220/*
221 * Return stat information for one inode.
222 * Return 0 if ok, else errno.
223 */
224int /* error status */
225xfs_bulkstat_one_int(
226 xfs_mount_t *mp, /* mount point for filesystem */
227 xfs_ino_t ino, /* inode number to get data for */
228 void __user *buffer, /* buffer to place output in */
229 int ubsize, /* size of buffer */
230 bulkstat_one_fmt_pf formatter, /* formatter, copy to user */
231 xfs_daddr_t bno, /* starting bno of inode cluster */
232 int *ubused, /* bytes used by me */
233 void *dibuff, /* on-disk inode buffer */
234 int *stat) /* BULKSTAT_RV_... */
235{
236 xfs_bstat_t *buf; /* return buffer */
237 int error = 0; /* error value */
238 xfs_dinode_t *dip; /* dinode inode pointer */
239
240 dip = (xfs_dinode_t *)dibuff;
241 *stat = BULKSTAT_RV_NOTHING;
242
243 if (!buffer || xfs_internal_inum(mp, ino))
244 return XFS_ERROR(EINVAL);
245
246 buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
247
248 if (dip == NULL) {
249 /* We're not being passed a pointer to a dinode. This happens
250 * if BULKSTAT_FG_IGET is selected. Do the iget.
251 */
252 error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
253 if (error)
254 goto out_free;
255 } else {
256 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
257 }
258
259 error = formatter(buffer, ubsize, ubused, buf);
260 if (error)
261 goto out_free;
262
263 *stat = BULKSTAT_RV_DIDONE;
264
265 out_free:
266 kmem_free(buf);
267 return error;
268}
269
270int 176int
271xfs_bulkstat_one( 177xfs_bulkstat_one(
272 xfs_mount_t *mp, /* mount point for filesystem */ 178 xfs_mount_t *mp, /* mount point for filesystem */
273 xfs_ino_t ino, /* inode number to get data for */ 179 xfs_ino_t ino, /* inode number to get data for */
274 void __user *buffer, /* buffer to place output in */ 180 void __user *buffer, /* buffer to place output in */
275 int ubsize, /* size of buffer */ 181 int ubsize, /* size of buffer */
276 void *private_data, /* my private data */
277 xfs_daddr_t bno, /* starting bno of inode cluster */ 182 xfs_daddr_t bno, /* starting bno of inode cluster */
278 int *ubused, /* bytes used by me */ 183 int *ubused, /* bytes used by me */
279 void *dibuff, /* on-disk inode buffer */
280 int *stat) /* BULKSTAT_RV_... */ 184 int *stat) /* BULKSTAT_RV_... */
281{ 185{
282 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, 186 return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
283 xfs_bulkstat_one_fmt, bno, 187 xfs_bulkstat_one_fmt, bno,
284 ubused, dibuff, stat); 188 ubused, stat);
285}
286
287/*
288 * Test to see whether we can use the ondisk inode directly, based
289 * on the given bulkstat flags, filling in dipp accordingly.
290 * Returns zero if the inode is dodgey.
291 */
292STATIC int
293xfs_bulkstat_use_dinode(
294 xfs_mount_t *mp,
295 int flags,
296 xfs_buf_t *bp,
297 int clustidx,
298 xfs_dinode_t **dipp)
299{
300 xfs_dinode_t *dip;
301 unsigned int aformat;
302
303 *dipp = NULL;
304 if (!bp || (flags & BULKSTAT_FG_IGET))
305 return 1;
306 dip = (xfs_dinode_t *)
307 xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog);
308 /*
309 * Check the buffer containing the on-disk inode for di_mode == 0.
310 * This is to prevent xfs_bulkstat from picking up just reclaimed
311 * inodes that have their in-core state initialized but not flushed
312 * to disk yet. This is a temporary hack that would require a proper
313 * fix in the future.
314 */
315 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
316 !XFS_DINODE_GOOD_VERSION(dip->di_version) ||
317 !dip->di_mode)
318 return 0;
319 if (flags & BULKSTAT_FG_QUICK) {
320 *dipp = dip;
321 return 1;
322 }
323 /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */
324 aformat = dip->di_aformat;
325 if ((XFS_DFORK_Q(dip) == 0) ||
326 (aformat == XFS_DINODE_FMT_LOCAL) ||
327 (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) {
328 *dipp = dip;
329 return 1;
330 }
331 return 1;
332} 189}
333 190
334#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) 191#define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size)
@@ -342,10 +199,8 @@ xfs_bulkstat(
342 xfs_ino_t *lastinop, /* last inode returned */ 199 xfs_ino_t *lastinop, /* last inode returned */
343 int *ubcountp, /* size of buffer/count returned */ 200 int *ubcountp, /* size of buffer/count returned */
344 bulkstat_one_pf formatter, /* func that'd fill a single buf */ 201 bulkstat_one_pf formatter, /* func that'd fill a single buf */
345 void *private_data,/* private data for formatter */
346 size_t statstruct_size, /* sizeof struct filling */ 202 size_t statstruct_size, /* sizeof struct filling */
347 char __user *ubuffer, /* buffer with inode stats */ 203 char __user *ubuffer, /* buffer with inode stats */
348 int flags, /* defined in xfs_itable.h */
349 int *done) /* 1 if there are more stats to get */ 204 int *done) /* 1 if there are more stats to get */
350{ 205{
351 xfs_agblock_t agbno=0;/* allocation group block number */ 206 xfs_agblock_t agbno=0;/* allocation group block number */
@@ -380,14 +235,12 @@ xfs_bulkstat(
380 int ubelem; /* spaces used in user's buffer */ 235 int ubelem; /* spaces used in user's buffer */
381 int ubused; /* bytes used by formatter */ 236 int ubused; /* bytes used by formatter */
382 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ 237 xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */
383 xfs_dinode_t *dip; /* ptr into bp for specific inode */
384 238
385 /* 239 /*
386 * Get the last inode value, see if there's nothing to do. 240 * Get the last inode value, see if there's nothing to do.
387 */ 241 */
388 ino = (xfs_ino_t)*lastinop; 242 ino = (xfs_ino_t)*lastinop;
389 lastino = ino; 243 lastino = ino;
390 dip = NULL;
391 agno = XFS_INO_TO_AGNO(mp, ino); 244 agno = XFS_INO_TO_AGNO(mp, ino);
392 agino = XFS_INO_TO_AGINO(mp, ino); 245 agino = XFS_INO_TO_AGINO(mp, ino);
393 if (agno >= mp->m_sb.sb_agcount || 246 if (agno >= mp->m_sb.sb_agcount ||
@@ -612,37 +465,6 @@ xfs_bulkstat(
612 irbp->ir_startino) + 465 irbp->ir_startino) +
613 ((chunkidx & nimask) >> 466 ((chunkidx & nimask) >>
614 mp->m_sb.sb_inopblog); 467 mp->m_sb.sb_inopblog);
615
616 if (flags & (BULKSTAT_FG_QUICK |
617 BULKSTAT_FG_INLINE)) {
618 int offset;
619
620 ino = XFS_AGINO_TO_INO(mp, agno,
621 agino);
622 bno = XFS_AGB_TO_DADDR(mp, agno,
623 agbno);
624
625 /*
626 * Get the inode cluster buffer
627 */
628 if (bp)
629 xfs_buf_relse(bp);
630
631 error = xfs_inotobp(mp, NULL, ino, &dip,
632 &bp, &offset,
633 XFS_IGET_BULKSTAT);
634
635 if (!error)
636 clustidx = offset / mp->m_sb.sb_inodesize;
637 if (XFS_TEST_ERROR(error != 0,
638 mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
639 XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
640 bp = NULL;
641 ubleft = 0;
642 rval = error;
643 break;
644 }
645 }
646 } 468 }
647 ino = XFS_AGINO_TO_INO(mp, agno, agino); 469 ino = XFS_AGINO_TO_INO(mp, agno, agino);
648 bno = XFS_AGB_TO_DADDR(mp, agno, agbno); 470 bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
@@ -658,35 +480,13 @@ xfs_bulkstat(
658 * when the chunk is used up. 480 * when the chunk is used up.
659 */ 481 */
660 irbp->ir_freecount++; 482 irbp->ir_freecount++;
661 if (!xfs_bulkstat_use_dinode(mp, flags, bp,
662 clustidx, &dip)) {
663 lastino = ino;
664 continue;
665 }
666 /*
667 * If we need to do an iget, cannot hold bp.
668 * Drop it, until starting the next cluster.
669 */
670 if ((flags & BULKSTAT_FG_INLINE) && !dip) {
671 if (bp)
672 xfs_buf_relse(bp);
673 bp = NULL;
674 }
675 483
676 /* 484 /*
677 * Get the inode and fill in a single buffer. 485 * Get the inode and fill in a single buffer.
678 * BULKSTAT_FG_QUICK uses dip to fill it in.
679 * BULKSTAT_FG_IGET uses igets.
680 * BULKSTAT_FG_INLINE uses dip if we have an
681 * inline attr fork, else igets.
682 * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
683 * This is also used to count inodes/blks, etc
684 * in xfs_qm_quotacheck.
685 */ 486 */
686 ubused = statstruct_size; 487 ubused = statstruct_size;
687 error = formatter(mp, ino, ubufp, 488 error = formatter(mp, ino, ubufp, ubleft, bno,
688 ubleft, private_data, 489 &ubused, &fmterror);
689 bno, &ubused, dip, &fmterror);
690 if (fmterror == BULKSTAT_RV_NOTHING) { 490 if (fmterror == BULKSTAT_RV_NOTHING) {
691 if (error && error != ENOENT && 491 if (error && error != ENOENT &&
692 error != EINVAL) { 492 error != EINVAL) {
@@ -779,7 +579,7 @@ xfs_bulkstat_single(
779 579
780 ino = (xfs_ino_t)*lastinop; 580 ino = (xfs_ino_t)*lastinop;
781 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 581 error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
782 NULL, 0, NULL, NULL, &res); 582 0, NULL, &res);
783 if (error) { 583 if (error) {
784 /* 584 /*
785 * Special case way failed, do it the "long" way 585 * Special case way failed, do it the "long" way
@@ -788,8 +588,7 @@ xfs_bulkstat_single(
788 (*lastinop)--; 588 (*lastinop)--;
789 count = 1; 589 count = 1;
790 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, 590 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
791 NULL, sizeof(xfs_bstat_t), buffer, 591 sizeof(xfs_bstat_t), buffer, done))
792 BULKSTAT_FG_IGET, done))
793 return error; 592 return error;
794 if (count == 0 || (xfs_ino_t)*lastinop != ino) 593 if (count == 0 || (xfs_ino_t)*lastinop != ino)
795 return error == EFSCORRUPTED ? 594 return error == EFSCORRUPTED ?