diff options
author | Christoph Hellwig <hch@lst.de> | 2010-06-23 04:11:11 -0400 |
---|---|---|
committer | Dave Chinner <david@fromorbit.com> | 2010-06-23 04:11:11 -0400 |
commit | 7dce11dbac54fce777eea0f5fb25b2694ccd7900 (patch) | |
tree | 90d6d109ca110bcbf47a954a8283ec250be07003 /fs/xfs/xfs_itable.c | |
parent | 1817176a86352f65210139d4c794ad2d19fc6b63 (diff) |
xfs: always use iget in bulkstat
The non-coherent bulkstat versionsthat look directly at the inode
buffers causes various problems with performance optimizations that
make increased use of just logging inodes. This patch makes bulkstat
always use iget, which should be fast enough for normal use with the
radix-tree based inode cache introduced a while ago.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Diffstat (limited to 'fs/xfs/xfs_itable.c')
-rw-r--r-- | fs/xfs/xfs_itable.c | 281 |
1 files changed, 40 insertions, 241 deletions
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index b1b801e4a28e..83d7827793e4 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -49,24 +49,41 @@ xfs_internal_inum( | |||
49 | (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); | 49 | (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))); |
50 | } | 50 | } |
51 | 51 | ||
52 | STATIC int | 52 | /* |
53 | xfs_bulkstat_one_iget( | 53 | * Return stat information for one inode. |
54 | xfs_mount_t *mp, /* mount point for filesystem */ | 54 | * Return 0 if ok, else errno. |
55 | xfs_ino_t ino, /* inode number to get data for */ | 55 | */ |
56 | xfs_daddr_t bno, /* starting bno of inode cluster */ | 56 | int |
57 | xfs_bstat_t *buf, /* return buffer */ | 57 | xfs_bulkstat_one_int( |
58 | int *stat) /* BULKSTAT_RV_... */ | 58 | struct xfs_mount *mp, /* mount point for filesystem */ |
59 | xfs_ino_t ino, /* inode to get data for */ | ||
60 | void __user *buffer, /* buffer to place output in */ | ||
61 | int ubsize, /* size of buffer */ | ||
62 | bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ | ||
63 | xfs_daddr_t bno, /* starting bno of cluster */ | ||
64 | int *ubused, /* bytes used by me */ | ||
65 | int *stat) /* BULKSTAT_RV_... */ | ||
59 | { | 66 | { |
60 | xfs_icdinode_t *dic; /* dinode core info pointer */ | 67 | struct xfs_icdinode *dic; /* dinode core info pointer */ |
61 | xfs_inode_t *ip; /* incore inode pointer */ | 68 | struct xfs_inode *ip; /* incore inode pointer */ |
62 | struct inode *inode; | 69 | struct inode *inode; |
63 | int error; | 70 | struct xfs_bstat *buf; /* return buffer */ |
71 | int error = 0; /* error value */ | ||
72 | |||
73 | *stat = BULKSTAT_RV_NOTHING; | ||
74 | |||
75 | if (!buffer || xfs_internal_inum(mp, ino)) | ||
76 | return XFS_ERROR(EINVAL); | ||
77 | |||
78 | buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL); | ||
79 | if (!buf) | ||
80 | return XFS_ERROR(ENOMEM); | ||
64 | 81 | ||
65 | error = xfs_iget(mp, NULL, ino, | 82 | error = xfs_iget(mp, NULL, ino, |
66 | XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); | 83 | XFS_IGET_BULKSTAT, XFS_ILOCK_SHARED, &ip, bno); |
67 | if (error) { | 84 | if (error) { |
68 | *stat = BULKSTAT_RV_NOTHING; | 85 | *stat = BULKSTAT_RV_NOTHING; |
69 | return error; | 86 | goto out_free; |
70 | } | 87 | } |
71 | 88 | ||
72 | ASSERT(ip != NULL); | 89 | ASSERT(ip != NULL); |
@@ -127,77 +144,16 @@ xfs_bulkstat_one_iget( | |||
127 | buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; | 144 | buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks; |
128 | break; | 145 | break; |
129 | } | 146 | } |
130 | |||
131 | xfs_iput(ip, XFS_ILOCK_SHARED); | 147 | xfs_iput(ip, XFS_ILOCK_SHARED); |
132 | return error; | ||
133 | } | ||
134 | 148 | ||
135 | STATIC void | 149 | error = formatter(buffer, ubsize, ubused, buf); |
136 | xfs_bulkstat_one_dinode( | ||
137 | xfs_mount_t *mp, /* mount point for filesystem */ | ||
138 | xfs_ino_t ino, /* inode number to get data for */ | ||
139 | xfs_dinode_t *dic, /* dinode inode pointer */ | ||
140 | xfs_bstat_t *buf) /* return buffer */ | ||
141 | { | ||
142 | /* | ||
143 | * The inode format changed when we moved the link count and | ||
144 | * made it 32 bits long. If this is an old format inode, | ||
145 | * convert it in memory to look like a new one. If it gets | ||
146 | * flushed to disk we will convert back before flushing or | ||
147 | * logging it. We zero out the new projid field and the old link | ||
148 | * count field. We'll handle clearing the pad field (the remains | ||
149 | * of the old uuid field) when we actually convert the inode to | ||
150 | * the new format. We don't change the version number so that we | ||
151 | * can distinguish this from a real new format inode. | ||
152 | */ | ||
153 | if (dic->di_version == 1) { | ||
154 | buf->bs_nlink = be16_to_cpu(dic->di_onlink); | ||
155 | buf->bs_projid = 0; | ||
156 | } else { | ||
157 | buf->bs_nlink = be32_to_cpu(dic->di_nlink); | ||
158 | buf->bs_projid = be16_to_cpu(dic->di_projid); | ||
159 | } | ||
160 | 150 | ||
161 | buf->bs_ino = ino; | 151 | if (!error) |
162 | buf->bs_mode = be16_to_cpu(dic->di_mode); | 152 | *stat = BULKSTAT_RV_DIDONE; |
163 | buf->bs_uid = be32_to_cpu(dic->di_uid); | ||
164 | buf->bs_gid = be32_to_cpu(dic->di_gid); | ||
165 | buf->bs_size = be64_to_cpu(dic->di_size); | ||
166 | buf->bs_atime.tv_sec = be32_to_cpu(dic->di_atime.t_sec); | ||
167 | buf->bs_atime.tv_nsec = be32_to_cpu(dic->di_atime.t_nsec); | ||
168 | buf->bs_mtime.tv_sec = be32_to_cpu(dic->di_mtime.t_sec); | ||
169 | buf->bs_mtime.tv_nsec = be32_to_cpu(dic->di_mtime.t_nsec); | ||
170 | buf->bs_ctime.tv_sec = be32_to_cpu(dic->di_ctime.t_sec); | ||
171 | buf->bs_ctime.tv_nsec = be32_to_cpu(dic->di_ctime.t_nsec); | ||
172 | buf->bs_xflags = xfs_dic2xflags(dic); | ||
173 | buf->bs_extsize = be32_to_cpu(dic->di_extsize) << mp->m_sb.sb_blocklog; | ||
174 | buf->bs_extents = be32_to_cpu(dic->di_nextents); | ||
175 | buf->bs_gen = be32_to_cpu(dic->di_gen); | ||
176 | memset(buf->bs_pad, 0, sizeof(buf->bs_pad)); | ||
177 | buf->bs_dmevmask = be32_to_cpu(dic->di_dmevmask); | ||
178 | buf->bs_dmstate = be16_to_cpu(dic->di_dmstate); | ||
179 | buf->bs_aextents = be16_to_cpu(dic->di_anextents); | ||
180 | buf->bs_forkoff = XFS_DFORK_BOFF(dic); | ||
181 | 153 | ||
182 | switch (dic->di_format) { | 154 | out_free: |
183 | case XFS_DINODE_FMT_DEV: | 155 | kmem_free(buf); |
184 | buf->bs_rdev = xfs_dinode_get_rdev(dic); | 156 | return error; |
185 | buf->bs_blksize = BLKDEV_IOSIZE; | ||
186 | buf->bs_blocks = 0; | ||
187 | break; | ||
188 | case XFS_DINODE_FMT_LOCAL: | ||
189 | case XFS_DINODE_FMT_UUID: | ||
190 | buf->bs_rdev = 0; | ||
191 | buf->bs_blksize = mp->m_sb.sb_blocksize; | ||
192 | buf->bs_blocks = 0; | ||
193 | break; | ||
194 | case XFS_DINODE_FMT_EXTENTS: | ||
195 | case XFS_DINODE_FMT_BTREE: | ||
196 | buf->bs_rdev = 0; | ||
197 | buf->bs_blksize = mp->m_sb.sb_blocksize; | ||
198 | buf->bs_blocks = be64_to_cpu(dic->di_nblocks); | ||
199 | break; | ||
200 | } | ||
201 | } | 157 | } |
202 | 158 | ||
203 | /* Return 0 on success or positive error */ | 159 | /* Return 0 on success or positive error */ |
@@ -217,118 +173,19 @@ xfs_bulkstat_one_fmt( | |||
217 | return 0; | 173 | return 0; |
218 | } | 174 | } |
219 | 175 | ||
220 | /* | ||
221 | * Return stat information for one inode. | ||
222 | * Return 0 if ok, else errno. | ||
223 | */ | ||
224 | int /* error status */ | ||
225 | xfs_bulkstat_one_int( | ||
226 | xfs_mount_t *mp, /* mount point for filesystem */ | ||
227 | xfs_ino_t ino, /* inode number to get data for */ | ||
228 | void __user *buffer, /* buffer to place output in */ | ||
229 | int ubsize, /* size of buffer */ | ||
230 | bulkstat_one_fmt_pf formatter, /* formatter, copy to user */ | ||
231 | xfs_daddr_t bno, /* starting bno of inode cluster */ | ||
232 | int *ubused, /* bytes used by me */ | ||
233 | void *dibuff, /* on-disk inode buffer */ | ||
234 | int *stat) /* BULKSTAT_RV_... */ | ||
235 | { | ||
236 | xfs_bstat_t *buf; /* return buffer */ | ||
237 | int error = 0; /* error value */ | ||
238 | xfs_dinode_t *dip; /* dinode inode pointer */ | ||
239 | |||
240 | dip = (xfs_dinode_t *)dibuff; | ||
241 | *stat = BULKSTAT_RV_NOTHING; | ||
242 | |||
243 | if (!buffer || xfs_internal_inum(mp, ino)) | ||
244 | return XFS_ERROR(EINVAL); | ||
245 | |||
246 | buf = kmem_alloc(sizeof(*buf), KM_SLEEP); | ||
247 | |||
248 | if (dip == NULL) { | ||
249 | /* We're not being passed a pointer to a dinode. This happens | ||
250 | * if BULKSTAT_FG_IGET is selected. Do the iget. | ||
251 | */ | ||
252 | error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat); | ||
253 | if (error) | ||
254 | goto out_free; | ||
255 | } else { | ||
256 | xfs_bulkstat_one_dinode(mp, ino, dip, buf); | ||
257 | } | ||
258 | |||
259 | error = formatter(buffer, ubsize, ubused, buf); | ||
260 | if (error) | ||
261 | goto out_free; | ||
262 | |||
263 | *stat = BULKSTAT_RV_DIDONE; | ||
264 | |||
265 | out_free: | ||
266 | kmem_free(buf); | ||
267 | return error; | ||
268 | } | ||
269 | |||
270 | int | 176 | int |
271 | xfs_bulkstat_one( | 177 | xfs_bulkstat_one( |
272 | xfs_mount_t *mp, /* mount point for filesystem */ | 178 | xfs_mount_t *mp, /* mount point for filesystem */ |
273 | xfs_ino_t ino, /* inode number to get data for */ | 179 | xfs_ino_t ino, /* inode number to get data for */ |
274 | void __user *buffer, /* buffer to place output in */ | 180 | void __user *buffer, /* buffer to place output in */ |
275 | int ubsize, /* size of buffer */ | 181 | int ubsize, /* size of buffer */ |
276 | void *private_data, /* my private data */ | ||
277 | xfs_daddr_t bno, /* starting bno of inode cluster */ | 182 | xfs_daddr_t bno, /* starting bno of inode cluster */ |
278 | int *ubused, /* bytes used by me */ | 183 | int *ubused, /* bytes used by me */ |
279 | void *dibuff, /* on-disk inode buffer */ | ||
280 | int *stat) /* BULKSTAT_RV_... */ | 184 | int *stat) /* BULKSTAT_RV_... */ |
281 | { | 185 | { |
282 | return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, | 186 | return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, |
283 | xfs_bulkstat_one_fmt, bno, | 187 | xfs_bulkstat_one_fmt, bno, |
284 | ubused, dibuff, stat); | 188 | ubused, stat); |
285 | } | ||
286 | |||
287 | /* | ||
288 | * Test to see whether we can use the ondisk inode directly, based | ||
289 | * on the given bulkstat flags, filling in dipp accordingly. | ||
290 | * Returns zero if the inode is dodgey. | ||
291 | */ | ||
292 | STATIC int | ||
293 | xfs_bulkstat_use_dinode( | ||
294 | xfs_mount_t *mp, | ||
295 | int flags, | ||
296 | xfs_buf_t *bp, | ||
297 | int clustidx, | ||
298 | xfs_dinode_t **dipp) | ||
299 | { | ||
300 | xfs_dinode_t *dip; | ||
301 | unsigned int aformat; | ||
302 | |||
303 | *dipp = NULL; | ||
304 | if (!bp || (flags & BULKSTAT_FG_IGET)) | ||
305 | return 1; | ||
306 | dip = (xfs_dinode_t *) | ||
307 | xfs_buf_offset(bp, clustidx << mp->m_sb.sb_inodelog); | ||
308 | /* | ||
309 | * Check the buffer containing the on-disk inode for di_mode == 0. | ||
310 | * This is to prevent xfs_bulkstat from picking up just reclaimed | ||
311 | * inodes that have their in-core state initialized but not flushed | ||
312 | * to disk yet. This is a temporary hack that would require a proper | ||
313 | * fix in the future. | ||
314 | */ | ||
315 | if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC || | ||
316 | !XFS_DINODE_GOOD_VERSION(dip->di_version) || | ||
317 | !dip->di_mode) | ||
318 | return 0; | ||
319 | if (flags & BULKSTAT_FG_QUICK) { | ||
320 | *dipp = dip; | ||
321 | return 1; | ||
322 | } | ||
323 | /* BULKSTAT_FG_INLINE: if attr fork is local, or not there, use it */ | ||
324 | aformat = dip->di_aformat; | ||
325 | if ((XFS_DFORK_Q(dip) == 0) || | ||
326 | (aformat == XFS_DINODE_FMT_LOCAL) || | ||
327 | (aformat == XFS_DINODE_FMT_EXTENTS && !dip->di_anextents)) { | ||
328 | *dipp = dip; | ||
329 | return 1; | ||
330 | } | ||
331 | return 1; | ||
332 | } | 189 | } |
333 | 190 | ||
334 | #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) | 191 | #define XFS_BULKSTAT_UBLEFT(ubleft) ((ubleft) >= statstruct_size) |
@@ -342,10 +199,8 @@ xfs_bulkstat( | |||
342 | xfs_ino_t *lastinop, /* last inode returned */ | 199 | xfs_ino_t *lastinop, /* last inode returned */ |
343 | int *ubcountp, /* size of buffer/count returned */ | 200 | int *ubcountp, /* size of buffer/count returned */ |
344 | bulkstat_one_pf formatter, /* func that'd fill a single buf */ | 201 | bulkstat_one_pf formatter, /* func that'd fill a single buf */ |
345 | void *private_data,/* private data for formatter */ | ||
346 | size_t statstruct_size, /* sizeof struct filling */ | 202 | size_t statstruct_size, /* sizeof struct filling */ |
347 | char __user *ubuffer, /* buffer with inode stats */ | 203 | char __user *ubuffer, /* buffer with inode stats */ |
348 | int flags, /* defined in xfs_itable.h */ | ||
349 | int *done) /* 1 if there are more stats to get */ | 204 | int *done) /* 1 if there are more stats to get */ |
350 | { | 205 | { |
351 | xfs_agblock_t agbno=0;/* allocation group block number */ | 206 | xfs_agblock_t agbno=0;/* allocation group block number */ |
@@ -380,14 +235,12 @@ xfs_bulkstat( | |||
380 | int ubelem; /* spaces used in user's buffer */ | 235 | int ubelem; /* spaces used in user's buffer */ |
381 | int ubused; /* bytes used by formatter */ | 236 | int ubused; /* bytes used by formatter */ |
382 | xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ | 237 | xfs_buf_t *bp; /* ptr to on-disk inode cluster buf */ |
383 | xfs_dinode_t *dip; /* ptr into bp for specific inode */ | ||
384 | 238 | ||
385 | /* | 239 | /* |
386 | * Get the last inode value, see if there's nothing to do. | 240 | * Get the last inode value, see if there's nothing to do. |
387 | */ | 241 | */ |
388 | ino = (xfs_ino_t)*lastinop; | 242 | ino = (xfs_ino_t)*lastinop; |
389 | lastino = ino; | 243 | lastino = ino; |
390 | dip = NULL; | ||
391 | agno = XFS_INO_TO_AGNO(mp, ino); | 244 | agno = XFS_INO_TO_AGNO(mp, ino); |
392 | agino = XFS_INO_TO_AGINO(mp, ino); | 245 | agino = XFS_INO_TO_AGINO(mp, ino); |
393 | if (agno >= mp->m_sb.sb_agcount || | 246 | if (agno >= mp->m_sb.sb_agcount || |
@@ -612,37 +465,6 @@ xfs_bulkstat( | |||
612 | irbp->ir_startino) + | 465 | irbp->ir_startino) + |
613 | ((chunkidx & nimask) >> | 466 | ((chunkidx & nimask) >> |
614 | mp->m_sb.sb_inopblog); | 467 | mp->m_sb.sb_inopblog); |
615 | |||
616 | if (flags & (BULKSTAT_FG_QUICK | | ||
617 | BULKSTAT_FG_INLINE)) { | ||
618 | int offset; | ||
619 | |||
620 | ino = XFS_AGINO_TO_INO(mp, agno, | ||
621 | agino); | ||
622 | bno = XFS_AGB_TO_DADDR(mp, agno, | ||
623 | agbno); | ||
624 | |||
625 | /* | ||
626 | * Get the inode cluster buffer | ||
627 | */ | ||
628 | if (bp) | ||
629 | xfs_buf_relse(bp); | ||
630 | |||
631 | error = xfs_inotobp(mp, NULL, ino, &dip, | ||
632 | &bp, &offset, | ||
633 | XFS_IGET_BULKSTAT); | ||
634 | |||
635 | if (!error) | ||
636 | clustidx = offset / mp->m_sb.sb_inodesize; | ||
637 | if (XFS_TEST_ERROR(error != 0, | ||
638 | mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK, | ||
639 | XFS_RANDOM_BULKSTAT_READ_CHUNK)) { | ||
640 | bp = NULL; | ||
641 | ubleft = 0; | ||
642 | rval = error; | ||
643 | break; | ||
644 | } | ||
645 | } | ||
646 | } | 468 | } |
647 | ino = XFS_AGINO_TO_INO(mp, agno, agino); | 469 | ino = XFS_AGINO_TO_INO(mp, agno, agino); |
648 | bno = XFS_AGB_TO_DADDR(mp, agno, agbno); | 470 | bno = XFS_AGB_TO_DADDR(mp, agno, agbno); |
@@ -658,35 +480,13 @@ xfs_bulkstat( | |||
658 | * when the chunk is used up. | 480 | * when the chunk is used up. |
659 | */ | 481 | */ |
660 | irbp->ir_freecount++; | 482 | irbp->ir_freecount++; |
661 | if (!xfs_bulkstat_use_dinode(mp, flags, bp, | ||
662 | clustidx, &dip)) { | ||
663 | lastino = ino; | ||
664 | continue; | ||
665 | } | ||
666 | /* | ||
667 | * If we need to do an iget, cannot hold bp. | ||
668 | * Drop it, until starting the next cluster. | ||
669 | */ | ||
670 | if ((flags & BULKSTAT_FG_INLINE) && !dip) { | ||
671 | if (bp) | ||
672 | xfs_buf_relse(bp); | ||
673 | bp = NULL; | ||
674 | } | ||
675 | 483 | ||
676 | /* | 484 | /* |
677 | * Get the inode and fill in a single buffer. | 485 | * Get the inode and fill in a single buffer. |
678 | * BULKSTAT_FG_QUICK uses dip to fill it in. | ||
679 | * BULKSTAT_FG_IGET uses igets. | ||
680 | * BULKSTAT_FG_INLINE uses dip if we have an | ||
681 | * inline attr fork, else igets. | ||
682 | * See: xfs_bulkstat_one & xfs_dm_bulkstat_one. | ||
683 | * This is also used to count inodes/blks, etc | ||
684 | * in xfs_qm_quotacheck. | ||
685 | */ | 486 | */ |
686 | ubused = statstruct_size; | 487 | ubused = statstruct_size; |
687 | error = formatter(mp, ino, ubufp, | 488 | error = formatter(mp, ino, ubufp, ubleft, bno, |
688 | ubleft, private_data, | 489 | &ubused, &fmterror); |
689 | bno, &ubused, dip, &fmterror); | ||
690 | if (fmterror == BULKSTAT_RV_NOTHING) { | 490 | if (fmterror == BULKSTAT_RV_NOTHING) { |
691 | if (error && error != ENOENT && | 491 | if (error && error != ENOENT && |
692 | error != EINVAL) { | 492 | error != EINVAL) { |
@@ -779,7 +579,7 @@ xfs_bulkstat_single( | |||
779 | 579 | ||
780 | ino = (xfs_ino_t)*lastinop; | 580 | ino = (xfs_ino_t)*lastinop; |
781 | error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), | 581 | error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), |
782 | NULL, 0, NULL, NULL, &res); | 582 | 0, NULL, &res); |
783 | if (error) { | 583 | if (error) { |
784 | /* | 584 | /* |
785 | * Special case way failed, do it the "long" way | 585 | * Special case way failed, do it the "long" way |
@@ -788,8 +588,7 @@ xfs_bulkstat_single( | |||
788 | (*lastinop)--; | 588 | (*lastinop)--; |
789 | count = 1; | 589 | count = 1; |
790 | if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, | 590 | if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one, |
791 | NULL, sizeof(xfs_bstat_t), buffer, | 591 | sizeof(xfs_bstat_t), buffer, done)) |
792 | BULKSTAT_FG_IGET, done)) | ||
793 | return error; | 592 | return error; |
794 | if (count == 0 || (xfs_ino_t)*lastinop != ino) | 593 | if (count == 0 || (xfs_ino_t)*lastinop != ino) |
795 | return error == EFSCORRUPTED ? | 594 | return error == EFSCORRUPTED ? |