diff options
author | David Woodhouse <dwmw2@infradead.org> | 2008-04-22 07:34:25 -0400 |
---|---|---|
committer | David Woodhouse <dwmw2@infradead.org> | 2008-04-22 07:34:25 -0400 |
commit | f838bad1b3be8ca0c785ee0e0c570dfda74cf377 (patch) | |
tree | 5a842a8056a708cfad55a20fa8ab733dd94b0903 /fs/xfs/xfs_inode.c | |
parent | dd919660aacdf4adfcd279556aa03e595f7f0fc2 (diff) | |
parent | 807501475fce0ebe68baedf87f202c3e4ee0d12c (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs/xfs/xfs_inode.c')
-rw-r--r-- | fs/xfs/xfs_inode.c | 829 |
1 file changed, 403 insertions, 426 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index a550546a7083..ca12acb90394 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -55,7 +55,6 @@ | |||
55 | 55 | ||
56 | kmem_zone_t *xfs_ifork_zone; | 56 | kmem_zone_t *xfs_ifork_zone; |
57 | kmem_zone_t *xfs_inode_zone; | 57 | kmem_zone_t *xfs_inode_zone; |
58 | kmem_zone_t *xfs_icluster_zone; | ||
59 | 58 | ||
60 | /* | 59 | /* |
61 | * Used in xfs_itruncate(). This is the maximum number of extents | 60 | * Used in xfs_itruncate(). This is the maximum number of extents |
@@ -126,6 +125,90 @@ xfs_inobp_check( | |||
126 | #endif | 125 | #endif |
127 | 126 | ||
128 | /* | 127 | /* |
128 | * Find the buffer associated with the given inode map | ||
129 | * We do basic validation checks on the buffer once it has been | ||
130 | * retrieved from disk. | ||
131 | */ | ||
132 | STATIC int | ||
133 | xfs_imap_to_bp( | ||
134 | xfs_mount_t *mp, | ||
135 | xfs_trans_t *tp, | ||
136 | xfs_imap_t *imap, | ||
137 | xfs_buf_t **bpp, | ||
138 | uint buf_flags, | ||
139 | uint imap_flags) | ||
140 | { | ||
141 | int error; | ||
142 | int i; | ||
143 | int ni; | ||
144 | xfs_buf_t *bp; | ||
145 | |||
146 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | ||
147 | (int)imap->im_len, buf_flags, &bp); | ||
148 | if (error) { | ||
149 | if (error != EAGAIN) { | ||
150 | cmn_err(CE_WARN, | ||
151 | "xfs_imap_to_bp: xfs_trans_read_buf()returned " | ||
152 | "an error %d on %s. Returning error.", | ||
153 | error, mp->m_fsname); | ||
154 | } else { | ||
155 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); | ||
156 | } | ||
157 | return error; | ||
158 | } | ||
159 | |||
160 | /* | ||
161 | * Validate the magic number and version of every inode in the buffer | ||
162 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | ||
163 | */ | ||
164 | #ifdef DEBUG | ||
165 | ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; | ||
166 | #else /* usual case */ | ||
167 | ni = 1; | ||
168 | #endif | ||
169 | |||
170 | for (i = 0; i < ni; i++) { | ||
171 | int di_ok; | ||
172 | xfs_dinode_t *dip; | ||
173 | |||
174 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | ||
175 | (i << mp->m_sb.sb_inodelog)); | ||
176 | di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | ||
177 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | ||
178 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | ||
179 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
180 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
181 | if (imap_flags & XFS_IMAP_BULKSTAT) { | ||
182 | xfs_trans_brelse(tp, bp); | ||
183 | return XFS_ERROR(EINVAL); | ||
184 | } | ||
185 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", | ||
186 | XFS_ERRLEVEL_HIGH, mp, dip); | ||
187 | #ifdef DEBUG | ||
188 | cmn_err(CE_PANIC, | ||
189 | "Device %s - bad inode magic/vsn " | ||
190 | "daddr %lld #%d (magic=%x)", | ||
191 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
192 | (unsigned long long)imap->im_blkno, i, | ||
193 | be16_to_cpu(dip->di_core.di_magic)); | ||
194 | #endif | ||
195 | xfs_trans_brelse(tp, bp); | ||
196 | return XFS_ERROR(EFSCORRUPTED); | ||
197 | } | ||
198 | } | ||
199 | |||
200 | xfs_inobp_check(mp, bp); | ||
201 | |||
202 | /* | ||
203 | * Mark the buffer as an inode buffer now that it looks good | ||
204 | */ | ||
205 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | ||
206 | |||
207 | *bpp = bp; | ||
208 | return 0; | ||
209 | } | ||
210 | |||
211 | /* | ||
129 | * This routine is called to map an inode number within a file | 212 | * This routine is called to map an inode number within a file |
130 | * system to the buffer containing the on-disk version of the | 213 | * system to the buffer containing the on-disk version of the |
131 | * inode. It returns a pointer to the buffer containing the | 214 | * inode. It returns a pointer to the buffer containing the |
@@ -147,72 +230,19 @@ xfs_inotobp( | |||
147 | xfs_buf_t **bpp, | 230 | xfs_buf_t **bpp, |
148 | int *offset) | 231 | int *offset) |
149 | { | 232 | { |
150 | int di_ok; | ||
151 | xfs_imap_t imap; | 233 | xfs_imap_t imap; |
152 | xfs_buf_t *bp; | 234 | xfs_buf_t *bp; |
153 | int error; | 235 | int error; |
154 | xfs_dinode_t *dip; | ||
155 | 236 | ||
156 | /* | ||
157 | * Call the space management code to find the location of the | ||
158 | * inode on disk. | ||
159 | */ | ||
160 | imap.im_blkno = 0; | 237 | imap.im_blkno = 0; |
161 | error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); | 238 | error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); |
162 | if (error != 0) { | 239 | if (error) |
163 | cmn_err(CE_WARN, | ||
164 | "xfs_inotobp: xfs_imap() returned an " | ||
165 | "error %d on %s. Returning error.", error, mp->m_fsname); | ||
166 | return error; | 240 | return error; |
167 | } | ||
168 | 241 | ||
169 | /* | 242 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); |
170 | * If the inode number maps to a block outside the bounds of the | 243 | if (error) |
171 | * file system then return NULL rather than calling read_buf | ||
172 | * and panicing when we get an error from the driver. | ||
173 | */ | ||
174 | if ((imap.im_blkno + imap.im_len) > | ||
175 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
176 | cmn_err(CE_WARN, | ||
177 | "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds " | ||
178 | "of the file system %s. Returning EINVAL.", | ||
179 | (unsigned long long)imap.im_blkno, | ||
180 | imap.im_len, mp->m_fsname); | ||
181 | return XFS_ERROR(EINVAL); | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will | ||
186 | * default to just a read_buf() call. | ||
187 | */ | ||
188 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, | ||
189 | (int)imap.im_len, XFS_BUF_LOCK, &bp); | ||
190 | |||
191 | if (error) { | ||
192 | cmn_err(CE_WARN, | ||
193 | "xfs_inotobp: xfs_trans_read_buf() returned an " | ||
194 | "error %d on %s. Returning error.", error, mp->m_fsname); | ||
195 | return error; | 244 | return error; |
196 | } | ||
197 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); | ||
198 | di_ok = | ||
199 | be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | ||
200 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | ||
201 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, | ||
202 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
203 | XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); | ||
204 | xfs_trans_brelse(tp, bp); | ||
205 | cmn_err(CE_WARN, | ||
206 | "xfs_inotobp: XFS_TEST_ERROR() returned an " | ||
207 | "error on %s. Returning EFSCORRUPTED.", mp->m_fsname); | ||
208 | return XFS_ERROR(EFSCORRUPTED); | ||
209 | } | ||
210 | 245 | ||
211 | xfs_inobp_check(mp, bp); | ||
212 | |||
213 | /* | ||
214 | * Set *dipp to point to the on-disk inode in the buffer. | ||
215 | */ | ||
216 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 246 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); |
217 | *bpp = bp; | 247 | *bpp = bp; |
218 | *offset = imap.im_boffset; | 248 | *offset = imap.im_boffset; |
@@ -248,46 +278,21 @@ xfs_itobp( | |||
248 | xfs_dinode_t **dipp, | 278 | xfs_dinode_t **dipp, |
249 | xfs_buf_t **bpp, | 279 | xfs_buf_t **bpp, |
250 | xfs_daddr_t bno, | 280 | xfs_daddr_t bno, |
251 | uint imap_flags) | 281 | uint imap_flags, |
282 | uint buf_flags) | ||
252 | { | 283 | { |
253 | xfs_imap_t imap; | 284 | xfs_imap_t imap; |
254 | xfs_buf_t *bp; | 285 | xfs_buf_t *bp; |
255 | int error; | 286 | int error; |
256 | int i; | ||
257 | int ni; | ||
258 | 287 | ||
259 | if (ip->i_blkno == (xfs_daddr_t)0) { | 288 | if (ip->i_blkno == (xfs_daddr_t)0) { |
260 | /* | ||
261 | * Call the space management code to find the location of the | ||
262 | * inode on disk. | ||
263 | */ | ||
264 | imap.im_blkno = bno; | 289 | imap.im_blkno = bno; |
265 | if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, | 290 | error = xfs_imap(mp, tp, ip->i_ino, &imap, |
266 | XFS_IMAP_LOOKUP | imap_flags))) | 291 | XFS_IMAP_LOOKUP | imap_flags); |
292 | if (error) | ||
267 | return error; | 293 | return error; |
268 | 294 | ||
269 | /* | 295 | /* |
270 | * If the inode number maps to a block outside the bounds | ||
271 | * of the file system then return NULL rather than calling | ||
272 | * read_buf and panicing when we get an error from the | ||
273 | * driver. | ||
274 | */ | ||
275 | if ((imap.im_blkno + imap.im_len) > | ||
276 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
277 | #ifdef DEBUG | ||
278 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " | ||
279 | "(imap.im_blkno (0x%llx) " | ||
280 | "+ imap.im_len (0x%llx)) > " | ||
281 | " XFS_FSB_TO_BB(mp, " | ||
282 | "mp->m_sb.sb_dblocks) (0x%llx)", | ||
283 | (unsigned long long) imap.im_blkno, | ||
284 | (unsigned long long) imap.im_len, | ||
285 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | ||
286 | #endif /* DEBUG */ | ||
287 | return XFS_ERROR(EINVAL); | ||
288 | } | ||
289 | |||
290 | /* | ||
291 | * Fill in the fields in the inode that will be used to | 296 | * Fill in the fields in the inode that will be used to |
292 | * map the inode to its buffer from now on. | 297 | * map the inode to its buffer from now on. |
293 | */ | 298 | */ |
@@ -305,76 +310,17 @@ xfs_itobp( | |||
305 | } | 310 | } |
306 | ASSERT(bno == 0 || bno == imap.im_blkno); | 311 | ASSERT(bno == 0 || bno == imap.im_blkno); |
307 | 312 | ||
308 | /* | 313 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags); |
309 | * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will | 314 | if (error) |
310 | * default to just a read_buf() call. | ||
311 | */ | ||
312 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, | ||
313 | (int)imap.im_len, XFS_BUF_LOCK, &bp); | ||
314 | if (error) { | ||
315 | #ifdef DEBUG | ||
316 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " | ||
317 | "xfs_trans_read_buf() returned error %d, " | ||
318 | "imap.im_blkno 0x%llx, imap.im_len 0x%llx", | ||
319 | error, (unsigned long long) imap.im_blkno, | ||
320 | (unsigned long long) imap.im_len); | ||
321 | #endif /* DEBUG */ | ||
322 | return error; | 315 | return error; |
323 | } | ||
324 | |||
325 | /* | ||
326 | * Validate the magic number and version of every inode in the buffer | ||
327 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | ||
328 | * No validation is done here in userspace (xfs_repair). | ||
329 | */ | ||
330 | #if !defined(__KERNEL__) | ||
331 | ni = 0; | ||
332 | #elif defined(DEBUG) | ||
333 | ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; | ||
334 | #else /* usual case */ | ||
335 | ni = 1; | ||
336 | #endif | ||
337 | |||
338 | for (i = 0; i < ni; i++) { | ||
339 | int di_ok; | ||
340 | xfs_dinode_t *dip; | ||
341 | 316 | ||
342 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 317 | if (!bp) { |
343 | (i << mp->m_sb.sb_inodelog)); | 318 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); |
344 | di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | 319 | ASSERT(tp == NULL); |
345 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | 320 | *bpp = NULL; |
346 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | 321 | return EAGAIN; |
347 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
348 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
349 | if (imap_flags & XFS_IMAP_BULKSTAT) { | ||
350 | xfs_trans_brelse(tp, bp); | ||
351 | return XFS_ERROR(EINVAL); | ||
352 | } | ||
353 | #ifdef DEBUG | ||
354 | cmn_err(CE_ALERT, | ||
355 | "Device %s - bad inode magic/vsn " | ||
356 | "daddr %lld #%d (magic=%x)", | ||
357 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
358 | (unsigned long long)imap.im_blkno, i, | ||
359 | be16_to_cpu(dip->di_core.di_magic)); | ||
360 | #endif | ||
361 | XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, | ||
362 | mp, dip); | ||
363 | xfs_trans_brelse(tp, bp); | ||
364 | return XFS_ERROR(EFSCORRUPTED); | ||
365 | } | ||
366 | } | 322 | } |
367 | 323 | ||
368 | xfs_inobp_check(mp, bp); | ||
369 | |||
370 | /* | ||
371 | * Mark the buffer as an inode buffer now that it looks good | ||
372 | */ | ||
373 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | ||
374 | |||
375 | /* | ||
376 | * Set *dipp to point to the on-disk inode in the buffer. | ||
377 | */ | ||
378 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 324 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); |
379 | *bpp = bp; | 325 | *bpp = bp; |
380 | return 0; | 326 | return 0; |
@@ -878,7 +824,7 @@ xfs_iread( | |||
878 | * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will | 824 | * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will |
879 | * know that this is a new incore inode. | 825 | * know that this is a new incore inode. |
880 | */ | 826 | */ |
881 | error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); | 827 | error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); |
882 | if (error) { | 828 | if (error) { |
883 | kmem_zone_free(xfs_inode_zone, ip); | 829 | kmem_zone_free(xfs_inode_zone, ip); |
884 | return error; | 830 | return error; |
@@ -1147,7 +1093,7 @@ xfs_ialloc( | |||
1147 | * the inode version number now. This way we only do the conversion | 1093 | * the inode version number now. This way we only do the conversion |
1148 | * here rather than here and in the flush/logging code. | 1094 | * here rather than here and in the flush/logging code. |
1149 | */ | 1095 | */ |
1150 | if (XFS_SB_VERSION_HASNLINK(&tp->t_mountp->m_sb) && | 1096 | if (xfs_sb_version_hasnlink(&tp->t_mountp->m_sb) && |
1151 | ip->i_d.di_version == XFS_DINODE_VERSION_1) { | 1097 | ip->i_d.di_version == XFS_DINODE_VERSION_1) { |
1152 | ip->i_d.di_version = XFS_DINODE_VERSION_2; | 1098 | ip->i_d.di_version = XFS_DINODE_VERSION_2; |
1153 | /* | 1099 | /* |
@@ -1518,51 +1464,50 @@ xfs_itruncate_start( | |||
1518 | } | 1464 | } |
1519 | 1465 | ||
1520 | /* | 1466 | /* |
1521 | * Shrink the file to the given new_size. The new | 1467 | * Shrink the file to the given new_size. The new size must be smaller than |
1522 | * size must be smaller than the current size. | 1468 | * the current size. This will free up the underlying blocks in the removed |
1523 | * This will free up the underlying blocks | 1469 | * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). |
1524 | * in the removed range after a call to xfs_itruncate_start() | ||
1525 | * or xfs_atruncate_start(). | ||
1526 | * | 1470 | * |
1527 | * The transaction passed to this routine must have made | 1471 | * The transaction passed to this routine must have made a permanent log |
1528 | * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES. | 1472 | * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the |
1529 | * This routine may commit the given transaction and | 1473 | * given transaction and start new ones, so make sure everything involved in |
1530 | * start new ones, so make sure everything involved in | 1474 | * the transaction is tidy before calling here. Some transaction will be |
1531 | * the transaction is tidy before calling here. | 1475 | * returned to the caller to be committed. The incoming transaction must |
1532 | * Some transaction will be returned to the caller to be | 1476 | * already include the inode, and both inode locks must be held exclusively. |
1533 | * committed. The incoming transaction must already include | 1477 | * The inode must also be "held" within the transaction. On return the inode |
1534 | * the inode, and both inode locks must be held exclusively. | 1478 | * will be "held" within the returned transaction. This routine does NOT |
1535 | * The inode must also be "held" within the transaction. On | 1479 | * require any disk space to be reserved for it within the transaction. |
1536 | * return the inode will be "held" within the returned transaction. | ||
1537 | * This routine does NOT require any disk space to be reserved | ||
1538 | * for it within the transaction. | ||
1539 | * | 1480 | * |
1540 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, | 1481 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it |
1541 | * and it indicates the fork which is to be truncated. For the | 1482 | * indicates the fork which is to be truncated. For the attribute fork we only |
1542 | * attribute fork we only support truncation to size 0. | 1483 | * support truncation to size 0. |
1543 | * | 1484 | * |
1544 | * We use the sync parameter to indicate whether or not the first | 1485 | * We use the sync parameter to indicate whether or not the first transaction |
1545 | * transaction we perform might have to be synchronous. For the attr fork, | 1486 | * we perform might have to be synchronous. For the attr fork, it needs to be |
1546 | * it needs to be so if the unlink of the inode is not yet known to be | 1487 | * so if the unlink of the inode is not yet known to be permanent in the log. |
1547 | * permanent in the log. This keeps us from freeing and reusing the | 1488 | * This keeps us from freeing and reusing the blocks of the attribute fork |
1548 | * blocks of the attribute fork before the unlink of the inode becomes | 1489 | * before the unlink of the inode becomes permanent. |
1549 | * permanent. | ||
1550 | * | 1490 | * |
1551 | * For the data fork, we normally have to run synchronously if we're | 1491 | * For the data fork, we normally have to run synchronously if we're being |
1552 | * being called out of the inactive path or we're being called | 1492 | * called out of the inactive path or we're being called out of the create path |
1553 | * out of the create path where we're truncating an existing file. | 1493 | * where we're truncating an existing file. Either way, the truncate needs to |
1554 | * Either way, the truncate needs to be sync so blocks don't reappear | 1494 | * be sync so blocks don't reappear in the file with altered data in case of a |
1555 | * in the file with altered data in case of a crash. wsync filesystems | 1495 | * crash. wsync filesystems can run the first case async because anything that |
1556 | * can run the first case async because anything that shrinks the inode | 1496 | * shrinks the inode has to run sync so by the time we're called here from |
1557 | * has to run sync so by the time we're called here from inactive, the | 1497 | * inactive, the inode size is permanently set to 0. |
1558 | * inode size is permanently set to 0. | ||
1559 | * | 1498 | * |
1560 | * Calls from the truncate path always need to be sync unless we're | 1499 | * Calls from the truncate path always need to be sync unless we're in a wsync |
1561 | * in a wsync filesystem and the file has already been unlinked. | 1500 | * filesystem and the file has already been unlinked. |
1562 | * | 1501 | * |
1563 | * The caller is responsible for correctly setting the sync parameter. | 1502 | * The caller is responsible for correctly setting the sync parameter. It gets |
1564 | * It gets too hard for us to guess here which path we're being called | 1503 | * too hard for us to guess here which path we're being called out of just |
1565 | * out of just based on inode state. | 1504 | * based on inode state. |
1505 | * | ||
1506 | * If we get an error, we must return with the inode locked and linked into the | ||
1507 | * current transaction. This keeps things simple for the higher level code, | ||
1508 | * because it always knows that the inode is locked and held in the transaction | ||
1509 | * that returns to it whether errors occur or not. We don't mark the inode | ||
1510 | * dirty on error so that transactions can be easily aborted if possible. | ||
1566 | */ | 1511 | */ |
1567 | int | 1512 | int |
1568 | xfs_itruncate_finish( | 1513 | xfs_itruncate_finish( |
@@ -1741,65 +1686,51 @@ xfs_itruncate_finish( | |||
1741 | */ | 1686 | */ |
1742 | error = xfs_bmap_finish(tp, &free_list, &committed); | 1687 | error = xfs_bmap_finish(tp, &free_list, &committed); |
1743 | ntp = *tp; | 1688 | ntp = *tp; |
1689 | if (committed) { | ||
1690 | /* link the inode into the next xact in the chain */ | ||
1691 | xfs_trans_ijoin(ntp, ip, | ||
1692 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
1693 | xfs_trans_ihold(ntp, ip); | ||
1694 | } | ||
1695 | |||
1744 | if (error) { | 1696 | if (error) { |
1745 | /* | 1697 | /* |
1746 | * If the bmap finish call encounters an error, | 1698 | * If the bmap finish call encounters an error, return |
1747 | * return to the caller where the transaction | 1699 | * to the caller where the transaction can be properly |
1748 | * can be properly aborted. We just need to | 1700 | * aborted. We just need to make sure we're not |
1749 | * make sure we're not holding any resources | 1701 | * holding any resources that we were not when we came |
1750 | * that we were not when we came in. | 1702 | * in. |
1751 | * | 1703 | * |
1752 | * Aborting from this point might lose some | 1704 | * Aborting from this point might lose some blocks in |
1753 | * blocks in the file system, but oh well. | 1705 | * the file system, but oh well. |
1754 | */ | 1706 | */ |
1755 | xfs_bmap_cancel(&free_list); | 1707 | xfs_bmap_cancel(&free_list); |
1756 | if (committed) { | ||
1757 | /* | ||
1758 | * If the passed in transaction committed | ||
1759 | * in xfs_bmap_finish(), then we want to | ||
1760 | * add the inode to this one before returning. | ||
1761 | * This keeps things simple for the higher | ||
1762 | * level code, because it always knows that | ||
1763 | * the inode is locked and held in the | ||
1764 | * transaction that returns to it whether | ||
1765 | * errors occur or not. We don't mark the | ||
1766 | * inode dirty so that this transaction can | ||
1767 | * be easily aborted if possible. | ||
1768 | */ | ||
1769 | xfs_trans_ijoin(ntp, ip, | ||
1770 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
1771 | xfs_trans_ihold(ntp, ip); | ||
1772 | } | ||
1773 | return error; | 1708 | return error; |
1774 | } | 1709 | } |
1775 | 1710 | ||
1776 | if (committed) { | 1711 | if (committed) { |
1777 | /* | 1712 | /* |
1778 | * The first xact was committed, | 1713 | * Mark the inode dirty so it will be logged and |
1779 | * so add the inode to the new one. | 1714 | * moved forward in the log as part of every commit. |
1780 | * Mark it dirty so it will be logged | ||
1781 | * and moved forward in the log as | ||
1782 | * part of every commit. | ||
1783 | */ | 1715 | */ |
1784 | xfs_trans_ijoin(ntp, ip, | ||
1785 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
1786 | xfs_trans_ihold(ntp, ip); | ||
1787 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | 1716 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); |
1788 | } | 1717 | } |
1718 | |||
1789 | ntp = xfs_trans_dup(ntp); | 1719 | ntp = xfs_trans_dup(ntp); |
1790 | (void) xfs_trans_commit(*tp, 0); | 1720 | error = xfs_trans_commit(*tp, 0); |
1791 | *tp = ntp; | 1721 | *tp = ntp; |
1792 | error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, | 1722 | |
1793 | XFS_TRANS_PERM_LOG_RES, | 1723 | /* link the inode into the next transaction in the chain */ |
1794 | XFS_ITRUNCATE_LOG_COUNT); | ||
1795 | /* | ||
1796 | * Add the inode being truncated to the next chained | ||
1797 | * transaction. | ||
1798 | */ | ||
1799 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 1724 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
1800 | xfs_trans_ihold(ntp, ip); | 1725 | xfs_trans_ihold(ntp, ip); |
1726 | |||
1727 | if (!error) | ||
1728 | error = xfs_trans_reserve(ntp, 0, | ||
1729 | XFS_ITRUNCATE_LOG_RES(mp), 0, | ||
1730 | XFS_TRANS_PERM_LOG_RES, | ||
1731 | XFS_ITRUNCATE_LOG_COUNT); | ||
1801 | if (error) | 1732 | if (error) |
1802 | return (error); | 1733 | return error; |
1803 | } | 1734 | } |
1804 | /* | 1735 | /* |
1805 | * Only update the size in the case of the data fork, but | 1736 | * Only update the size in the case of the data fork, but |
@@ -1967,7 +1898,7 @@ xfs_iunlink( | |||
1967 | * Here we put the head pointer into our next pointer, | 1898 | * Here we put the head pointer into our next pointer, |
1968 | * and then we fall through to point the head at us. | 1899 | * and then we fall through to point the head at us. |
1969 | */ | 1900 | */ |
1970 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 1901 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
1971 | if (error) | 1902 | if (error) |
1972 | return error; | 1903 | return error; |
1973 | 1904 | ||
@@ -2075,7 +2006,7 @@ xfs_iunlink_remove( | |||
2075 | * of dealing with the buffer when there is no need to | 2006 | * of dealing with the buffer when there is no need to |
2076 | * change it. | 2007 | * change it. |
2077 | */ | 2008 | */ |
2078 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 2009 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
2079 | if (error) { | 2010 | if (error) { |
2080 | cmn_err(CE_WARN, | 2011 | cmn_err(CE_WARN, |
2081 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 2012 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
@@ -2137,7 +2068,7 @@ xfs_iunlink_remove( | |||
2137 | * Now last_ibp points to the buffer previous to us on | 2068 | * Now last_ibp points to the buffer previous to us on |
2138 | * the unlinked list. Pull us from the list. | 2069 | * the unlinked list. Pull us from the list. |
2139 | */ | 2070 | */ |
2140 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 2071 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
2141 | if (error) { | 2072 | if (error) { |
2142 | cmn_err(CE_WARN, | 2073 | cmn_err(CE_WARN, |
2143 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 2074 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
@@ -2172,13 +2103,6 @@ xfs_iunlink_remove( | |||
2172 | return 0; | 2103 | return 0; |
2173 | } | 2104 | } |
2174 | 2105 | ||
2175 | STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip) | ||
2176 | { | ||
2177 | return (((ip->i_itemp == NULL) || | ||
2178 | !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
2179 | (ip->i_update_core == 0)); | ||
2180 | } | ||
2181 | |||
2182 | STATIC void | 2106 | STATIC void |
2183 | xfs_ifree_cluster( | 2107 | xfs_ifree_cluster( |
2184 | xfs_inode_t *free_ip, | 2108 | xfs_inode_t *free_ip, |
@@ -2400,7 +2324,7 @@ xfs_ifree( | |||
2400 | 2324 | ||
2401 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2325 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
2402 | 2326 | ||
2403 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0); | 2327 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
2404 | if (error) | 2328 | if (error) |
2405 | return error; | 2329 | return error; |
2406 | 2330 | ||
@@ -2678,14 +2602,31 @@ xfs_imap( | |||
2678 | fsbno = imap->im_blkno ? | 2602 | fsbno = imap->im_blkno ? |
2679 | XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; | 2603 | XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; |
2680 | error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); | 2604 | error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); |
2681 | if (error != 0) { | 2605 | if (error) |
2682 | return error; | 2606 | return error; |
2683 | } | 2607 | |
2684 | imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); | 2608 | imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); |
2685 | imap->im_len = XFS_FSB_TO_BB(mp, len); | 2609 | imap->im_len = XFS_FSB_TO_BB(mp, len); |
2686 | imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); | 2610 | imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); |
2687 | imap->im_ioffset = (ushort)off; | 2611 | imap->im_ioffset = (ushort)off; |
2688 | imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); | 2612 | imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); |
2613 | |||
2614 | /* | ||
2615 | * If the inode number maps to a block outside the bounds | ||
2616 | * of the file system then return NULL rather than calling | ||
2617 | * read_buf and panicing when we get an error from the | ||
2618 | * driver. | ||
2619 | */ | ||
2620 | if ((imap->im_blkno + imap->im_len) > | ||
2621 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
2622 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | ||
2623 | "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " | ||
2624 | " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", | ||
2625 | (unsigned long long) imap->im_blkno, | ||
2626 | (unsigned long long) imap->im_len, | ||
2627 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | ||
2628 | return EINVAL; | ||
2629 | } | ||
2689 | return 0; | 2630 | return 0; |
2690 | } | 2631 | } |
2691 | 2632 | ||
@@ -2826,38 +2767,41 @@ xfs_iunpin( | |||
2826 | } | 2767 | } |
2827 | 2768 | ||
2828 | /* | 2769 | /* |
2829 | * This is called to wait for the given inode to be unpinned. | 2770 | * This is called to unpin an inode. It can be directed to wait or to return |
2830 | * It will sleep until this happens. The caller must have the | 2771 | * immediately without waiting for the inode to be unpinned. The caller must |
2831 | * inode locked in at least shared mode so that the buffer cannot | 2772 | * have the inode locked in at least shared mode so that the buffer cannot be |
2832 | * be subsequently pinned once someone is waiting for it to be | 2773 | * subsequently pinned once someone is waiting for it to be unpinned. |
2833 | * unpinned. | ||
2834 | */ | 2774 | */ |
2835 | STATIC void | 2775 | STATIC void |
2836 | xfs_iunpin_wait( | 2776 | __xfs_iunpin_wait( |
2837 | xfs_inode_t *ip) | 2777 | xfs_inode_t *ip, |
2778 | int wait) | ||
2838 | { | 2779 | { |
2839 | xfs_inode_log_item_t *iip; | 2780 | xfs_inode_log_item_t *iip = ip->i_itemp; |
2840 | xfs_lsn_t lsn; | ||
2841 | 2781 | ||
2842 | ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); | 2782 | ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); |
2843 | 2783 | if (atomic_read(&ip->i_pincount) == 0) | |
2844 | if (atomic_read(&ip->i_pincount) == 0) { | ||
2845 | return; | 2784 | return; |
2846 | } | ||
2847 | 2785 | ||
2848 | iip = ip->i_itemp; | 2786 | /* Give the log a push to start the unpinning I/O */ |
2849 | if (iip && iip->ili_last_lsn) { | 2787 | xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? |
2850 | lsn = iip->ili_last_lsn; | 2788 | iip->ili_last_lsn : 0, XFS_LOG_FORCE); |
2851 | } else { | 2789 | if (wait) |
2852 | lsn = (xfs_lsn_t)0; | 2790 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); |
2853 | } | 2791 | } |
2854 | 2792 | ||
2855 | /* | 2793 | static inline void |
2856 | * Give the log a push so we don't wait here too long. | 2794 | xfs_iunpin_wait( |
2857 | */ | 2795 | xfs_inode_t *ip) |
2858 | xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); | 2796 | { |
2797 | __xfs_iunpin_wait(ip, 1); | ||
2798 | } | ||
2859 | 2799 | ||
2860 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); | 2800 | static inline void |
2801 | xfs_iunpin_nowait( | ||
2802 | xfs_inode_t *ip) | ||
2803 | { | ||
2804 | __xfs_iunpin_wait(ip, 0); | ||
2861 | } | 2805 | } |
2862 | 2806 | ||
2863 | 2807 | ||
@@ -2932,7 +2876,7 @@ xfs_iextents_copy( | |||
2932 | * format indicates the current state of the fork. | 2876 | * format indicates the current state of the fork. |
2933 | */ | 2877 | */ |
2934 | /*ARGSUSED*/ | 2878 | /*ARGSUSED*/ |
2935 | STATIC int | 2879 | STATIC void |
2936 | xfs_iflush_fork( | 2880 | xfs_iflush_fork( |
2937 | xfs_inode_t *ip, | 2881 | xfs_inode_t *ip, |
2938 | xfs_dinode_t *dip, | 2882 | xfs_dinode_t *dip, |
@@ -2953,16 +2897,16 @@ xfs_iflush_fork( | |||
2953 | static const short extflag[2] = | 2897 | static const short extflag[2] = |
2954 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; | 2898 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; |
2955 | 2899 | ||
2956 | if (iip == NULL) | 2900 | if (!iip) |
2957 | return 0; | 2901 | return; |
2958 | ifp = XFS_IFORK_PTR(ip, whichfork); | 2902 | ifp = XFS_IFORK_PTR(ip, whichfork); |
2959 | /* | 2903 | /* |
2960 | * This can happen if we gave up in iformat in an error path, | 2904 | * This can happen if we gave up in iformat in an error path, |
2961 | * for the attribute fork. | 2905 | * for the attribute fork. |
2962 | */ | 2906 | */ |
2963 | if (ifp == NULL) { | 2907 | if (!ifp) { |
2964 | ASSERT(whichfork == XFS_ATTR_FORK); | 2908 | ASSERT(whichfork == XFS_ATTR_FORK); |
2965 | return 0; | 2909 | return; |
2966 | } | 2910 | } |
2967 | cp = XFS_DFORK_PTR(dip, whichfork); | 2911 | cp = XFS_DFORK_PTR(dip, whichfork); |
2968 | mp = ip->i_mount; | 2912 | mp = ip->i_mount; |
@@ -3023,8 +2967,145 @@ xfs_iflush_fork( | |||
3023 | ASSERT(0); | 2967 | ASSERT(0); |
3024 | break; | 2968 | break; |
3025 | } | 2969 | } |
2970 | } | ||
2971 | |||
2972 | STATIC int | ||
2973 | xfs_iflush_cluster( | ||
2974 | xfs_inode_t *ip, | ||
2975 | xfs_buf_t *bp) | ||
2976 | { | ||
2977 | xfs_mount_t *mp = ip->i_mount; | ||
2978 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | ||
2979 | unsigned long first_index, mask; | ||
2980 | int ilist_size; | ||
2981 | xfs_inode_t **ilist; | ||
2982 | xfs_inode_t *iq; | ||
2983 | int nr_found; | ||
2984 | int clcount = 0; | ||
2985 | int bufwasdelwri; | ||
2986 | int i; | ||
2987 | |||
2988 | ASSERT(pag->pagi_inodeok); | ||
2989 | ASSERT(pag->pag_ici_init); | ||
2990 | |||
2991 | ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); | ||
2992 | ilist = kmem_alloc(ilist_size, KM_MAYFAIL); | ||
2993 | if (!ilist) | ||
2994 | return 0; | ||
2995 | |||
2996 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | ||
2997 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | ||
2998 | read_lock(&pag->pag_ici_lock); | ||
2999 | /* really need a gang lookup range call here */ | ||
3000 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | ||
3001 | first_index, | ||
3002 | XFS_INODE_CLUSTER_SIZE(mp)); | ||
3003 | if (nr_found == 0) | ||
3004 | goto out_free; | ||
3005 | |||
3006 | for (i = 0; i < nr_found; i++) { | ||
3007 | iq = ilist[i]; | ||
3008 | if (iq == ip) | ||
3009 | continue; | ||
3010 | /* if the inode lies outside this cluster, we're done. */ | ||
3011 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | ||
3012 | break; | ||
3013 | /* | ||
3014 | * Do an un-protected check to see if the inode is dirty and | ||
3015 | * is a candidate for flushing. These checks will be repeated | ||
3016 | * later after the appropriate locks are acquired. | ||
3017 | */ | ||
3018 | if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) | ||
3019 | continue; | ||
3020 | |||
3021 | /* | ||
3022 | * Try to get locks. If any are unavailable or it is pinned, | ||
3023 | * then this inode cannot be flushed and is skipped. | ||
3024 | */ | ||
3025 | |||
3026 | if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) | ||
3027 | continue; | ||
3028 | if (!xfs_iflock_nowait(iq)) { | ||
3029 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3030 | continue; | ||
3031 | } | ||
3032 | if (xfs_ipincount(iq)) { | ||
3033 | xfs_ifunlock(iq); | ||
3034 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3035 | continue; | ||
3036 | } | ||
3037 | |||
3038 | /* | ||
3039 | * arriving here means that this inode can be flushed. First | ||
3040 | * re-check that it's dirty before flushing. | ||
3041 | */ | ||
3042 | if (!xfs_inode_clean(iq)) { | ||
3043 | int error; | ||
3044 | error = xfs_iflush_int(iq, bp); | ||
3045 | if (error) { | ||
3046 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3047 | goto cluster_corrupt_out; | ||
3048 | } | ||
3049 | clcount++; | ||
3050 | } else { | ||
3051 | xfs_ifunlock(iq); | ||
3052 | } | ||
3053 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3054 | } | ||
3055 | |||
3056 | if (clcount) { | ||
3057 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
3058 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
3059 | } | ||
3026 | 3060 | ||
3061 | out_free: | ||
3062 | read_unlock(&pag->pag_ici_lock); | ||
3063 | kmem_free(ilist, ilist_size); | ||
3027 | return 0; | 3064 | return 0; |
3065 | |||
3066 | |||
3067 | cluster_corrupt_out: | ||
3068 | /* | ||
3069 | * Corruption detected in the clustering loop. Invalidate the | ||
3070 | * inode buffer and shut down the filesystem. | ||
3071 | */ | ||
3072 | read_unlock(&pag->pag_ici_lock); | ||
3073 | /* | ||
3074 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
3075 | * brelse can handle it with no problems. If not, shut down the | ||
3076 | * filesystem before releasing the buffer. | ||
3077 | */ | ||
3078 | bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); | ||
3079 | if (bufwasdelwri) | ||
3080 | xfs_buf_relse(bp); | ||
3081 | |||
3082 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
3083 | |||
3084 | if (!bufwasdelwri) { | ||
3085 | /* | ||
3086 | * Just like incore_relse: if we have b_iodone functions, | ||
3087 | * mark the buffer as an error and call them. Otherwise | ||
3088 | * mark it as stale and brelse. | ||
3089 | */ | ||
3090 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
3091 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
3092 | XFS_BUF_UNDONE(bp); | ||
3093 | XFS_BUF_STALE(bp); | ||
3094 | XFS_BUF_SHUT(bp); | ||
3095 | XFS_BUF_ERROR(bp,EIO); | ||
3096 | xfs_biodone(bp); | ||
3097 | } else { | ||
3098 | XFS_BUF_STALE(bp); | ||
3099 | xfs_buf_relse(bp); | ||
3100 | } | ||
3101 | } | ||
3102 | |||
3103 | /* | ||
3104 | * Unlocks the flush lock | ||
3105 | */ | ||
3106 | xfs_iflush_abort(iq); | ||
3107 | kmem_free(ilist, ilist_size); | ||
3108 | return XFS_ERROR(EFSCORRUPTED); | ||
3028 | } | 3109 | } |
3029 | 3110 | ||
3030 | /* | 3111 | /* |
@@ -3046,11 +3127,7 @@ xfs_iflush( | |||
3046 | xfs_dinode_t *dip; | 3127 | xfs_dinode_t *dip; |
3047 | xfs_mount_t *mp; | 3128 | xfs_mount_t *mp; |
3048 | int error; | 3129 | int error; |
3049 | /* REFERENCED */ | 3130 | int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); |
3050 | xfs_inode_t *iq; | ||
3051 | int clcount; /* count of inodes clustered */ | ||
3052 | int bufwasdelwri; | ||
3053 | struct hlist_node *entry; | ||
3054 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; | 3131 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; |
3055 | 3132 | ||
3056 | XFS_STATS_INC(xs_iflush_count); | 3133 | XFS_STATS_INC(xs_iflush_count); |
@@ -3067,8 +3144,7 @@ xfs_iflush( | |||
3067 | * If the inode isn't dirty, then just release the inode | 3144 | * If the inode isn't dirty, then just release the inode |
3068 | * flush lock and do nothing. | 3145 | * flush lock and do nothing. |
3069 | */ | 3146 | */ |
3070 | if ((ip->i_update_core == 0) && | 3147 | if (xfs_inode_clean(ip)) { |
3071 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
3072 | ASSERT((iip != NULL) ? | 3148 | ASSERT((iip != NULL) ? |
3073 | !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); | 3149 | !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); |
3074 | xfs_ifunlock(ip); | 3150 | xfs_ifunlock(ip); |
@@ -3076,11 +3152,21 @@ xfs_iflush( | |||
3076 | } | 3152 | } |
3077 | 3153 | ||
3078 | /* | 3154 | /* |
3079 | * We can't flush the inode until it is unpinned, so | 3155 | * We can't flush the inode until it is unpinned, so wait for it if we |
3080 | * wait for it. We know noone new can pin it, because | 3156 | * are allowed to block. We know noone new can pin it, because we are |
3081 | * we are holding the inode lock shared and you need | 3157 | * holding the inode lock shared and you need to hold it exclusively to |
3082 | * to hold it exclusively to pin the inode. | 3158 | * pin the inode. |
3159 | * | ||
3160 | * If we are not allowed to block, force the log out asynchronously so | ||
3161 | * that when we come back the inode will be unpinned. If other inodes | ||
3162 | * in the same cluster are dirty, they will probably write the inode | ||
3163 | * out for us if they occur after the log force completes. | ||
3083 | */ | 3164 | */ |
3165 | if (noblock && xfs_ipincount(ip)) { | ||
3166 | xfs_iunpin_nowait(ip); | ||
3167 | xfs_ifunlock(ip); | ||
3168 | return EAGAIN; | ||
3169 | } | ||
3084 | xfs_iunpin_wait(ip); | 3170 | xfs_iunpin_wait(ip); |
3085 | 3171 | ||
3086 | /* | 3172 | /* |
@@ -3097,15 +3183,6 @@ xfs_iflush( | |||
3097 | } | 3183 | } |
3098 | 3184 | ||
3099 | /* | 3185 | /* |
3100 | * Get the buffer containing the on-disk inode. | ||
3101 | */ | ||
3102 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); | ||
3103 | if (error) { | ||
3104 | xfs_ifunlock(ip); | ||
3105 | return error; | ||
3106 | } | ||
3107 | |||
3108 | /* | ||
3109 | * Decide how buffer will be flushed out. This is done before | 3186 | * Decide how buffer will be flushed out. This is done before |
3110 | * the call to xfs_iflush_int because this field is zeroed by it. | 3187 | * the call to xfs_iflush_int because this field is zeroed by it. |
3111 | */ | 3188 | */ |
@@ -3121,6 +3198,7 @@ xfs_iflush( | |||
3121 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: | 3198 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: |
3122 | flags = 0; | 3199 | flags = 0; |
3123 | break; | 3200 | break; |
3201 | case XFS_IFLUSH_ASYNC_NOBLOCK: | ||
3124 | case XFS_IFLUSH_ASYNC: | 3202 | case XFS_IFLUSH_ASYNC: |
3125 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: | 3203 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: |
3126 | flags = INT_ASYNC; | 3204 | flags = INT_ASYNC; |
@@ -3140,6 +3218,7 @@ xfs_iflush( | |||
3140 | case XFS_IFLUSH_DELWRI: | 3218 | case XFS_IFLUSH_DELWRI: |
3141 | flags = INT_DELWRI; | 3219 | flags = INT_DELWRI; |
3142 | break; | 3220 | break; |
3221 | case XFS_IFLUSH_ASYNC_NOBLOCK: | ||
3143 | case XFS_IFLUSH_ASYNC: | 3222 | case XFS_IFLUSH_ASYNC: |
3144 | flags = INT_ASYNC; | 3223 | flags = INT_ASYNC; |
3145 | break; | 3224 | break; |
@@ -3154,94 +3233,41 @@ xfs_iflush( | |||
3154 | } | 3233 | } |
3155 | 3234 | ||
3156 | /* | 3235 | /* |
3157 | * First flush out the inode that xfs_iflush was called with. | 3236 | * Get the buffer containing the on-disk inode. |
3158 | */ | 3237 | */ |
3159 | error = xfs_iflush_int(ip, bp); | 3238 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, |
3160 | if (error) { | 3239 | noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); |
3161 | goto corrupt_out; | 3240 | if (error || !bp) { |
3241 | xfs_ifunlock(ip); | ||
3242 | return error; | ||
3162 | } | 3243 | } |
3163 | 3244 | ||
3164 | /* | 3245 | /* |
3165 | * inode clustering: | 3246 | * First flush out the inode that xfs_iflush was called with. |
3166 | * see if other inodes can be gathered into this write | ||
3167 | */ | 3247 | */ |
3168 | spin_lock(&ip->i_cluster->icl_lock); | 3248 | error = xfs_iflush_int(ip, bp); |
3169 | ip->i_cluster->icl_buf = bp; | 3249 | if (error) |
3170 | 3250 | goto corrupt_out; | |
3171 | clcount = 0; | ||
3172 | hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) { | ||
3173 | if (iq == ip) | ||
3174 | continue; | ||
3175 | |||
3176 | /* | ||
3177 | * Do an un-protected check to see if the inode is dirty and | ||
3178 | * is a candidate for flushing. These checks will be repeated | ||
3179 | * later after the appropriate locks are acquired. | ||
3180 | */ | ||
3181 | iip = iq->i_itemp; | ||
3182 | if ((iq->i_update_core == 0) && | ||
3183 | ((iip == NULL) || | ||
3184 | !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
3185 | xfs_ipincount(iq) == 0) { | ||
3186 | continue; | ||
3187 | } | ||
3188 | |||
3189 | /* | ||
3190 | * Try to get locks. If any are unavailable, | ||
3191 | * then this inode cannot be flushed and is skipped. | ||
3192 | */ | ||
3193 | |||
3194 | /* get inode locks (just i_lock) */ | ||
3195 | if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) { | ||
3196 | /* get inode flush lock */ | ||
3197 | if (xfs_iflock_nowait(iq)) { | ||
3198 | /* check if pinned */ | ||
3199 | if (xfs_ipincount(iq) == 0) { | ||
3200 | /* arriving here means that | ||
3201 | * this inode can be flushed. | ||
3202 | * first re-check that it's | ||
3203 | * dirty | ||
3204 | */ | ||
3205 | iip = iq->i_itemp; | ||
3206 | if ((iq->i_update_core != 0)|| | ||
3207 | ((iip != NULL) && | ||
3208 | (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
3209 | clcount++; | ||
3210 | error = xfs_iflush_int(iq, bp); | ||
3211 | if (error) { | ||
3212 | xfs_iunlock(iq, | ||
3213 | XFS_ILOCK_SHARED); | ||
3214 | goto cluster_corrupt_out; | ||
3215 | } | ||
3216 | } else { | ||
3217 | xfs_ifunlock(iq); | ||
3218 | } | ||
3219 | } else { | ||
3220 | xfs_ifunlock(iq); | ||
3221 | } | ||
3222 | } | ||
3223 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
3224 | } | ||
3225 | } | ||
3226 | spin_unlock(&ip->i_cluster->icl_lock); | ||
3227 | |||
3228 | if (clcount) { | ||
3229 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
3230 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
3231 | } | ||
3232 | 3251 | ||
3233 | /* | 3252 | /* |
3234 | * If the buffer is pinned then push on the log so we won't | 3253 | * If the buffer is pinned then push on the log now so we won't |
3235 | * get stuck waiting in the write for too long. | 3254 | * get stuck waiting in the write for too long. |
3236 | */ | 3255 | */ |
3237 | if (XFS_BUF_ISPINNED(bp)){ | 3256 | if (XFS_BUF_ISPINNED(bp)) |
3238 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); | 3257 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); |
3239 | } | 3258 | |
3259 | /* | ||
3260 | * inode clustering: | ||
3261 | * see if other inodes can be gathered into this write | ||
3262 | */ | ||
3263 | error = xfs_iflush_cluster(ip, bp); | ||
3264 | if (error) | ||
3265 | goto cluster_corrupt_out; | ||
3240 | 3266 | ||
3241 | if (flags & INT_DELWRI) { | 3267 | if (flags & INT_DELWRI) { |
3242 | xfs_bdwrite(mp, bp); | 3268 | xfs_bdwrite(mp, bp); |
3243 | } else if (flags & INT_ASYNC) { | 3269 | } else if (flags & INT_ASYNC) { |
3244 | xfs_bawrite(mp, bp); | 3270 | error = xfs_bawrite(mp, bp); |
3245 | } else { | 3271 | } else { |
3246 | error = xfs_bwrite(mp, bp); | 3272 | error = xfs_bwrite(mp, bp); |
3247 | } | 3273 | } |
@@ -3250,52 +3276,11 @@ xfs_iflush( | |||
3250 | corrupt_out: | 3276 | corrupt_out: |
3251 | xfs_buf_relse(bp); | 3277 | xfs_buf_relse(bp); |
3252 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 3278 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
3253 | xfs_iflush_abort(ip); | ||
3254 | /* | ||
3255 | * Unlocks the flush lock | ||
3256 | */ | ||
3257 | return XFS_ERROR(EFSCORRUPTED); | ||
3258 | |||
3259 | cluster_corrupt_out: | 3279 | cluster_corrupt_out: |
3260 | /* Corruption detected in the clustering loop. Invalidate the | ||
3261 | * inode buffer and shut down the filesystem. | ||
3262 | */ | ||
3263 | spin_unlock(&ip->i_cluster->icl_lock); | ||
3264 | |||
3265 | /* | ||
3266 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
3267 | * brelse can handle it with no problems. If not, shut down the | ||
3268 | * filesystem before releasing the buffer. | ||
3269 | */ | ||
3270 | if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) { | ||
3271 | xfs_buf_relse(bp); | ||
3272 | } | ||
3273 | |||
3274 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
3275 | |||
3276 | if(!bufwasdelwri) { | ||
3277 | /* | ||
3278 | * Just like incore_relse: if we have b_iodone functions, | ||
3279 | * mark the buffer as an error and call them. Otherwise | ||
3280 | * mark it as stale and brelse. | ||
3281 | */ | ||
3282 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
3283 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
3284 | XFS_BUF_UNDONE(bp); | ||
3285 | XFS_BUF_STALE(bp); | ||
3286 | XFS_BUF_SHUT(bp); | ||
3287 | XFS_BUF_ERROR(bp,EIO); | ||
3288 | xfs_biodone(bp); | ||
3289 | } else { | ||
3290 | XFS_BUF_STALE(bp); | ||
3291 | xfs_buf_relse(bp); | ||
3292 | } | ||
3293 | } | ||
3294 | |||
3295 | xfs_iflush_abort(iq); | ||
3296 | /* | 3280 | /* |
3297 | * Unlocks the flush lock | 3281 | * Unlocks the flush lock |
3298 | */ | 3282 | */ |
3283 | xfs_iflush_abort(ip); | ||
3299 | return XFS_ERROR(EFSCORRUPTED); | 3284 | return XFS_ERROR(EFSCORRUPTED); |
3300 | } | 3285 | } |
3301 | 3286 | ||
@@ -3325,8 +3310,7 @@ xfs_iflush_int( | |||
3325 | * If the inode isn't dirty, then just release the inode | 3310 | * If the inode isn't dirty, then just release the inode |
3326 | * flush lock and do nothing. | 3311 | * flush lock and do nothing. |
3327 | */ | 3312 | */ |
3328 | if ((ip->i_update_core == 0) && | 3313 | if (xfs_inode_clean(ip)) { |
3329 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
3330 | xfs_ifunlock(ip); | 3314 | xfs_ifunlock(ip); |
3331 | return 0; | 3315 | return 0; |
3332 | } | 3316 | } |
@@ -3434,9 +3418,9 @@ xfs_iflush_int( | |||
3434 | * has been updated, then make the conversion permanent. | 3418 | * has been updated, then make the conversion permanent. |
3435 | */ | 3419 | */ |
3436 | ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || | 3420 | ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1 || |
3437 | XFS_SB_VERSION_HASNLINK(&mp->m_sb)); | 3421 | xfs_sb_version_hasnlink(&mp->m_sb)); |
3438 | if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { | 3422 | if (ip->i_d.di_version == XFS_DINODE_VERSION_1) { |
3439 | if (!XFS_SB_VERSION_HASNLINK(&mp->m_sb)) { | 3423 | if (!xfs_sb_version_hasnlink(&mp->m_sb)) { |
3440 | /* | 3424 | /* |
3441 | * Convert it back. | 3425 | * Convert it back. |
3442 | */ | 3426 | */ |
@@ -3459,16 +3443,9 @@ xfs_iflush_int( | |||
3459 | } | 3443 | } |
3460 | } | 3444 | } |
3461 | 3445 | ||
3462 | if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) { | 3446 | xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); |
3463 | goto corrupt_out; | 3447 | if (XFS_IFORK_Q(ip)) |
3464 | } | 3448 | xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); |
3465 | |||
3466 | if (XFS_IFORK_Q(ip)) { | ||
3467 | /* | ||
3468 | * The only error from xfs_iflush_fork is on the data fork. | ||
3469 | */ | ||
3470 | (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); | ||
3471 | } | ||
3472 | xfs_inobp_check(mp, bp); | 3449 | xfs_inobp_check(mp, bp); |
3473 | 3450 | ||
3474 | /* | 3451 | /* |