| author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-18 11:39:39 -0400 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-18 11:39:39 -0400 |
| commit | 253ba4e79edc695b2925bd2ef34de06ff4d4070c (patch) | |
| tree | 259667140ca702d6a218cc54f4df275fbbda747b /fs/xfs/xfs_inode.c | |
| parent | 188da98800893691e47eea9335a234378e32aceb (diff) | |
| parent | 65e67f5165c8a156b34ee7adf65d5ed3b16a910d (diff) | |
Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6
* 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6: (87 commits)
[XFS] Fix merge failure
[XFS] The forward declarations for the xfs_ioctl() helpers and the
[XFS] Update XFS documentation for noikeep/ikeep.
[XFS] Update XFS Documentation for ikeep and ihashsize
[XFS] Remove unused HAVE_SPLICE macro.
[XFS] Remove CONFIG_XFS_SECURITY.
[XFS] xfs_bmap_compute_maxlevels should be based on di_forkoff
[XFS] Always use di_forkoff when checking for attr space.
[XFS] Ensure the inode is joined in xfs_itruncate_finish
[XFS] Remove periodic logging of in-core superblock counters.
[XFS] fix logic error in xfs_alloc_ag_vextent_near()
[XFS] Don't error out on good I/Os.
[XFS] Catch log unmount failures.
[XFS] Sanitise xfs_log_force error checking.
[XFS] Check for errors when changing buffer pointers.
[XFS] Don't allow silent errors in xfs_inactive().
[XFS] Catch errors from xfs_imap().
[XFS] xfs_bulkstat_one_dinode() never returns an error.
[XFS] xfs_iflush_fork() never returns an error.
[XFS] Catch unwritten extent conversion errors.
...
Diffstat (limited to 'fs/xfs/xfs_inode.c')
| -rw-r--r-- | fs/xfs/xfs_inode.c | 823 |
1 file changed, 400 insertions, 423 deletions
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f43a6e01d68f..ca12acb90394 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
| @@ -55,7 +55,6 @@ | |||
| 55 | 55 | ||
| 56 | kmem_zone_t *xfs_ifork_zone; | 56 | kmem_zone_t *xfs_ifork_zone; |
| 57 | kmem_zone_t *xfs_inode_zone; | 57 | kmem_zone_t *xfs_inode_zone; |
| 58 | kmem_zone_t *xfs_icluster_zone; | ||
| 59 | 58 | ||
| 60 | /* | 59 | /* |
| 61 | * Used in xfs_itruncate(). This is the maximum number of extents | 60 | * Used in xfs_itruncate(). This is the maximum number of extents |
| @@ -126,6 +125,90 @@ xfs_inobp_check( | |||
| 126 | #endif | 125 | #endif |
| 127 | 126 | ||
| 128 | /* | 127 | /* |
| 128 | * Find the buffer associated with the given inode map | ||
| 129 | * We do basic validation checks on the buffer once it has been | ||
| 130 | * retrieved from disk. | ||
| 131 | */ | ||
| 132 | STATIC int | ||
| 133 | xfs_imap_to_bp( | ||
| 134 | xfs_mount_t *mp, | ||
| 135 | xfs_trans_t *tp, | ||
| 136 | xfs_imap_t *imap, | ||
| 137 | xfs_buf_t **bpp, | ||
| 138 | uint buf_flags, | ||
| 139 | uint imap_flags) | ||
| 140 | { | ||
| 141 | int error; | ||
| 142 | int i; | ||
| 143 | int ni; | ||
| 144 | xfs_buf_t *bp; | ||
| 145 | |||
| 146 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno, | ||
| 147 | (int)imap->im_len, buf_flags, &bp); | ||
| 148 | if (error) { | ||
| 149 | if (error != EAGAIN) { | ||
| 150 | cmn_err(CE_WARN, | ||
| 151 | "xfs_imap_to_bp: xfs_trans_read_buf()returned " | ||
| 152 | "an error %d on %s. Returning error.", | ||
| 153 | error, mp->m_fsname); | ||
| 154 | } else { | ||
| 155 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); | ||
| 156 | } | ||
| 157 | return error; | ||
| 158 | } | ||
| 159 | |||
| 160 | /* | ||
| 161 | * Validate the magic number and version of every inode in the buffer | ||
| 162 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | ||
| 163 | */ | ||
| 164 | #ifdef DEBUG | ||
| 165 | ni = BBTOB(imap->im_len) >> mp->m_sb.sb_inodelog; | ||
| 166 | #else /* usual case */ | ||
| 167 | ni = 1; | ||
| 168 | #endif | ||
| 169 | |||
| 170 | for (i = 0; i < ni; i++) { | ||
| 171 | int di_ok; | ||
| 172 | xfs_dinode_t *dip; | ||
| 173 | |||
| 174 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | ||
| 175 | (i << mp->m_sb.sb_inodelog)); | ||
| 176 | di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | ||
| 177 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | ||
| 178 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | ||
| 179 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
| 180 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
| 181 | if (imap_flags & XFS_IMAP_BULKSTAT) { | ||
| 182 | xfs_trans_brelse(tp, bp); | ||
| 183 | return XFS_ERROR(EINVAL); | ||
| 184 | } | ||
| 185 | XFS_CORRUPTION_ERROR("xfs_imap_to_bp", | ||
| 186 | XFS_ERRLEVEL_HIGH, mp, dip); | ||
| 187 | #ifdef DEBUG | ||
| 188 | cmn_err(CE_PANIC, | ||
| 189 | "Device %s - bad inode magic/vsn " | ||
| 190 | "daddr %lld #%d (magic=%x)", | ||
| 191 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
| 192 | (unsigned long long)imap->im_blkno, i, | ||
| 193 | be16_to_cpu(dip->di_core.di_magic)); | ||
| 194 | #endif | ||
| 195 | xfs_trans_brelse(tp, bp); | ||
| 196 | return XFS_ERROR(EFSCORRUPTED); | ||
| 197 | } | ||
| 198 | } | ||
| 199 | |||
| 200 | xfs_inobp_check(mp, bp); | ||
| 201 | |||
| 202 | /* | ||
| 203 | * Mark the buffer as an inode buffer now that it looks good | ||
| 204 | */ | ||
| 205 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | ||
| 206 | |||
| 207 | *bpp = bp; | ||
| 208 | return 0; | ||
| 209 | } | ||
| 210 | |||
| 211 | /* | ||
| 129 | * This routine is called to map an inode number within a file | 212 | * This routine is called to map an inode number within a file |
| 130 | * system to the buffer containing the on-disk version of the | 213 | * system to the buffer containing the on-disk version of the |
| 131 | * inode. It returns a pointer to the buffer containing the | 214 | * inode. It returns a pointer to the buffer containing the |
| @@ -147,72 +230,19 @@ xfs_inotobp( | |||
| 147 | xfs_buf_t **bpp, | 230 | xfs_buf_t **bpp, |
| 148 | int *offset) | 231 | int *offset) |
| 149 | { | 232 | { |
| 150 | int di_ok; | ||
| 151 | xfs_imap_t imap; | 233 | xfs_imap_t imap; |
| 152 | xfs_buf_t *bp; | 234 | xfs_buf_t *bp; |
| 153 | int error; | 235 | int error; |
| 154 | xfs_dinode_t *dip; | ||
| 155 | 236 | ||
| 156 | /* | ||
| 157 | * Call the space management code to find the location of the | ||
| 158 | * inode on disk. | ||
| 159 | */ | ||
| 160 | imap.im_blkno = 0; | 237 | imap.im_blkno = 0; |
| 161 | error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); | 238 | error = xfs_imap(mp, tp, ino, &imap, XFS_IMAP_LOOKUP); |
| 162 | if (error != 0) { | 239 | if (error) |
| 163 | cmn_err(CE_WARN, | ||
| 164 | "xfs_inotobp: xfs_imap() returned an " | ||
| 165 | "error %d on %s. Returning error.", error, mp->m_fsname); | ||
| 166 | return error; | 240 | return error; |
| 167 | } | ||
| 168 | 241 | ||
| 169 | /* | 242 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, XFS_BUF_LOCK, 0); |
| 170 | * If the inode number maps to a block outside the bounds of the | 243 | if (error) |
| 171 | * file system then return NULL rather than calling read_buf | ||
| 172 | * and panicing when we get an error from the driver. | ||
| 173 | */ | ||
| 174 | if ((imap.im_blkno + imap.im_len) > | ||
| 175 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
| 176 | cmn_err(CE_WARN, | ||
| 177 | "xfs_inotobp: inode number (%llu + %d) maps to a block outside the bounds " | ||
| 178 | "of the file system %s. Returning EINVAL.", | ||
| 179 | (unsigned long long)imap.im_blkno, | ||
| 180 | imap.im_len, mp->m_fsname); | ||
| 181 | return XFS_ERROR(EINVAL); | ||
| 182 | } | ||
| 183 | |||
| 184 | /* | ||
| 185 | * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will | ||
| 186 | * default to just a read_buf() call. | ||
| 187 | */ | ||
| 188 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, | ||
| 189 | (int)imap.im_len, XFS_BUF_LOCK, &bp); | ||
| 190 | |||
| 191 | if (error) { | ||
| 192 | cmn_err(CE_WARN, | ||
| 193 | "xfs_inotobp: xfs_trans_read_buf() returned an " | ||
| 194 | "error %d on %s. Returning error.", error, mp->m_fsname); | ||
| 195 | return error; | 244 | return error; |
| 196 | } | ||
| 197 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, 0); | ||
| 198 | di_ok = | ||
| 199 | be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | ||
| 200 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | ||
| 201 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP, | ||
| 202 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
| 203 | XFS_CORRUPTION_ERROR("xfs_inotobp", XFS_ERRLEVEL_LOW, mp, dip); | ||
| 204 | xfs_trans_brelse(tp, bp); | ||
| 205 | cmn_err(CE_WARN, | ||
| 206 | "xfs_inotobp: XFS_TEST_ERROR() returned an " | ||
| 207 | "error on %s. Returning EFSCORRUPTED.", mp->m_fsname); | ||
| 208 | return XFS_ERROR(EFSCORRUPTED); | ||
| 209 | } | ||
| 210 | 245 | ||
| 211 | xfs_inobp_check(mp, bp); | ||
| 212 | |||
| 213 | /* | ||
| 214 | * Set *dipp to point to the on-disk inode in the buffer. | ||
| 215 | */ | ||
| 216 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 246 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); |
| 217 | *bpp = bp; | 247 | *bpp = bp; |
| 218 | *offset = imap.im_boffset; | 248 | *offset = imap.im_boffset; |
| @@ -248,46 +278,21 @@ xfs_itobp( | |||
| 248 | xfs_dinode_t **dipp, | 278 | xfs_dinode_t **dipp, |
| 249 | xfs_buf_t **bpp, | 279 | xfs_buf_t **bpp, |
| 250 | xfs_daddr_t bno, | 280 | xfs_daddr_t bno, |
| 251 | uint imap_flags) | 281 | uint imap_flags, |
| 282 | uint buf_flags) | ||
| 252 | { | 283 | { |
| 253 | xfs_imap_t imap; | 284 | xfs_imap_t imap; |
| 254 | xfs_buf_t *bp; | 285 | xfs_buf_t *bp; |
| 255 | int error; | 286 | int error; |
| 256 | int i; | ||
| 257 | int ni; | ||
| 258 | 287 | ||
| 259 | if (ip->i_blkno == (xfs_daddr_t)0) { | 288 | if (ip->i_blkno == (xfs_daddr_t)0) { |
| 260 | /* | ||
| 261 | * Call the space management code to find the location of the | ||
| 262 | * inode on disk. | ||
| 263 | */ | ||
| 264 | imap.im_blkno = bno; | 289 | imap.im_blkno = bno; |
| 265 | if ((error = xfs_imap(mp, tp, ip->i_ino, &imap, | 290 | error = xfs_imap(mp, tp, ip->i_ino, &imap, |
| 266 | XFS_IMAP_LOOKUP | imap_flags))) | 291 | XFS_IMAP_LOOKUP | imap_flags); |
| 292 | if (error) | ||
| 267 | return error; | 293 | return error; |
| 268 | 294 | ||
| 269 | /* | 295 | /* |
| 270 | * If the inode number maps to a block outside the bounds | ||
| 271 | * of the file system then return NULL rather than calling | ||
| 272 | * read_buf and panicing when we get an error from the | ||
| 273 | * driver. | ||
| 274 | */ | ||
| 275 | if ((imap.im_blkno + imap.im_len) > | ||
| 276 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
| 277 | #ifdef DEBUG | ||
| 278 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " | ||
| 279 | "(imap.im_blkno (0x%llx) " | ||
| 280 | "+ imap.im_len (0x%llx)) > " | ||
| 281 | " XFS_FSB_TO_BB(mp, " | ||
| 282 | "mp->m_sb.sb_dblocks) (0x%llx)", | ||
| 283 | (unsigned long long) imap.im_blkno, | ||
| 284 | (unsigned long long) imap.im_len, | ||
| 285 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | ||
| 286 | #endif /* DEBUG */ | ||
| 287 | return XFS_ERROR(EINVAL); | ||
| 288 | } | ||
| 289 | |||
| 290 | /* | ||
| 291 | * Fill in the fields in the inode that will be used to | 296 | * Fill in the fields in the inode that will be used to |
| 292 | * map the inode to its buffer from now on. | 297 | * map the inode to its buffer from now on. |
| 293 | */ | 298 | */ |
| @@ -305,76 +310,17 @@ xfs_itobp( | |||
| 305 | } | 310 | } |
| 306 | ASSERT(bno == 0 || bno == imap.im_blkno); | 311 | ASSERT(bno == 0 || bno == imap.im_blkno); |
| 307 | 312 | ||
| 308 | /* | 313 | error = xfs_imap_to_bp(mp, tp, &imap, &bp, buf_flags, imap_flags); |
| 309 | * Read in the buffer. If tp is NULL, xfs_trans_read_buf() will | 314 | if (error) |
| 310 | * default to just a read_buf() call. | ||
| 311 | */ | ||
| 312 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap.im_blkno, | ||
| 313 | (int)imap.im_len, XFS_BUF_LOCK, &bp); | ||
| 314 | if (error) { | ||
| 315 | #ifdef DEBUG | ||
| 316 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_itobp: " | ||
| 317 | "xfs_trans_read_buf() returned error %d, " | ||
| 318 | "imap.im_blkno 0x%llx, imap.im_len 0x%llx", | ||
| 319 | error, (unsigned long long) imap.im_blkno, | ||
| 320 | (unsigned long long) imap.im_len); | ||
| 321 | #endif /* DEBUG */ | ||
| 322 | return error; | 315 | return error; |
| 323 | } | ||
| 324 | |||
| 325 | /* | ||
| 326 | * Validate the magic number and version of every inode in the buffer | ||
| 327 | * (if DEBUG kernel) or the first inode in the buffer, otherwise. | ||
| 328 | * No validation is done here in userspace (xfs_repair). | ||
| 329 | */ | ||
| 330 | #if !defined(__KERNEL__) | ||
| 331 | ni = 0; | ||
| 332 | #elif defined(DEBUG) | ||
| 333 | ni = BBTOB(imap.im_len) >> mp->m_sb.sb_inodelog; | ||
| 334 | #else /* usual case */ | ||
| 335 | ni = 1; | ||
| 336 | #endif | ||
| 337 | |||
| 338 | for (i = 0; i < ni; i++) { | ||
| 339 | int di_ok; | ||
| 340 | xfs_dinode_t *dip; | ||
| 341 | 316 | ||
| 342 | dip = (xfs_dinode_t *)xfs_buf_offset(bp, | 317 | if (!bp) { |
| 343 | (i << mp->m_sb.sb_inodelog)); | 318 | ASSERT(buf_flags & XFS_BUF_TRYLOCK); |
| 344 | di_ok = be16_to_cpu(dip->di_core.di_magic) == XFS_DINODE_MAGIC && | 319 | ASSERT(tp == NULL); |
| 345 | XFS_DINODE_GOOD_VERSION(dip->di_core.di_version); | 320 | *bpp = NULL; |
| 346 | if (unlikely(XFS_TEST_ERROR(!di_ok, mp, | 321 | return EAGAIN; |
| 347 | XFS_ERRTAG_ITOBP_INOTOBP, | ||
| 348 | XFS_RANDOM_ITOBP_INOTOBP))) { | ||
| 349 | if (imap_flags & XFS_IMAP_BULKSTAT) { | ||
| 350 | xfs_trans_brelse(tp, bp); | ||
| 351 | return XFS_ERROR(EINVAL); | ||
| 352 | } | ||
| 353 | #ifdef DEBUG | ||
| 354 | cmn_err(CE_ALERT, | ||
| 355 | "Device %s - bad inode magic/vsn " | ||
| 356 | "daddr %lld #%d (magic=%x)", | ||
| 357 | XFS_BUFTARG_NAME(mp->m_ddev_targp), | ||
| 358 | (unsigned long long)imap.im_blkno, i, | ||
| 359 | be16_to_cpu(dip->di_core.di_magic)); | ||
| 360 | #endif | ||
| 361 | XFS_CORRUPTION_ERROR("xfs_itobp", XFS_ERRLEVEL_HIGH, | ||
| 362 | mp, dip); | ||
| 363 | xfs_trans_brelse(tp, bp); | ||
| 364 | return XFS_ERROR(EFSCORRUPTED); | ||
| 365 | } | ||
| 366 | } | 322 | } |
| 367 | 323 | ||
| 368 | xfs_inobp_check(mp, bp); | ||
| 369 | |||
| 370 | /* | ||
| 371 | * Mark the buffer as an inode buffer now that it looks good | ||
| 372 | */ | ||
| 373 | XFS_BUF_SET_VTYPE(bp, B_FS_INO); | ||
| 374 | |||
| 375 | /* | ||
| 376 | * Set *dipp to point to the on-disk inode in the buffer. | ||
| 377 | */ | ||
| 378 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); | 324 | *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset); |
| 379 | *bpp = bp; | 325 | *bpp = bp; |
| 380 | return 0; | 326 | return 0; |
| @@ -878,7 +824,7 @@ xfs_iread( | |||
| 878 | * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will | 824 | * return NULL as well. Set i_blkno to 0 so that xfs_itobp() will |
| 879 | * know that this is a new incore inode. | 825 | * know that this is a new incore inode. |
| 880 | */ | 826 | */ |
| 881 | error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags); | 827 | error = xfs_itobp(mp, tp, ip, &dip, &bp, bno, imap_flags, XFS_BUF_LOCK); |
| 882 | if (error) { | 828 | if (error) { |
| 883 | kmem_zone_free(xfs_inode_zone, ip); | 829 | kmem_zone_free(xfs_inode_zone, ip); |
| 884 | return error; | 830 | return error; |
| @@ -1518,51 +1464,50 @@ xfs_itruncate_start( | |||
| 1518 | } | 1464 | } |
| 1519 | 1465 | ||
| 1520 | /* | 1466 | /* |
| 1521 | * Shrink the file to the given new_size. The new | 1467 | * Shrink the file to the given new_size. The new size must be smaller than |
| 1522 | * size must be smaller than the current size. | 1468 | * the current size. This will free up the underlying blocks in the removed |
| 1523 | * This will free up the underlying blocks | 1469 | * range after a call to xfs_itruncate_start() or xfs_atruncate_start(). |
| 1524 | * in the removed range after a call to xfs_itruncate_start() | ||
| 1525 | * or xfs_atruncate_start(). | ||
| 1526 | * | 1470 | * |
| 1527 | * The transaction passed to this routine must have made | 1471 | * The transaction passed to this routine must have made a permanent log |
| 1528 | * a permanent log reservation of at least XFS_ITRUNCATE_LOG_RES. | 1472 | * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the |
| 1529 | * This routine may commit the given transaction and | 1473 | * given transaction and start new ones, so make sure everything involved in |
| 1530 | * start new ones, so make sure everything involved in | 1474 | * the transaction is tidy before calling here. Some transaction will be |
| 1531 | * the transaction is tidy before calling here. | 1475 | * returned to the caller to be committed. The incoming transaction must |
| 1532 | * Some transaction will be returned to the caller to be | 1476 | * already include the inode, and both inode locks must be held exclusively. |
| 1533 | * committed. The incoming transaction must already include | 1477 | * The inode must also be "held" within the transaction. On return the inode |
| 1534 | * the inode, and both inode locks must be held exclusively. | 1478 | * will be "held" within the returned transaction. This routine does NOT |
| 1535 | * The inode must also be "held" within the transaction. On | 1479 | * require any disk space to be reserved for it within the transaction. |
| 1536 | * return the inode will be "held" within the returned transaction. | ||
| 1537 | * This routine does NOT require any disk space to be reserved | ||
| 1538 | * for it within the transaction. | ||
| 1539 | * | 1480 | * |
| 1540 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, | 1481 | * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it |
| 1541 | * and it indicates the fork which is to be truncated. For the | 1482 | * indicates the fork which is to be truncated. For the attribute fork we only |
| 1542 | * attribute fork we only support truncation to size 0. | 1483 | * support truncation to size 0. |
| 1543 | * | 1484 | * |
| 1544 | * We use the sync parameter to indicate whether or not the first | 1485 | * We use the sync parameter to indicate whether or not the first transaction |
| 1545 | * transaction we perform might have to be synchronous. For the attr fork, | 1486 | * we perform might have to be synchronous. For the attr fork, it needs to be |
| 1546 | * it needs to be so if the unlink of the inode is not yet known to be | 1487 | * so if the unlink of the inode is not yet known to be permanent in the log. |
| 1547 | * permanent in the log. This keeps us from freeing and reusing the | 1488 | * This keeps us from freeing and reusing the blocks of the attribute fork |
| 1548 | * blocks of the attribute fork before the unlink of the inode becomes | 1489 | * before the unlink of the inode becomes permanent. |
| 1549 | * permanent. | ||
| 1550 | * | 1490 | * |
| 1551 | * For the data fork, we normally have to run synchronously if we're | 1491 | * For the data fork, we normally have to run synchronously if we're being |
| 1552 | * being called out of the inactive path or we're being called | 1492 | * called out of the inactive path or we're being called out of the create path |
| 1553 | * out of the create path where we're truncating an existing file. | 1493 | * where we're truncating an existing file. Either way, the truncate needs to |
| 1554 | * Either way, the truncate needs to be sync so blocks don't reappear | 1494 | * be sync so blocks don't reappear in the file with altered data in case of a |
| 1555 | * in the file with altered data in case of a crash. wsync filesystems | 1495 | * crash. wsync filesystems can run the first case async because anything that |
| 1556 | * can run the first case async because anything that shrinks the inode | 1496 | * shrinks the inode has to run sync so by the time we're called here from |
| 1557 | * has to run sync so by the time we're called here from inactive, the | 1497 | * inactive, the inode size is permanently set to 0. |
| 1558 | * inode size is permanently set to 0. | ||
| 1559 | * | 1498 | * |
| 1560 | * Calls from the truncate path always need to be sync unless we're | 1499 | * Calls from the truncate path always need to be sync unless we're in a wsync |
| 1561 | * in a wsync filesystem and the file has already been unlinked. | 1500 | * filesystem and the file has already been unlinked. |
| 1562 | * | 1501 | * |
| 1563 | * The caller is responsible for correctly setting the sync parameter. | 1502 | * The caller is responsible for correctly setting the sync parameter. It gets |
| 1564 | * It gets too hard for us to guess here which path we're being called | 1503 | * too hard for us to guess here which path we're being called out of just |
| 1565 | * out of just based on inode state. | 1504 | * based on inode state. |
| 1505 | * | ||
| 1506 | * If we get an error, we must return with the inode locked and linked into the | ||
| 1507 | * current transaction. This keeps things simple for the higher level code, | ||
| 1508 | * because it always knows that the inode is locked and held in the transaction | ||
| 1509 | * that returns to it whether errors occur or not. We don't mark the inode | ||
| 1510 | * dirty on error so that transactions can be easily aborted if possible. | ||
| 1566 | */ | 1511 | */ |
| 1567 | int | 1512 | int |
| 1568 | xfs_itruncate_finish( | 1513 | xfs_itruncate_finish( |
| @@ -1741,65 +1686,51 @@ xfs_itruncate_finish( | |||
| 1741 | */ | 1686 | */ |
| 1742 | error = xfs_bmap_finish(tp, &free_list, &committed); | 1687 | error = xfs_bmap_finish(tp, &free_list, &committed); |
| 1743 | ntp = *tp; | 1688 | ntp = *tp; |
| 1689 | if (committed) { | ||
| 1690 | /* link the inode into the next xact in the chain */ | ||
| 1691 | xfs_trans_ijoin(ntp, ip, | ||
| 1692 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
| 1693 | xfs_trans_ihold(ntp, ip); | ||
| 1694 | } | ||
| 1695 | |||
| 1744 | if (error) { | 1696 | if (error) { |
| 1745 | /* | 1697 | /* |
| 1746 | * If the bmap finish call encounters an error, | 1698 | * If the bmap finish call encounters an error, return |
| 1747 | * return to the caller where the transaction | 1699 | * to the caller where the transaction can be properly |
| 1748 | * can be properly aborted. We just need to | 1700 | * aborted. We just need to make sure we're not |
| 1749 | * make sure we're not holding any resources | 1701 | * holding any resources that we were not when we came |
| 1750 | * that we were not when we came in. | 1702 | * in. |
| 1751 | * | 1703 | * |
| 1752 | * Aborting from this point might lose some | 1704 | * Aborting from this point might lose some blocks in |
| 1753 | * blocks in the file system, but oh well. | 1705 | * the file system, but oh well. |
| 1754 | */ | 1706 | */ |
| 1755 | xfs_bmap_cancel(&free_list); | 1707 | xfs_bmap_cancel(&free_list); |
| 1756 | if (committed) { | ||
| 1757 | /* | ||
| 1758 | * If the passed in transaction committed | ||
| 1759 | * in xfs_bmap_finish(), then we want to | ||
| 1760 | * add the inode to this one before returning. | ||
| 1761 | * This keeps things simple for the higher | ||
| 1762 | * level code, because it always knows that | ||
| 1763 | * the inode is locked and held in the | ||
| 1764 | * transaction that returns to it whether | ||
| 1765 | * errors occur or not. We don't mark the | ||
| 1766 | * inode dirty so that this transaction can | ||
| 1767 | * be easily aborted if possible. | ||
| 1768 | */ | ||
| 1769 | xfs_trans_ijoin(ntp, ip, | ||
| 1770 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
| 1771 | xfs_trans_ihold(ntp, ip); | ||
| 1772 | } | ||
| 1773 | return error; | 1708 | return error; |
| 1774 | } | 1709 | } |
| 1775 | 1710 | ||
| 1776 | if (committed) { | 1711 | if (committed) { |
| 1777 | /* | 1712 | /* |
| 1778 | * The first xact was committed, | 1713 | * Mark the inode dirty so it will be logged and |
| 1779 | * so add the inode to the new one. | 1714 | * moved forward in the log as part of every commit. |
| 1780 | * Mark it dirty so it will be logged | ||
| 1781 | * and moved forward in the log as | ||
| 1782 | * part of every commit. | ||
| 1783 | */ | 1715 | */ |
| 1784 | xfs_trans_ijoin(ntp, ip, | ||
| 1785 | XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | ||
| 1786 | xfs_trans_ihold(ntp, ip); | ||
| 1787 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); | 1716 | xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); |
| 1788 | } | 1717 | } |
| 1718 | |||
| 1789 | ntp = xfs_trans_dup(ntp); | 1719 | ntp = xfs_trans_dup(ntp); |
| 1790 | (void) xfs_trans_commit(*tp, 0); | 1720 | error = xfs_trans_commit(*tp, 0); |
| 1791 | *tp = ntp; | 1721 | *tp = ntp; |
| 1792 | error = xfs_trans_reserve(ntp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0, | 1722 | |
| 1793 | XFS_TRANS_PERM_LOG_RES, | 1723 | /* link the inode into the next transaction in the chain */ |
| 1794 | XFS_ITRUNCATE_LOG_COUNT); | ||
| 1795 | /* | ||
| 1796 | * Add the inode being truncated to the next chained | ||
| 1797 | * transaction. | ||
| 1798 | */ | ||
| 1799 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); | 1724 | xfs_trans_ijoin(ntp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL); |
| 1800 | xfs_trans_ihold(ntp, ip); | 1725 | xfs_trans_ihold(ntp, ip); |
| 1726 | |||
| 1727 | if (!error) | ||
| 1728 | error = xfs_trans_reserve(ntp, 0, | ||
| 1729 | XFS_ITRUNCATE_LOG_RES(mp), 0, | ||
| 1730 | XFS_TRANS_PERM_LOG_RES, | ||
| 1731 | XFS_ITRUNCATE_LOG_COUNT); | ||
| 1801 | if (error) | 1732 | if (error) |
| 1802 | return (error); | 1733 | return error; |
| 1803 | } | 1734 | } |
| 1804 | /* | 1735 | /* |
| 1805 | * Only update the size in the case of the data fork, but | 1736 | * Only update the size in the case of the data fork, but |
| @@ -1967,7 +1898,7 @@ xfs_iunlink( | |||
| 1967 | * Here we put the head pointer into our next pointer, | 1898 | * Here we put the head pointer into our next pointer, |
| 1968 | * and then we fall through to point the head at us. | 1899 | * and then we fall through to point the head at us. |
| 1969 | */ | 1900 | */ |
| 1970 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 1901 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
| 1971 | if (error) | 1902 | if (error) |
| 1972 | return error; | 1903 | return error; |
| 1973 | 1904 | ||
| @@ -2075,7 +2006,7 @@ xfs_iunlink_remove( | |||
| 2075 | * of dealing with the buffer when there is no need to | 2006 | * of dealing with the buffer when there is no need to |
| 2076 | * change it. | 2007 | * change it. |
| 2077 | */ | 2008 | */ |
| 2078 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 2009 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
| 2079 | if (error) { | 2010 | if (error) { |
| 2080 | cmn_err(CE_WARN, | 2011 | cmn_err(CE_WARN, |
| 2081 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 2012 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
| @@ -2137,7 +2068,7 @@ xfs_iunlink_remove( | |||
| 2137 | * Now last_ibp points to the buffer previous to us on | 2068 | * Now last_ibp points to the buffer previous to us on |
| 2138 | * the unlinked list. Pull us from the list. | 2069 | * the unlinked list. Pull us from the list. |
| 2139 | */ | 2070 | */ |
| 2140 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0); | 2071 | error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
| 2141 | if (error) { | 2072 | if (error) { |
| 2142 | cmn_err(CE_WARN, | 2073 | cmn_err(CE_WARN, |
| 2143 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", | 2074 | "xfs_iunlink_remove: xfs_itobp() returned an error %d on %s. Returning error.", |
| @@ -2172,13 +2103,6 @@ xfs_iunlink_remove( | |||
| 2172 | return 0; | 2103 | return 0; |
| 2173 | } | 2104 | } |
| 2174 | 2105 | ||
| 2175 | STATIC_INLINE int xfs_inode_clean(xfs_inode_t *ip) | ||
| 2176 | { | ||
| 2177 | return (((ip->i_itemp == NULL) || | ||
| 2178 | !(ip->i_itemp->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
| 2179 | (ip->i_update_core == 0)); | ||
| 2180 | } | ||
| 2181 | |||
| 2182 | STATIC void | 2106 | STATIC void |
| 2183 | xfs_ifree_cluster( | 2107 | xfs_ifree_cluster( |
| 2184 | xfs_inode_t *free_ip, | 2108 | xfs_inode_t *free_ip, |
| @@ -2400,7 +2324,7 @@ xfs_ifree( | |||
| 2400 | 2324 | ||
| 2401 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 2325 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
| 2402 | 2326 | ||
| 2403 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0); | 2327 | error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0, 0, XFS_BUF_LOCK); |
| 2404 | if (error) | 2328 | if (error) |
| 2405 | return error; | 2329 | return error; |
| 2406 | 2330 | ||
| @@ -2678,14 +2602,31 @@ xfs_imap( | |||
| 2678 | fsbno = imap->im_blkno ? | 2602 | fsbno = imap->im_blkno ? |
| 2679 | XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; | 2603 | XFS_DADDR_TO_FSB(mp, imap->im_blkno) : NULLFSBLOCK; |
| 2680 | error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); | 2604 | error = xfs_dilocate(mp, tp, ino, &fsbno, &len, &off, flags); |
| 2681 | if (error != 0) { | 2605 | if (error) |
| 2682 | return error; | 2606 | return error; |
| 2683 | } | 2607 | |
| 2684 | imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); | 2608 | imap->im_blkno = XFS_FSB_TO_DADDR(mp, fsbno); |
| 2685 | imap->im_len = XFS_FSB_TO_BB(mp, len); | 2609 | imap->im_len = XFS_FSB_TO_BB(mp, len); |
| 2686 | imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); | 2610 | imap->im_agblkno = XFS_FSB_TO_AGBNO(mp, fsbno); |
| 2687 | imap->im_ioffset = (ushort)off; | 2611 | imap->im_ioffset = (ushort)off; |
| 2688 | imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); | 2612 | imap->im_boffset = (ushort)(off << mp->m_sb.sb_inodelog); |
| 2613 | |||
| 2614 | /* | ||
| 2615 | * If the inode number maps to a block outside the bounds | ||
| 2616 | * of the file system then return NULL rather than calling | ||
| 2617 | * read_buf and panicing when we get an error from the | ||
| 2618 | * driver. | ||
| 2619 | */ | ||
| 2620 | if ((imap->im_blkno + imap->im_len) > | ||
| 2621 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) { | ||
| 2622 | xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " | ||
| 2623 | "(imap->im_blkno (0x%llx) + imap->im_len (0x%llx)) > " | ||
| 2624 | " XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) (0x%llx)", | ||
| 2625 | (unsigned long long) imap->im_blkno, | ||
| 2626 | (unsigned long long) imap->im_len, | ||
| 2627 | XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); | ||
| 2628 | return EINVAL; | ||
| 2629 | } | ||
| 2689 | return 0; | 2630 | return 0; |
| 2690 | } | 2631 | } |
| 2691 | 2632 | ||
| @@ -2826,38 +2767,41 @@ xfs_iunpin( | |||
| 2826 | } | 2767 | } |
| 2827 | 2768 | ||
| 2828 | /* | 2769 | /* |
| 2829 | * This is called to wait for the given inode to be unpinned. | 2770 | * This is called to unpin an inode. It can be directed to wait or to return |
| 2830 | * It will sleep until this happens. The caller must have the | 2771 | * immediately without waiting for the inode to be unpinned. The caller must |
| 2831 | * inode locked in at least shared mode so that the buffer cannot | 2772 | * have the inode locked in at least shared mode so that the buffer cannot be |
| 2832 | * be subsequently pinned once someone is waiting for it to be | 2773 | * subsequently pinned once someone is waiting for it to be unpinned. |
| 2833 | * unpinned. | ||
| 2834 | */ | 2774 | */ |
| 2835 | STATIC void | 2775 | STATIC void |
| 2836 | xfs_iunpin_wait( | 2776 | __xfs_iunpin_wait( |
| 2837 | xfs_inode_t *ip) | 2777 | xfs_inode_t *ip, |
| 2778 | int wait) | ||
| 2838 | { | 2779 | { |
| 2839 | xfs_inode_log_item_t *iip; | 2780 | xfs_inode_log_item_t *iip = ip->i_itemp; |
| 2840 | xfs_lsn_t lsn; | ||
| 2841 | 2781 | ||
| 2842 | ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); | 2782 | ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS)); |
| 2843 | 2783 | if (atomic_read(&ip->i_pincount) == 0) | |
| 2844 | if (atomic_read(&ip->i_pincount) == 0) { | ||
| 2845 | return; | 2784 | return; |
| 2846 | } | ||
| 2847 | 2785 | ||
| 2848 | iip = ip->i_itemp; | 2786 | /* Give the log a push to start the unpinning I/O */ |
| 2849 | if (iip && iip->ili_last_lsn) { | 2787 | xfs_log_force(ip->i_mount, (iip && iip->ili_last_lsn) ? |
| 2850 | lsn = iip->ili_last_lsn; | 2788 | iip->ili_last_lsn : 0, XFS_LOG_FORCE); |
| 2851 | } else { | 2789 | if (wait) |
| 2852 | lsn = (xfs_lsn_t)0; | 2790 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); |
| 2853 | } | 2791 | } |
| 2854 | 2792 | ||
| 2855 | /* | 2793 | static inline void |
| 2856 | * Give the log a push so we don't wait here too long. | 2794 | xfs_iunpin_wait( |
| 2857 | */ | 2795 | xfs_inode_t *ip) |
| 2858 | xfs_log_force(ip->i_mount, lsn, XFS_LOG_FORCE); | 2796 | { |
| 2797 | __xfs_iunpin_wait(ip, 1); | ||
| 2798 | } | ||
| 2859 | 2799 | ||
| 2860 | wait_event(ip->i_ipin_wait, (atomic_read(&ip->i_pincount) == 0)); | 2800 | static inline void |
| 2801 | xfs_iunpin_nowait( | ||
| 2802 | xfs_inode_t *ip) | ||
| 2803 | { | ||
| 2804 | __xfs_iunpin_wait(ip, 0); | ||
| 2861 | } | 2805 | } |
| 2862 | 2806 | ||
| 2863 | 2807 | ||
| @@ -2932,7 +2876,7 @@ xfs_iextents_copy( | |||
| 2932 | * format indicates the current state of the fork. | 2876 | * format indicates the current state of the fork. |
| 2933 | */ | 2877 | */ |
| 2934 | /*ARGSUSED*/ | 2878 | /*ARGSUSED*/ |
| 2935 | STATIC int | 2879 | STATIC void |
| 2936 | xfs_iflush_fork( | 2880 | xfs_iflush_fork( |
| 2937 | xfs_inode_t *ip, | 2881 | xfs_inode_t *ip, |
| 2938 | xfs_dinode_t *dip, | 2882 | xfs_dinode_t *dip, |
| @@ -2953,16 +2897,16 @@ xfs_iflush_fork( | |||
| 2953 | static const short extflag[2] = | 2897 | static const short extflag[2] = |
| 2954 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; | 2898 | { XFS_ILOG_DEXT, XFS_ILOG_AEXT }; |
| 2955 | 2899 | ||
| 2956 | if (iip == NULL) | 2900 | if (!iip) |
| 2957 | return 0; | 2901 | return; |
| 2958 | ifp = XFS_IFORK_PTR(ip, whichfork); | 2902 | ifp = XFS_IFORK_PTR(ip, whichfork); |
| 2959 | /* | 2903 | /* |
| 2960 | * This can happen if we gave up in iformat in an error path, | 2904 | * This can happen if we gave up in iformat in an error path, |
| 2961 | * for the attribute fork. | 2905 | * for the attribute fork. |
| 2962 | */ | 2906 | */ |
| 2963 | if (ifp == NULL) { | 2907 | if (!ifp) { |
| 2964 | ASSERT(whichfork == XFS_ATTR_FORK); | 2908 | ASSERT(whichfork == XFS_ATTR_FORK); |
| 2965 | return 0; | 2909 | return; |
| 2966 | } | 2910 | } |
| 2967 | cp = XFS_DFORK_PTR(dip, whichfork); | 2911 | cp = XFS_DFORK_PTR(dip, whichfork); |
| 2968 | mp = ip->i_mount; | 2912 | mp = ip->i_mount; |
| @@ -3023,8 +2967,145 @@ xfs_iflush_fork( | |||
| 3023 | ASSERT(0); | 2967 | ASSERT(0); |
| 3024 | break; | 2968 | break; |
| 3025 | } | 2969 | } |
| 2970 | } | ||
| 2971 | |||
| 2972 | STATIC int | ||
| 2973 | xfs_iflush_cluster( | ||
| 2974 | xfs_inode_t *ip, | ||
| 2975 | xfs_buf_t *bp) | ||
| 2976 | { | ||
| 2977 | xfs_mount_t *mp = ip->i_mount; | ||
| 2978 | xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); | ||
| 2979 | unsigned long first_index, mask; | ||
| 2980 | int ilist_size; | ||
| 2981 | xfs_inode_t **ilist; | ||
| 2982 | xfs_inode_t *iq; | ||
| 2983 | int nr_found; | ||
| 2984 | int clcount = 0; | ||
| 2985 | int bufwasdelwri; | ||
| 2986 | int i; | ||
| 2987 | |||
| 2988 | ASSERT(pag->pagi_inodeok); | ||
| 2989 | ASSERT(pag->pag_ici_init); | ||
| 2990 | |||
| 2991 | ilist_size = XFS_INODE_CLUSTER_SIZE(mp) * sizeof(xfs_inode_t *); | ||
| 2992 | ilist = kmem_alloc(ilist_size, KM_MAYFAIL); | ||
| 2993 | if (!ilist) | ||
| 2994 | return 0; | ||
| 2995 | |||
| 2996 | mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); | ||
| 2997 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask; | ||
| 2998 | read_lock(&pag->pag_ici_lock); | ||
| 2999 | /* really need a gang lookup range call here */ | ||
| 3000 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist, | ||
| 3001 | first_index, | ||
| 3002 | XFS_INODE_CLUSTER_SIZE(mp)); | ||
| 3003 | if (nr_found == 0) | ||
| 3004 | goto out_free; | ||
| 3005 | |||
| 3006 | for (i = 0; i < nr_found; i++) { | ||
| 3007 | iq = ilist[i]; | ||
| 3008 | if (iq == ip) | ||
| 3009 | continue; | ||
| 3010 | /* if the inode lies outside this cluster, we're done. */ | ||
| 3011 | if ((XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) | ||
| 3012 | break; | ||
| 3013 | /* | ||
| 3014 | * Do an un-protected check to see if the inode is dirty and | ||
| 3015 | * is a candidate for flushing. These checks will be repeated | ||
| 3016 | * later after the appropriate locks are acquired. | ||
| 3017 | */ | ||
| 3018 | if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0) | ||
| 3019 | continue; | ||
| 3020 | |||
| 3021 | /* | ||
| 3022 | * Try to get locks. If any are unavailable or it is pinned, | ||
| 3023 | * then this inode cannot be flushed and is skipped. | ||
| 3024 | */ | ||
| 3025 | |||
| 3026 | if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) | ||
| 3027 | continue; | ||
| 3028 | if (!xfs_iflock_nowait(iq)) { | ||
| 3029 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3030 | continue; | ||
| 3031 | } | ||
| 3032 | if (xfs_ipincount(iq)) { | ||
| 3033 | xfs_ifunlock(iq); | ||
| 3034 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3035 | continue; | ||
| 3036 | } | ||
| 3037 | |||
| 3038 | /* | ||
| 3039 | * arriving here means that this inode can be flushed. First | ||
| 3040 | * re-check that it's dirty before flushing. | ||
| 3041 | */ | ||
| 3042 | if (!xfs_inode_clean(iq)) { | ||
| 3043 | int error; | ||
| 3044 | error = xfs_iflush_int(iq, bp); | ||
| 3045 | if (error) { | ||
| 3046 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3047 | goto cluster_corrupt_out; | ||
| 3048 | } | ||
| 3049 | clcount++; | ||
| 3050 | } else { | ||
| 3051 | xfs_ifunlock(iq); | ||
| 3052 | } | ||
| 3053 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3054 | } | ||
| 3055 | |||
| 3056 | if (clcount) { | ||
| 3057 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
| 3058 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
| 3059 | } | ||
| 3026 | 3060 | ||
| 3061 | out_free: | ||
| 3062 | read_unlock(&pag->pag_ici_lock); | ||
| 3063 | kmem_free(ilist, ilist_size); | ||
| 3027 | return 0; | 3064 | return 0; |
| 3065 | |||
| 3066 | |||
| 3067 | cluster_corrupt_out: | ||
| 3068 | /* | ||
| 3069 | * Corruption detected in the clustering loop. Invalidate the | ||
| 3070 | * inode buffer and shut down the filesystem. | ||
| 3071 | */ | ||
| 3072 | read_unlock(&pag->pag_ici_lock); | ||
| 3073 | /* | ||
| 3074 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
| 3075 | * brelse can handle it with no problems. If not, shut down the | ||
| 3076 | * filesystem before releasing the buffer. | ||
| 3077 | */ | ||
| 3078 | bufwasdelwri = XFS_BUF_ISDELAYWRITE(bp); | ||
| 3079 | if (bufwasdelwri) | ||
| 3080 | xfs_buf_relse(bp); | ||
| 3081 | |||
| 3082 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
| 3083 | |||
| 3084 | if (!bufwasdelwri) { | ||
| 3085 | /* | ||
| 3086 | * Just like incore_relse: if we have b_iodone functions, | ||
| 3087 | * mark the buffer as an error and call them. Otherwise | ||
| 3088 | * mark it as stale and brelse. | ||
| 3089 | */ | ||
| 3090 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
| 3091 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
| 3092 | XFS_BUF_UNDONE(bp); | ||
| 3093 | XFS_BUF_STALE(bp); | ||
| 3094 | XFS_BUF_SHUT(bp); | ||
| 3095 | XFS_BUF_ERROR(bp,EIO); | ||
| 3096 | xfs_biodone(bp); | ||
| 3097 | } else { | ||
| 3098 | XFS_BUF_STALE(bp); | ||
| 3099 | xfs_buf_relse(bp); | ||
| 3100 | } | ||
| 3101 | } | ||
| 3102 | |||
| 3103 | /* | ||
| 3104 | * Unlocks the flush lock | ||
| 3105 | */ | ||
| 3106 | xfs_iflush_abort(iq); | ||
| 3107 | kmem_free(ilist, ilist_size); | ||
| 3108 | return XFS_ERROR(EFSCORRUPTED); | ||
| 3028 | } | 3109 | } |
| 3029 | 3110 | ||
| 3030 | /* | 3111 | /* |
| @@ -3046,11 +3127,7 @@ xfs_iflush( | |||
| 3046 | xfs_dinode_t *dip; | 3127 | xfs_dinode_t *dip; |
| 3047 | xfs_mount_t *mp; | 3128 | xfs_mount_t *mp; |
| 3048 | int error; | 3129 | int error; |
| 3049 | /* REFERENCED */ | 3130 | int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK); |
| 3050 | xfs_inode_t *iq; | ||
| 3051 | int clcount; /* count of inodes clustered */ | ||
| 3052 | int bufwasdelwri; | ||
| 3053 | struct hlist_node *entry; | ||
| 3054 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; | 3131 | enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) }; |
| 3055 | 3132 | ||
| 3056 | XFS_STATS_INC(xs_iflush_count); | 3133 | XFS_STATS_INC(xs_iflush_count); |
| @@ -3067,8 +3144,7 @@ xfs_iflush( | |||
| 3067 | * If the inode isn't dirty, then just release the inode | 3144 | * If the inode isn't dirty, then just release the inode |
| 3068 | * flush lock and do nothing. | 3145 | * flush lock and do nothing. |
| 3069 | */ | 3146 | */ |
| 3070 | if ((ip->i_update_core == 0) && | 3147 | if (xfs_inode_clean(ip)) { |
| 3071 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
| 3072 | ASSERT((iip != NULL) ? | 3148 | ASSERT((iip != NULL) ? |
| 3073 | !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); | 3149 | !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); |
| 3074 | xfs_ifunlock(ip); | 3150 | xfs_ifunlock(ip); |
| @@ -3076,11 +3152,21 @@ xfs_iflush( | |||
| 3076 | } | 3152 | } |
| 3077 | 3153 | ||
| 3078 | /* | 3154 | /* |
| 3079 | * We can't flush the inode until it is unpinned, so | 3155 | * We can't flush the inode until it is unpinned, so wait for it if we |
| 3080 | * wait for it. We know noone new can pin it, because | 3156 | * are allowed to block. We know noone new can pin it, because we are |
| 3081 | * we are holding the inode lock shared and you need | 3157 | * holding the inode lock shared and you need to hold it exclusively to |
| 3082 | * to hold it exclusively to pin the inode. | 3158 | * pin the inode. |
| 3159 | * | ||
| 3160 | * If we are not allowed to block, force the log out asynchronously so | ||
| 3161 | * that when we come back the inode will be unpinned. If other inodes | ||
| 3162 | * in the same cluster are dirty, they will probably write the inode | ||
| 3163 | * out for us if they occur after the log force completes. | ||
| 3083 | */ | 3164 | */ |
| 3165 | if (noblock && xfs_ipincount(ip)) { | ||
| 3166 | xfs_iunpin_nowait(ip); | ||
| 3167 | xfs_ifunlock(ip); | ||
| 3168 | return EAGAIN; | ||
| 3169 | } | ||
| 3084 | xfs_iunpin_wait(ip); | 3170 | xfs_iunpin_wait(ip); |
| 3085 | 3171 | ||
| 3086 | /* | 3172 | /* |
| @@ -3097,15 +3183,6 @@ xfs_iflush( | |||
| 3097 | } | 3183 | } |
| 3098 | 3184 | ||
| 3099 | /* | 3185 | /* |
| 3100 | * Get the buffer containing the on-disk inode. | ||
| 3101 | */ | ||
| 3102 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0); | ||
| 3103 | if (error) { | ||
| 3104 | xfs_ifunlock(ip); | ||
| 3105 | return error; | ||
| 3106 | } | ||
| 3107 | |||
| 3108 | /* | ||
| 3109 | * Decide how buffer will be flushed out. This is done before | 3186 | * Decide how buffer will be flushed out. This is done before |
| 3110 | * the call to xfs_iflush_int because this field is zeroed by it. | 3187 | * the call to xfs_iflush_int because this field is zeroed by it. |
| 3111 | */ | 3188 | */ |
| @@ -3121,6 +3198,7 @@ xfs_iflush( | |||
| 3121 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: | 3198 | case XFS_IFLUSH_DELWRI_ELSE_SYNC: |
| 3122 | flags = 0; | 3199 | flags = 0; |
| 3123 | break; | 3200 | break; |
| 3201 | case XFS_IFLUSH_ASYNC_NOBLOCK: | ||
| 3124 | case XFS_IFLUSH_ASYNC: | 3202 | case XFS_IFLUSH_ASYNC: |
| 3125 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: | 3203 | case XFS_IFLUSH_DELWRI_ELSE_ASYNC: |
| 3126 | flags = INT_ASYNC; | 3204 | flags = INT_ASYNC; |
| @@ -3140,6 +3218,7 @@ xfs_iflush( | |||
| 3140 | case XFS_IFLUSH_DELWRI: | 3218 | case XFS_IFLUSH_DELWRI: |
| 3141 | flags = INT_DELWRI; | 3219 | flags = INT_DELWRI; |
| 3142 | break; | 3220 | break; |
| 3221 | case XFS_IFLUSH_ASYNC_NOBLOCK: | ||
| 3143 | case XFS_IFLUSH_ASYNC: | 3222 | case XFS_IFLUSH_ASYNC: |
| 3144 | flags = INT_ASYNC; | 3223 | flags = INT_ASYNC; |
| 3145 | break; | 3224 | break; |
| @@ -3154,94 +3233,41 @@ xfs_iflush( | |||
| 3154 | } | 3233 | } |
| 3155 | 3234 | ||
| 3156 | /* | 3235 | /* |
| 3157 | * First flush out the inode that xfs_iflush was called with. | 3236 | * Get the buffer containing the on-disk inode. |
| 3158 | */ | 3237 | */ |
| 3159 | error = xfs_iflush_int(ip, bp); | 3238 | error = xfs_itobp(mp, NULL, ip, &dip, &bp, 0, 0, |
| 3160 | if (error) { | 3239 | noblock ? XFS_BUF_TRYLOCK : XFS_BUF_LOCK); |
| 3161 | goto corrupt_out; | 3240 | if (error || !bp) { |
| 3241 | xfs_ifunlock(ip); | ||
| 3242 | return error; | ||
| 3162 | } | 3243 | } |
| 3163 | 3244 | ||
| 3164 | /* | 3245 | /* |
| 3165 | * inode clustering: | 3246 | * First flush out the inode that xfs_iflush was called with. |
| 3166 | * see if other inodes can be gathered into this write | ||
| 3167 | */ | 3247 | */ |
| 3168 | spin_lock(&ip->i_cluster->icl_lock); | 3248 | error = xfs_iflush_int(ip, bp); |
| 3169 | ip->i_cluster->icl_buf = bp; | 3249 | if (error) |
| 3170 | 3250 | goto corrupt_out; | |
| 3171 | clcount = 0; | ||
| 3172 | hlist_for_each_entry(iq, entry, &ip->i_cluster->icl_inodes, i_cnode) { | ||
| 3173 | if (iq == ip) | ||
| 3174 | continue; | ||
| 3175 | |||
| 3176 | /* | ||
| 3177 | * Do an un-protected check to see if the inode is dirty and | ||
| 3178 | * is a candidate for flushing. These checks will be repeated | ||
| 3179 | * later after the appropriate locks are acquired. | ||
| 3180 | */ | ||
| 3181 | iip = iq->i_itemp; | ||
| 3182 | if ((iq->i_update_core == 0) && | ||
| 3183 | ((iip == NULL) || | ||
| 3184 | !(iip->ili_format.ilf_fields & XFS_ILOG_ALL)) && | ||
| 3185 | xfs_ipincount(iq) == 0) { | ||
| 3186 | continue; | ||
| 3187 | } | ||
| 3188 | |||
| 3189 | /* | ||
| 3190 | * Try to get locks. If any are unavailable, | ||
| 3191 | * then this inode cannot be flushed and is skipped. | ||
| 3192 | */ | ||
| 3193 | |||
| 3194 | /* get inode locks (just i_lock) */ | ||
| 3195 | if (xfs_ilock_nowait(iq, XFS_ILOCK_SHARED)) { | ||
| 3196 | /* get inode flush lock */ | ||
| 3197 | if (xfs_iflock_nowait(iq)) { | ||
| 3198 | /* check if pinned */ | ||
| 3199 | if (xfs_ipincount(iq) == 0) { | ||
| 3200 | /* arriving here means that | ||
| 3201 | * this inode can be flushed. | ||
| 3202 | * first re-check that it's | ||
| 3203 | * dirty | ||
| 3204 | */ | ||
| 3205 | iip = iq->i_itemp; | ||
| 3206 | if ((iq->i_update_core != 0)|| | ||
| 3207 | ((iip != NULL) && | ||
| 3208 | (iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
| 3209 | clcount++; | ||
| 3210 | error = xfs_iflush_int(iq, bp); | ||
| 3211 | if (error) { | ||
| 3212 | xfs_iunlock(iq, | ||
| 3213 | XFS_ILOCK_SHARED); | ||
| 3214 | goto cluster_corrupt_out; | ||
| 3215 | } | ||
| 3216 | } else { | ||
| 3217 | xfs_ifunlock(iq); | ||
| 3218 | } | ||
| 3219 | } else { | ||
| 3220 | xfs_ifunlock(iq); | ||
| 3221 | } | ||
| 3222 | } | ||
| 3223 | xfs_iunlock(iq, XFS_ILOCK_SHARED); | ||
| 3224 | } | ||
| 3225 | } | ||
| 3226 | spin_unlock(&ip->i_cluster->icl_lock); | ||
| 3227 | |||
| 3228 | if (clcount) { | ||
| 3229 | XFS_STATS_INC(xs_icluster_flushcnt); | ||
| 3230 | XFS_STATS_ADD(xs_icluster_flushinode, clcount); | ||
| 3231 | } | ||
| 3232 | 3251 | ||
| 3233 | /* | 3252 | /* |
| 3234 | * If the buffer is pinned then push on the log so we won't | 3253 | * If the buffer is pinned then push on the log now so we won't |
| 3235 | * get stuck waiting in the write for too long. | 3254 | * get stuck waiting in the write for too long. |
| 3236 | */ | 3255 | */ |
| 3237 | if (XFS_BUF_ISPINNED(bp)){ | 3256 | if (XFS_BUF_ISPINNED(bp)) |
| 3238 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); | 3257 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); |
| 3239 | } | 3258 | |
| 3259 | /* | ||
| 3260 | * inode clustering: | ||
| 3261 | * see if other inodes can be gathered into this write | ||
| 3262 | */ | ||
| 3263 | error = xfs_iflush_cluster(ip, bp); | ||
| 3264 | if (error) | ||
| 3265 | goto cluster_corrupt_out; | ||
| 3240 | 3266 | ||
| 3241 | if (flags & INT_DELWRI) { | 3267 | if (flags & INT_DELWRI) { |
| 3242 | xfs_bdwrite(mp, bp); | 3268 | xfs_bdwrite(mp, bp); |
| 3243 | } else if (flags & INT_ASYNC) { | 3269 | } else if (flags & INT_ASYNC) { |
| 3244 | xfs_bawrite(mp, bp); | 3270 | error = xfs_bawrite(mp, bp); |
| 3245 | } else { | 3271 | } else { |
| 3246 | error = xfs_bwrite(mp, bp); | 3272 | error = xfs_bwrite(mp, bp); |
| 3247 | } | 3273 | } |
| @@ -3250,52 +3276,11 @@ xfs_iflush( | |||
| 3250 | corrupt_out: | 3276 | corrupt_out: |
| 3251 | xfs_buf_relse(bp); | 3277 | xfs_buf_relse(bp); |
| 3252 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | 3278 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
| 3253 | xfs_iflush_abort(ip); | ||
| 3254 | /* | ||
| 3255 | * Unlocks the flush lock | ||
| 3256 | */ | ||
| 3257 | return XFS_ERROR(EFSCORRUPTED); | ||
| 3258 | |||
| 3259 | cluster_corrupt_out: | 3279 | cluster_corrupt_out: |
| 3260 | /* Corruption detected in the clustering loop. Invalidate the | ||
| 3261 | * inode buffer and shut down the filesystem. | ||
| 3262 | */ | ||
| 3263 | spin_unlock(&ip->i_cluster->icl_lock); | ||
| 3264 | |||
| 3265 | /* | ||
| 3266 | * Clean up the buffer. If it was B_DELWRI, just release it -- | ||
| 3267 | * brelse can handle it with no problems. If not, shut down the | ||
| 3268 | * filesystem before releasing the buffer. | ||
| 3269 | */ | ||
| 3270 | if ((bufwasdelwri= XFS_BUF_ISDELAYWRITE(bp))) { | ||
| 3271 | xfs_buf_relse(bp); | ||
| 3272 | } | ||
| 3273 | |||
| 3274 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
| 3275 | |||
| 3276 | if(!bufwasdelwri) { | ||
| 3277 | /* | ||
| 3278 | * Just like incore_relse: if we have b_iodone functions, | ||
| 3279 | * mark the buffer as an error and call them. Otherwise | ||
| 3280 | * mark it as stale and brelse. | ||
| 3281 | */ | ||
| 3282 | if (XFS_BUF_IODONE_FUNC(bp)) { | ||
| 3283 | XFS_BUF_CLR_BDSTRAT_FUNC(bp); | ||
| 3284 | XFS_BUF_UNDONE(bp); | ||
| 3285 | XFS_BUF_STALE(bp); | ||
| 3286 | XFS_BUF_SHUT(bp); | ||
| 3287 | XFS_BUF_ERROR(bp,EIO); | ||
| 3288 | xfs_biodone(bp); | ||
| 3289 | } else { | ||
| 3290 | XFS_BUF_STALE(bp); | ||
| 3291 | xfs_buf_relse(bp); | ||
| 3292 | } | ||
| 3293 | } | ||
| 3294 | |||
| 3295 | xfs_iflush_abort(iq); | ||
| 3296 | /* | 3280 | /* |
| 3297 | * Unlocks the flush lock | 3281 | * Unlocks the flush lock |
| 3298 | */ | 3282 | */ |
| 3283 | xfs_iflush_abort(ip); | ||
| 3299 | return XFS_ERROR(EFSCORRUPTED); | 3284 | return XFS_ERROR(EFSCORRUPTED); |
| 3300 | } | 3285 | } |
| 3301 | 3286 | ||
| @@ -3325,8 +3310,7 @@ xfs_iflush_int( | |||
| 3325 | * If the inode isn't dirty, then just release the inode | 3310 | * If the inode isn't dirty, then just release the inode |
| 3326 | * flush lock and do nothing. | 3311 | * flush lock and do nothing. |
| 3327 | */ | 3312 | */ |
| 3328 | if ((ip->i_update_core == 0) && | 3313 | if (xfs_inode_clean(ip)) { |
| 3329 | ((iip == NULL) || !(iip->ili_format.ilf_fields & XFS_ILOG_ALL))) { | ||
| 3330 | xfs_ifunlock(ip); | 3314 | xfs_ifunlock(ip); |
| 3331 | return 0; | 3315 | return 0; |
| 3332 | } | 3316 | } |
| @@ -3459,16 +3443,9 @@ xfs_iflush_int( | |||
| 3459 | } | 3443 | } |
| 3460 | } | 3444 | } |
| 3461 | 3445 | ||
| 3462 | if (xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp) == EFSCORRUPTED) { | 3446 | xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); |
| 3463 | goto corrupt_out; | 3447 | if (XFS_IFORK_Q(ip)) |
| 3464 | } | 3448 | xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); |
| 3465 | |||
| 3466 | if (XFS_IFORK_Q(ip)) { | ||
| 3467 | /* | ||
| 3468 | * The only error from xfs_iflush_fork is on the data fork. | ||
| 3469 | */ | ||
| 3470 | (void) xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); | ||
| 3471 | } | ||
| 3472 | xfs_inobp_check(mp, bp); | 3449 | xfs_inobp_check(mp, bp); |
| 3473 | 3450 | ||
| 3474 | /* | 3451 | /* |
