diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-12-02 12:13:36 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-12-02 12:13:36 -0500 |
| commit | 8cb280c90f9cfaab3ba3afbace0b1711dee80d0c (patch) | |
| tree | b98d29b0159dd763afab1670d58019b6cb58cfa0 | |
| parent | 8fed709f343346a77888c2eef8f2d41bc637bef6 (diff) | |
| parent | c76febef574fd86566bbdf1a73a547a439115c25 (diff) | |
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
xfs: only run xfs_error_test if error injection is active
xfs: avoid moving stale inodes in the AIL
xfs: delayed alloc blocks beyond EOF are valid after writeback
xfs: push stale, pinned buffers on trylock failures
xfs: fix failed write truncation handling.
| -rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 94 | ||||
| -rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 35 | ||||
| -rw-r--r-- | fs/xfs/xfs_bmap.c | 85 | ||||
| -rw-r--r-- | fs/xfs/xfs_bmap.h | 5 | ||||
| -rw-r--r-- | fs/xfs/xfs_dfrag.c | 13 | ||||
| -rw-r--r-- | fs/xfs/xfs_error.c | 3 | ||||
| -rw-r--r-- | fs/xfs/xfs_error.h | 5 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode_item.c | 31 |
8 files changed, 188 insertions, 83 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 7d287afccde5..691f61223ed6 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
| @@ -934,7 +934,6 @@ xfs_aops_discard_page( | |||
| 934 | struct xfs_inode *ip = XFS_I(inode); | 934 | struct xfs_inode *ip = XFS_I(inode); |
| 935 | struct buffer_head *bh, *head; | 935 | struct buffer_head *bh, *head; |
| 936 | loff_t offset = page_offset(page); | 936 | loff_t offset = page_offset(page); |
| 937 | ssize_t len = 1 << inode->i_blkbits; | ||
| 938 | 937 | ||
| 939 | if (!xfs_is_delayed_page(page, IO_DELAY)) | 938 | if (!xfs_is_delayed_page(page, IO_DELAY)) |
| 940 | goto out_invalidate; | 939 | goto out_invalidate; |
| @@ -949,58 +948,14 @@ xfs_aops_discard_page( | |||
| 949 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 948 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
| 950 | bh = head = page_buffers(page); | 949 | bh = head = page_buffers(page); |
| 951 | do { | 950 | do { |
| 952 | int done; | ||
| 953 | xfs_fileoff_t offset_fsb; | ||
| 954 | xfs_bmbt_irec_t imap; | ||
| 955 | int nimaps = 1; | ||
| 956 | int error; | 951 | int error; |
| 957 | xfs_fsblock_t firstblock; | 952 | xfs_fileoff_t start_fsb; |
| 958 | xfs_bmap_free_t flist; | ||
| 959 | 953 | ||
| 960 | if (!buffer_delay(bh)) | 954 | if (!buffer_delay(bh)) |
| 961 | goto next_buffer; | 955 | goto next_buffer; |
| 962 | 956 | ||
| 963 | offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); | 957 | start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); |
| 964 | 958 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); | |
| 965 | /* | ||
| 966 | * Map the range first and check that it is a delalloc extent | ||
| 967 | * before trying to unmap the range. Otherwise we will be | ||
| 968 | * trying to remove a real extent (which requires a | ||
| 969 | * transaction) or a hole, which is probably a bad idea... | ||
| 970 | */ | ||
| 971 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, | ||
| 972 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
| 973 | &nimaps, NULL); | ||
| 974 | |||
| 975 | if (error) { | ||
| 976 | /* something screwed, just bail */ | ||
| 977 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
| 978 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
| 979 | "page discard failed delalloc mapping lookup."); | ||
| 980 | } | ||
| 981 | break; | ||
| 982 | } | ||
| 983 | if (!nimaps) { | ||
| 984 | /* nothing there */ | ||
| 985 | goto next_buffer; | ||
| 986 | } | ||
| 987 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
| 988 | /* been converted, ignore */ | ||
| 989 | goto next_buffer; | ||
| 990 | } | ||
| 991 | WARN_ON(imap.br_blockcount == 0); | ||
| 992 | |||
| 993 | /* | ||
| 994 | * Note: while we initialise the firstblock/flist pair, they | ||
| 995 | * should never be used because blocks should never be | ||
| 996 | * allocated or freed for a delalloc extent and hence we don't | ||
| 997 | * need to cancel or finish them after the xfs_bunmapi() call. | ||
| 998 | */ | ||
| 999 | xfs_bmap_init(&flist, &firstblock); | ||
| 1000 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, | ||
| 1001 | &flist, &done); | ||
| 1002 | |||
| 1003 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
| 1004 | if (error) { | 959 | if (error) { |
| 1005 | /* something screwed, just bail */ | 960 | /* something screwed, just bail */ |
| 1006 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 961 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
| @@ -1010,7 +965,7 @@ xfs_aops_discard_page( | |||
| 1010 | break; | 965 | break; |
| 1011 | } | 966 | } |
| 1012 | next_buffer: | 967 | next_buffer: |
| 1013 | offset += len; | 968 | offset += 1 << inode->i_blkbits; |
| 1014 | 969 | ||
| 1015 | } while ((bh = bh->b_this_page) != head); | 970 | } while ((bh = bh->b_this_page) != head); |
| 1016 | 971 | ||
| @@ -1505,11 +1460,42 @@ xfs_vm_write_failed( | |||
| 1505 | struct inode *inode = mapping->host; | 1460 | struct inode *inode = mapping->host; |
| 1506 | 1461 | ||
| 1507 | if (to > inode->i_size) { | 1462 | if (to > inode->i_size) { |
| 1508 | struct iattr ia = { | 1463 | /* |
| 1509 | .ia_valid = ATTR_SIZE | ATTR_FORCE, | 1464 | * punch out the delalloc blocks we have already allocated. We |
| 1510 | .ia_size = inode->i_size, | 1465 | * don't call xfs_setattr() to do this as we may be in the |
| 1511 | }; | 1466 | * middle of a multi-iovec write and so the vfs inode->i_size |
| 1512 | xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK); | 1467 | * will not match the xfs ip->i_size and so it will zero too |
| 1468 | * much. Hence we just truncate the page cache to zero what is | ||
| 1469 | * necessary and punch the delalloc blocks directly. | ||
| 1470 | */ | ||
| 1471 | struct xfs_inode *ip = XFS_I(inode); | ||
| 1472 | xfs_fileoff_t start_fsb; | ||
| 1473 | xfs_fileoff_t end_fsb; | ||
| 1474 | int error; | ||
| 1475 | |||
| 1476 | truncate_pagecache(inode, to, inode->i_size); | ||
| 1477 | |||
| 1478 | /* | ||
| 1479 | * Check if there are any blocks that are outside of i_size | ||
| 1480 | * that need to be trimmed back. | ||
| 1481 | */ | ||
| 1482 | start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; | ||
| 1483 | end_fsb = XFS_B_TO_FSB(ip->i_mount, to); | ||
| 1484 | if (end_fsb <= start_fsb) | ||
| 1485 | return; | ||
| 1486 | |||
| 1487 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 1488 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, | ||
| 1489 | end_fsb - start_fsb); | ||
| 1490 | if (error) { | ||
| 1491 | /* something screwed, just bail */ | ||
| 1492 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
| 1493 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
| 1494 | "xfs_vm_write_failed: unable to clean up ino %lld", | ||
| 1495 | ip->i_ino); | ||
| 1496 | } | ||
| 1497 | } | ||
| 1498 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 1513 | } | 1499 | } |
| 1514 | } | 1500 | } |
| 1515 | 1501 | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index aa1d353def29..4c5deb6e9e31 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
| @@ -488,29 +488,16 @@ found: | |||
| 488 | spin_unlock(&pag->pag_buf_lock); | 488 | spin_unlock(&pag->pag_buf_lock); |
| 489 | xfs_perag_put(pag); | 489 | xfs_perag_put(pag); |
| 490 | 490 | ||
| 491 | /* Attempt to get the semaphore without sleeping, | 491 | if (xfs_buf_cond_lock(bp)) { |
| 492 | * if this does not work then we need to drop the | 492 | /* failed, so wait for the lock if requested. */ |
| 493 | * spinlock and do a hard attempt on the semaphore. | ||
| 494 | */ | ||
| 495 | if (down_trylock(&bp->b_sema)) { | ||
| 496 | if (!(flags & XBF_TRYLOCK)) { | 493 | if (!(flags & XBF_TRYLOCK)) { |
| 497 | /* wait for buffer ownership */ | ||
| 498 | xfs_buf_lock(bp); | 494 | xfs_buf_lock(bp); |
| 499 | XFS_STATS_INC(xb_get_locked_waited); | 495 | XFS_STATS_INC(xb_get_locked_waited); |
| 500 | } else { | 496 | } else { |
| 501 | /* We asked for a trylock and failed, no need | ||
| 502 | * to look at file offset and length here, we | ||
| 503 | * know that this buffer at least overlaps our | ||
| 504 | * buffer and is locked, therefore our buffer | ||
| 505 | * either does not exist, or is this buffer. | ||
| 506 | */ | ||
| 507 | xfs_buf_rele(bp); | 497 | xfs_buf_rele(bp); |
| 508 | XFS_STATS_INC(xb_busy_locked); | 498 | XFS_STATS_INC(xb_busy_locked); |
| 509 | return NULL; | 499 | return NULL; |
| 510 | } | 500 | } |
| 511 | } else { | ||
| 512 | /* trylock worked */ | ||
| 513 | XB_SET_OWNER(bp); | ||
| 514 | } | 501 | } |
| 515 | 502 | ||
| 516 | if (bp->b_flags & XBF_STALE) { | 503 | if (bp->b_flags & XBF_STALE) { |
| @@ -876,10 +863,18 @@ xfs_buf_rele( | |||
| 876 | */ | 863 | */ |
| 877 | 864 | ||
| 878 | /* | 865 | /* |
| 879 | * Locks a buffer object, if it is not already locked. | 866 | * Locks a buffer object, if it is not already locked. Note that this in |
| 880 | * Note that this in no way locks the underlying pages, so it is only | 867 | * no way locks the underlying pages, so it is only useful for |
| 881 | * useful for synchronizing concurrent use of buffer objects, not for | 868 | * synchronizing concurrent use of buffer objects, not for synchronizing |
| 882 | * synchronizing independent access to the underlying pages. | 869 | * independent access to the underlying pages. |
| 870 | * | ||
| 871 | * If we come across a stale, pinned, locked buffer, we know that we are | ||
| 872 | * being asked to lock a buffer that has been reallocated. Because it is | ||
| 873 | * pinned, we know that the log has not been pushed to disk and hence it | ||
| 874 | * will still be locked. Rather than continuing to have trylock attempts | ||
| 875 | * fail until someone else pushes the log, push it ourselves before | ||
| 876 | * returning. This means that the xfsaild will not get stuck trying | ||
| 877 | * to push on stale inode buffers. | ||
| 883 | */ | 878 | */ |
| 884 | int | 879 | int |
| 885 | xfs_buf_cond_lock( | 880 | xfs_buf_cond_lock( |
| @@ -890,6 +885,8 @@ xfs_buf_cond_lock( | |||
| 890 | locked = down_trylock(&bp->b_sema) == 0; | 885 | locked = down_trylock(&bp->b_sema) == 0; |
| 891 | if (locked) | 886 | if (locked) |
| 892 | XB_SET_OWNER(bp); | 887 | XB_SET_OWNER(bp); |
| 888 | else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | ||
| 889 | xfs_log_force(bp->b_target->bt_mount, 0); | ||
| 893 | 890 | ||
| 894 | trace_xfs_buf_cond_lock(bp, _RET_IP_); | 891 | trace_xfs_buf_cond_lock(bp, _RET_IP_); |
| 895 | return locked ? 0 : -EBUSY; | 892 | return locked ? 0 : -EBUSY; |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8abd12e32e13..4111cd3966c7 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
| @@ -5471,8 +5471,13 @@ xfs_getbmap( | |||
| 5471 | if (error) | 5471 | if (error) |
| 5472 | goto out_unlock_iolock; | 5472 | goto out_unlock_iolock; |
| 5473 | } | 5473 | } |
| 5474 | 5474 | /* | |
| 5475 | ASSERT(ip->i_delayed_blks == 0); | 5475 | * even after flushing the inode, there can still be delalloc |
| 5476 | * blocks on the inode beyond EOF due to speculative | ||
| 5477 | * preallocation. These are not removed until the release | ||
| 5478 | * function is called or the inode is inactivated. Hence we | ||
| 5479 | * cannot assert here that ip->i_delayed_blks == 0. | ||
| 5480 | */ | ||
| 5476 | } | 5481 | } |
| 5477 | 5482 | ||
| 5478 | lock = xfs_ilock_map_shared(ip); | 5483 | lock = xfs_ilock_map_shared(ip); |
| @@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves( | |||
| 6070 | *count += xfs_bmbt_disk_get_blockcount(frp); | 6075 | *count += xfs_bmbt_disk_get_blockcount(frp); |
| 6071 | } | 6076 | } |
| 6072 | } | 6077 | } |
| 6078 | |||
| 6079 | /* | ||
| 6080 | * dead simple method of punching delayed allocation blocks from a range in | ||
| 6081 | * the inode. Walks a block at a time so will be slow, but is only executed in | ||
| 6082 | * rare error cases so the overhead is not critical. This will always punch out | ||
| 6083 | * both the start and end blocks, even if the ranges only partially overlap | ||
| 6084 | * them, so it is up to the caller to ensure that partial blocks are not | ||
| 6085 | * passed in. | ||
| 6086 | */ | ||
| 6087 | int | ||
| 6088 | xfs_bmap_punch_delalloc_range( | ||
| 6089 | struct xfs_inode *ip, | ||
| 6090 | xfs_fileoff_t start_fsb, | ||
| 6091 | xfs_fileoff_t length) | ||
| 6092 | { | ||
| 6093 | xfs_fileoff_t remaining = length; | ||
| 6094 | int error = 0; | ||
| 6095 | |||
| 6096 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
| 6097 | |||
| 6098 | do { | ||
| 6099 | int done; | ||
| 6100 | xfs_bmbt_irec_t imap; | ||
| 6101 | int nimaps = 1; | ||
| 6102 | xfs_fsblock_t firstblock; | ||
| 6103 | xfs_bmap_free_t flist; | ||
| 6104 | |||
| 6105 | /* | ||
| 6106 | * Map the range first and check that it is a delalloc extent | ||
| 6107 | * before trying to unmap the range. Otherwise we will be | ||
| 6108 | * trying to remove a real extent (which requires a | ||
| 6109 | * transaction) or a hole, which is probably a bad idea... | ||
| 6110 | */ | ||
| 6111 | error = xfs_bmapi(NULL, ip, start_fsb, 1, | ||
| 6112 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
| 6113 | &nimaps, NULL); | ||
| 6114 | |||
| 6115 | if (error) { | ||
| 6116 | /* something screwed, just bail */ | ||
| 6117 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
| 6118 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
| 6119 | "Failed delalloc mapping lookup ino %lld fsb %lld.", | ||
| 6120 | ip->i_ino, start_fsb); | ||
| 6121 | } | ||
| 6122 | break; | ||
| 6123 | } | ||
| 6124 | if (!nimaps) { | ||
| 6125 | /* nothing there */ | ||
| 6126 | goto next_block; | ||
| 6127 | } | ||
| 6128 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
| 6129 | /* been converted, ignore */ | ||
| 6130 | goto next_block; | ||
| 6131 | } | ||
| 6132 | WARN_ON(imap.br_blockcount == 0); | ||
| 6133 | |||
| 6134 | /* | ||
| 6135 | * Note: while we initialise the firstblock/flist pair, they | ||
| 6136 | * should never be used because blocks should never be | ||
| 6137 | * allocated or freed for a delalloc extent and hence we don't | ||
| 6138 | * need to cancel or finish them after the xfs_bunmapi() call. | ||
| 6139 | */ | ||
| 6140 | xfs_bmap_init(&flist, &firstblock); | ||
| 6141 | error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, | ||
| 6142 | &flist, &done); | ||
| 6143 | if (error) | ||
| 6144 | break; | ||
| 6145 | |||
| 6146 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
| 6147 | next_block: | ||
| 6148 | start_fsb++; | ||
| 6149 | remaining--; | ||
| 6150 | } while(remaining > 0); | ||
| 6151 | |||
| 6152 | return error; | ||
| 6153 | } | ||
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 71ec9b6ecdfc..3651191daea1 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
| @@ -394,6 +394,11 @@ xfs_bmap_count_blocks( | |||
| 394 | int whichfork, | 394 | int whichfork, |
| 395 | int *count); | 395 | int *count); |
| 396 | 396 | ||
| 397 | int | ||
| 398 | xfs_bmap_punch_delalloc_range( | ||
| 399 | struct xfs_inode *ip, | ||
| 400 | xfs_fileoff_t start_fsb, | ||
| 401 | xfs_fileoff_t length); | ||
| 397 | #endif /* __KERNEL__ */ | 402 | #endif /* __KERNEL__ */ |
| 398 | 403 | ||
| 399 | #endif /* __XFS_BMAP_H__ */ | 404 | #endif /* __XFS_BMAP_H__ */ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 3b9582c60a22..e60490bc00a6 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
| @@ -377,6 +377,19 @@ xfs_swap_extents( | |||
| 377 | ip->i_d.di_format = tip->i_d.di_format; | 377 | ip->i_d.di_format = tip->i_d.di_format; |
| 378 | tip->i_d.di_format = tmp; | 378 | tip->i_d.di_format = tmp; |
| 379 | 379 | ||
| 380 | /* | ||
| 381 | * The extents in the source inode could still contain speculative | ||
| 382 | * preallocation beyond EOF (e.g. the file is open but not modified | ||
| 383 | * while defrag is in progress). In that case, we need to copy over the | ||
| 384 | * number of delalloc blocks the data fork in the source inode is | ||
| 385 | * tracking beyond EOF so that when the fork is truncated away when the | ||
| 386 | * temporary inode is unlinked we don't underrun the i_delayed_blks | ||
| 387 | * counter on that inode. | ||
| 388 | */ | ||
| 389 | ASSERT(tip->i_delayed_blks == 0); | ||
| 390 | tip->i_delayed_blks = ip->i_delayed_blks; | ||
| 391 | ip->i_delayed_blks = 0; | ||
| 392 | |||
| 380 | ilf_fields = XFS_ILOG_CORE; | 393 | ilf_fields = XFS_ILOG_CORE; |
| 381 | 394 | ||
| 382 | switch(ip->i_d.di_format) { | 395 | switch(ip->i_d.di_format) { |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ed9990267661..c78cc6a3d87c 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
| @@ -58,6 +58,7 @@ xfs_error_trap(int e) | |||
| 58 | int xfs_etest[XFS_NUM_INJECT_ERROR]; | 58 | int xfs_etest[XFS_NUM_INJECT_ERROR]; |
| 59 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; | 59 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; |
| 60 | char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; | 60 | char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; |
| 61 | int xfs_error_test_active; | ||
| 61 | 62 | ||
| 62 | int | 63 | int |
| 63 | xfs_error_test(int error_tag, int *fsidp, char *expression, | 64 | xfs_error_test(int error_tag, int *fsidp, char *expression, |
| @@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp) | |||
| 108 | len = strlen(mp->m_fsname); | 109 | len = strlen(mp->m_fsname); |
| 109 | xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); | 110 | xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); |
| 110 | strcpy(xfs_etest_fsname[i], mp->m_fsname); | 111 | strcpy(xfs_etest_fsname[i], mp->m_fsname); |
| 112 | xfs_error_test_active++; | ||
| 111 | return 0; | 113 | return 0; |
| 112 | } | 114 | } |
| 113 | } | 115 | } |
| @@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) | |||
| 137 | xfs_etest_fsid[i] = 0LL; | 139 | xfs_etest_fsid[i] = 0LL; |
| 138 | kmem_free(xfs_etest_fsname[i]); | 140 | kmem_free(xfs_etest_fsname[i]); |
| 139 | xfs_etest_fsname[i] = NULL; | 141 | xfs_etest_fsname[i] = NULL; |
| 142 | xfs_error_test_active--; | ||
| 140 | } | 143 | } |
| 141 | } | 144 | } |
| 142 | 145 | ||
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index c2c1a072bb82..f338847f80b8 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
| @@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level, | |||
| 127 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT | 127 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT |
| 128 | 128 | ||
| 129 | #ifdef DEBUG | 129 | #ifdef DEBUG |
| 130 | extern int xfs_error_test_active; | ||
| 130 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); | 131 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); |
| 131 | 132 | ||
| 132 | #define XFS_NUM_INJECT_ERROR 10 | 133 | #define XFS_NUM_INJECT_ERROR 10 |
| 133 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ | 134 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ |
| 134 | ((expr) || \ | 135 | ((expr) || (xfs_error_test_active && \ |
| 135 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ | 136 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ |
| 136 | (rf))) | 137 | (rf)))) |
| 137 | 138 | ||
| 138 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); | 139 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); |
| 139 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | 140 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c7ac020705df..7c8d30c453c3 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
| @@ -657,18 +657,37 @@ xfs_inode_item_unlock( | |||
| 657 | } | 657 | } |
| 658 | 658 | ||
| 659 | /* | 659 | /* |
| 660 | * This is called to find out where the oldest active copy of the | 660 | * This is called to find out where the oldest active copy of the inode log |
| 661 | * inode log item in the on disk log resides now that the last log | 661 | * item in the on disk log resides now that the last log write of it completed |
| 662 | * write of it completed at the given lsn. Since we always re-log | 662 | * at the given lsn. Since we always re-log all dirty data in an inode, the |
| 663 | * all dirty data in an inode, the latest copy in the on disk log | 663 | * latest copy in the on disk log is the only one that matters. Therefore, |
| 664 | * is the only one that matters. Therefore, simply return the | 664 | * simply return the given lsn. |
| 665 | * given lsn. | 665 | * |
| 666 | * If the inode has been marked stale because the cluster is being freed, we | ||
| 667 | * don't want to (re-)insert this inode into the AIL. There is a race condition | ||
| 668 | * where the cluster buffer may be unpinned before the inode is inserted into | ||
| 669 | * the AIL during transaction committed processing. If the buffer is unpinned | ||
| 670 | * before the inode item has been committed and inserted, then it is possible | ||
| 671 | * for the buffer to be written and IO completions before the inode is inserted | ||
| 672 | * into the AIL. In that case, we'd be inserting a clean, stale inode into the | ||
| 673 | * AIL which will never get removed. It will, however, get reclaimed which | ||
| 674 | * triggers an assert in xfs_inode_free() complaining about freeing an inode | ||
| 675 | * still in the AIL. | ||
| 676 | * | ||
| 677 | * To avoid this, return a lower LSN than the one passed in so that the | ||
| 678 | * transaction committed code will not move the inode forward in the AIL but | ||
| 679 | * will still unpin it properly. | ||
| 666 | */ | 680 | */ |
| 667 | STATIC xfs_lsn_t | 681 | STATIC xfs_lsn_t |
| 668 | xfs_inode_item_committed( | 682 | xfs_inode_item_committed( |
| 669 | struct xfs_log_item *lip, | 683 | struct xfs_log_item *lip, |
| 670 | xfs_lsn_t lsn) | 684 | xfs_lsn_t lsn) |
| 671 | { | 685 | { |
| 686 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
| 687 | struct xfs_inode *ip = iip->ili_inode; | ||
| 688 | |||
| 689 | if (xfs_iflags_test(ip, XFS_ISTALE)) | ||
| 690 | return lsn - 1; | ||
| 672 | return lsn; | 691 | return lsn; |
| 673 | } | 692 | } |
| 674 | 693 | ||
