diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-12-02 12:13:36 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-12-02 12:13:36 -0500 |
commit | 8cb280c90f9cfaab3ba3afbace0b1711dee80d0c (patch) | |
tree | b98d29b0159dd763afab1670d58019b6cb58cfa0 /fs | |
parent | 8fed709f343346a77888c2eef8f2d41bc637bef6 (diff) | |
parent | c76febef574fd86566bbdf1a73a547a439115c25 (diff) |
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
xfs: only run xfs_error_test if error injection is active
xfs: avoid moving stale inodes in the AIL
xfs: delayed alloc blocks beyond EOF are valid after writeback
xfs: push stale, pinned buffers on trylock failures
xfs: fix failed write truncation handling.
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 94 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_buf.c | 35 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap.c | 85 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap.h | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_dfrag.c | 13 | ||||
-rw-r--r-- | fs/xfs/xfs_error.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_error.h | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_inode_item.c | 31 |
8 files changed, 188 insertions, 83 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 7d287afccde5..691f61223ed6 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -934,7 +934,6 @@ xfs_aops_discard_page( | |||
934 | struct xfs_inode *ip = XFS_I(inode); | 934 | struct xfs_inode *ip = XFS_I(inode); |
935 | struct buffer_head *bh, *head; | 935 | struct buffer_head *bh, *head; |
936 | loff_t offset = page_offset(page); | 936 | loff_t offset = page_offset(page); |
937 | ssize_t len = 1 << inode->i_blkbits; | ||
938 | 937 | ||
939 | if (!xfs_is_delayed_page(page, IO_DELAY)) | 938 | if (!xfs_is_delayed_page(page, IO_DELAY)) |
940 | goto out_invalidate; | 939 | goto out_invalidate; |
@@ -949,58 +948,14 @@ xfs_aops_discard_page( | |||
949 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 948 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
950 | bh = head = page_buffers(page); | 949 | bh = head = page_buffers(page); |
951 | do { | 950 | do { |
952 | int done; | ||
953 | xfs_fileoff_t offset_fsb; | ||
954 | xfs_bmbt_irec_t imap; | ||
955 | int nimaps = 1; | ||
956 | int error; | 951 | int error; |
957 | xfs_fsblock_t firstblock; | 952 | xfs_fileoff_t start_fsb; |
958 | xfs_bmap_free_t flist; | ||
959 | 953 | ||
960 | if (!buffer_delay(bh)) | 954 | if (!buffer_delay(bh)) |
961 | goto next_buffer; | 955 | goto next_buffer; |
962 | 956 | ||
963 | offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); | 957 | start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); |
964 | 958 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); | |
965 | /* | ||
966 | * Map the range first and check that it is a delalloc extent | ||
967 | * before trying to unmap the range. Otherwise we will be | ||
968 | * trying to remove a real extent (which requires a | ||
969 | * transaction) or a hole, which is probably a bad idea... | ||
970 | */ | ||
971 | error = xfs_bmapi(NULL, ip, offset_fsb, 1, | ||
972 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
973 | &nimaps, NULL); | ||
974 | |||
975 | if (error) { | ||
976 | /* something screwed, just bail */ | ||
977 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
978 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
979 | "page discard failed delalloc mapping lookup."); | ||
980 | } | ||
981 | break; | ||
982 | } | ||
983 | if (!nimaps) { | ||
984 | /* nothing there */ | ||
985 | goto next_buffer; | ||
986 | } | ||
987 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
988 | /* been converted, ignore */ | ||
989 | goto next_buffer; | ||
990 | } | ||
991 | WARN_ON(imap.br_blockcount == 0); | ||
992 | |||
993 | /* | ||
994 | * Note: while we initialise the firstblock/flist pair, they | ||
995 | * should never be used because blocks should never be | ||
996 | * allocated or freed for a delalloc extent and hence we need | ||
997 | * don't cancel or finish them after the xfs_bunmapi() call. | ||
998 | */ | ||
999 | xfs_bmap_init(&flist, &firstblock); | ||
1000 | error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock, | ||
1001 | &flist, &done); | ||
1002 | |||
1003 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
1004 | if (error) { | 959 | if (error) { |
1005 | /* something screwed, just bail */ | 960 | /* something screwed, just bail */ |
1006 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 961 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { |
@@ -1010,7 +965,7 @@ xfs_aops_discard_page( | |||
1010 | break; | 965 | break; |
1011 | } | 966 | } |
1012 | next_buffer: | 967 | next_buffer: |
1013 | offset += len; | 968 | offset += 1 << inode->i_blkbits; |
1014 | 969 | ||
1015 | } while ((bh = bh->b_this_page) != head); | 970 | } while ((bh = bh->b_this_page) != head); |
1016 | 971 | ||
@@ -1505,11 +1460,42 @@ xfs_vm_write_failed( | |||
1505 | struct inode *inode = mapping->host; | 1460 | struct inode *inode = mapping->host; |
1506 | 1461 | ||
1507 | if (to > inode->i_size) { | 1462 | if (to > inode->i_size) { |
1508 | struct iattr ia = { | 1463 | /* |
1509 | .ia_valid = ATTR_SIZE | ATTR_FORCE, | 1464 | * punch out the delalloc blocks we have already allocated. We |
1510 | .ia_size = inode->i_size, | 1465 | * don't call xfs_setattr() to do this as we may be in the |
1511 | }; | 1466 | * middle of a multi-iovec write and so the vfs inode->i_size |
1512 | xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK); | 1467 | * will not match the xfs ip->i_size and so it will zero too |
1468 | * much. Hence we jus truncate the page cache to zero what is | ||
1469 | * necessary and punch the delalloc blocks directly. | ||
1470 | */ | ||
1471 | struct xfs_inode *ip = XFS_I(inode); | ||
1472 | xfs_fileoff_t start_fsb; | ||
1473 | xfs_fileoff_t end_fsb; | ||
1474 | int error; | ||
1475 | |||
1476 | truncate_pagecache(inode, to, inode->i_size); | ||
1477 | |||
1478 | /* | ||
1479 | * Check if there are any blocks that are outside of i_size | ||
1480 | * that need to be trimmed back. | ||
1481 | */ | ||
1482 | start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1; | ||
1483 | end_fsb = XFS_B_TO_FSB(ip->i_mount, to); | ||
1484 | if (end_fsb <= start_fsb) | ||
1485 | return; | ||
1486 | |||
1487 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1488 | error = xfs_bmap_punch_delalloc_range(ip, start_fsb, | ||
1489 | end_fsb - start_fsb); | ||
1490 | if (error) { | ||
1491 | /* something screwed, just bail */ | ||
1492 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
1493 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
1494 | "xfs_vm_write_failed: unable to clean up ino %lld", | ||
1495 | ip->i_ino); | ||
1496 | } | ||
1497 | } | ||
1498 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1513 | } | 1499 | } |
1514 | } | 1500 | } |
1515 | 1501 | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index aa1d353def29..4c5deb6e9e31 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -488,29 +488,16 @@ found: | |||
488 | spin_unlock(&pag->pag_buf_lock); | 488 | spin_unlock(&pag->pag_buf_lock); |
489 | xfs_perag_put(pag); | 489 | xfs_perag_put(pag); |
490 | 490 | ||
491 | /* Attempt to get the semaphore without sleeping, | 491 | if (xfs_buf_cond_lock(bp)) { |
492 | * if this does not work then we need to drop the | 492 | /* failed, so wait for the lock if requested. */ |
493 | * spinlock and do a hard attempt on the semaphore. | ||
494 | */ | ||
495 | if (down_trylock(&bp->b_sema)) { | ||
496 | if (!(flags & XBF_TRYLOCK)) { | 493 | if (!(flags & XBF_TRYLOCK)) { |
497 | /* wait for buffer ownership */ | ||
498 | xfs_buf_lock(bp); | 494 | xfs_buf_lock(bp); |
499 | XFS_STATS_INC(xb_get_locked_waited); | 495 | XFS_STATS_INC(xb_get_locked_waited); |
500 | } else { | 496 | } else { |
501 | /* We asked for a trylock and failed, no need | ||
502 | * to look at file offset and length here, we | ||
503 | * know that this buffer at least overlaps our | ||
504 | * buffer and is locked, therefore our buffer | ||
505 | * either does not exist, or is this buffer. | ||
506 | */ | ||
507 | xfs_buf_rele(bp); | 497 | xfs_buf_rele(bp); |
508 | XFS_STATS_INC(xb_busy_locked); | 498 | XFS_STATS_INC(xb_busy_locked); |
509 | return NULL; | 499 | return NULL; |
510 | } | 500 | } |
511 | } else { | ||
512 | /* trylock worked */ | ||
513 | XB_SET_OWNER(bp); | ||
514 | } | 501 | } |
515 | 502 | ||
516 | if (bp->b_flags & XBF_STALE) { | 503 | if (bp->b_flags & XBF_STALE) { |
@@ -876,10 +863,18 @@ xfs_buf_rele( | |||
876 | */ | 863 | */ |
877 | 864 | ||
878 | /* | 865 | /* |
879 | * Locks a buffer object, if it is not already locked. | 866 | * Locks a buffer object, if it is not already locked. Note that this in |
880 | * Note that this in no way locks the underlying pages, so it is only | 867 | * no way locks the underlying pages, so it is only useful for |
881 | * useful for synchronizing concurrent use of buffer objects, not for | 868 | * synchronizing concurrent use of buffer objects, not for synchronizing |
882 | * synchronizing independent access to the underlying pages. | 869 | * independent access to the underlying pages. |
870 | * | ||
871 | * If we come across a stale, pinned, locked buffer, we know that we are | ||
872 | * being asked to lock a buffer that has been reallocated. Because it is | ||
873 | * pinned, we know that the log has not been pushed to disk and hence it | ||
874 | * will still be locked. Rather than continuing to have trylock attempts | ||
875 | * fail until someone else pushes the log, push it ourselves before | ||
876 | * returning. This means that the xfsaild will not get stuck trying | ||
877 | * to push on stale inode buffers. | ||
883 | */ | 878 | */ |
884 | int | 879 | int |
885 | xfs_buf_cond_lock( | 880 | xfs_buf_cond_lock( |
@@ -890,6 +885,8 @@ xfs_buf_cond_lock( | |||
890 | locked = down_trylock(&bp->b_sema) == 0; | 885 | locked = down_trylock(&bp->b_sema) == 0; |
891 | if (locked) | 886 | if (locked) |
892 | XB_SET_OWNER(bp); | 887 | XB_SET_OWNER(bp); |
888 | else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | ||
889 | xfs_log_force(bp->b_target->bt_mount, 0); | ||
893 | 890 | ||
894 | trace_xfs_buf_cond_lock(bp, _RET_IP_); | 891 | trace_xfs_buf_cond_lock(bp, _RET_IP_); |
895 | return locked ? 0 : -EBUSY; | 892 | return locked ? 0 : -EBUSY; |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 8abd12e32e13..4111cd3966c7 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -5471,8 +5471,13 @@ xfs_getbmap( | |||
5471 | if (error) | 5471 | if (error) |
5472 | goto out_unlock_iolock; | 5472 | goto out_unlock_iolock; |
5473 | } | 5473 | } |
5474 | 5474 | /* | |
5475 | ASSERT(ip->i_delayed_blks == 0); | 5475 | * even after flushing the inode, there can still be delalloc |
5476 | * blocks on the inode beyond EOF due to speculative | ||
5477 | * preallocation. These are not removed until the release | ||
5478 | * function is called or the inode is inactivated. Hence we | ||
5479 | * cannot assert here that ip->i_delayed_blks == 0. | ||
5480 | */ | ||
5476 | } | 5481 | } |
5477 | 5482 | ||
5478 | lock = xfs_ilock_map_shared(ip); | 5483 | lock = xfs_ilock_map_shared(ip); |
@@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves( | |||
6070 | *count += xfs_bmbt_disk_get_blockcount(frp); | 6075 | *count += xfs_bmbt_disk_get_blockcount(frp); |
6071 | } | 6076 | } |
6072 | } | 6077 | } |
6078 | |||
6079 | /* | ||
6080 | * dead simple method of punching delalyed allocation blocks from a range in | ||
6081 | * the inode. Walks a block at a time so will be slow, but is only executed in | ||
6082 | * rare error cases so the overhead is not critical. This will alays punch out | ||
6083 | * both the start and end blocks, even if the ranges only partially overlap | ||
6084 | * them, so it is up to the caller to ensure that partial blocks are not | ||
6085 | * passed in. | ||
6086 | */ | ||
6087 | int | ||
6088 | xfs_bmap_punch_delalloc_range( | ||
6089 | struct xfs_inode *ip, | ||
6090 | xfs_fileoff_t start_fsb, | ||
6091 | xfs_fileoff_t length) | ||
6092 | { | ||
6093 | xfs_fileoff_t remaining = length; | ||
6094 | int error = 0; | ||
6095 | |||
6096 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
6097 | |||
6098 | do { | ||
6099 | int done; | ||
6100 | xfs_bmbt_irec_t imap; | ||
6101 | int nimaps = 1; | ||
6102 | xfs_fsblock_t firstblock; | ||
6103 | xfs_bmap_free_t flist; | ||
6104 | |||
6105 | /* | ||
6106 | * Map the range first and check that it is a delalloc extent | ||
6107 | * before trying to unmap the range. Otherwise we will be | ||
6108 | * trying to remove a real extent (which requires a | ||
6109 | * transaction) or a hole, which is probably a bad idea... | ||
6110 | */ | ||
6111 | error = xfs_bmapi(NULL, ip, start_fsb, 1, | ||
6112 | XFS_BMAPI_ENTIRE, NULL, 0, &imap, | ||
6113 | &nimaps, NULL); | ||
6114 | |||
6115 | if (error) { | ||
6116 | /* something screwed, just bail */ | ||
6117 | if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { | ||
6118 | xfs_fs_cmn_err(CE_ALERT, ip->i_mount, | ||
6119 | "Failed delalloc mapping lookup ino %lld fsb %lld.", | ||
6120 | ip->i_ino, start_fsb); | ||
6121 | } | ||
6122 | break; | ||
6123 | } | ||
6124 | if (!nimaps) { | ||
6125 | /* nothing there */ | ||
6126 | goto next_block; | ||
6127 | } | ||
6128 | if (imap.br_startblock != DELAYSTARTBLOCK) { | ||
6129 | /* been converted, ignore */ | ||
6130 | goto next_block; | ||
6131 | } | ||
6132 | WARN_ON(imap.br_blockcount == 0); | ||
6133 | |||
6134 | /* | ||
6135 | * Note: while we initialise the firstblock/flist pair, they | ||
6136 | * should never be used because blocks should never be | ||
6137 | * allocated or freed for a delalloc extent and hence we need | ||
6138 | * don't cancel or finish them after the xfs_bunmapi() call. | ||
6139 | */ | ||
6140 | xfs_bmap_init(&flist, &firstblock); | ||
6141 | error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock, | ||
6142 | &flist, &done); | ||
6143 | if (error) | ||
6144 | break; | ||
6145 | |||
6146 | ASSERT(!flist.xbf_count && !flist.xbf_first); | ||
6147 | next_block: | ||
6148 | start_fsb++; | ||
6149 | remaining--; | ||
6150 | } while(remaining > 0); | ||
6151 | |||
6152 | return error; | ||
6153 | } | ||
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 71ec9b6ecdfc..3651191daea1 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks( | |||
394 | int whichfork, | 394 | int whichfork, |
395 | int *count); | 395 | int *count); |
396 | 396 | ||
397 | int | ||
398 | xfs_bmap_punch_delalloc_range( | ||
399 | struct xfs_inode *ip, | ||
400 | xfs_fileoff_t start_fsb, | ||
401 | xfs_fileoff_t length); | ||
397 | #endif /* __KERNEL__ */ | 402 | #endif /* __KERNEL__ */ |
398 | 403 | ||
399 | #endif /* __XFS_BMAP_H__ */ | 404 | #endif /* __XFS_BMAP_H__ */ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 3b9582c60a22..e60490bc00a6 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
@@ -377,6 +377,19 @@ xfs_swap_extents( | |||
377 | ip->i_d.di_format = tip->i_d.di_format; | 377 | ip->i_d.di_format = tip->i_d.di_format; |
378 | tip->i_d.di_format = tmp; | 378 | tip->i_d.di_format = tmp; |
379 | 379 | ||
380 | /* | ||
381 | * The extents in the source inode could still contain speculative | ||
382 | * preallocation beyond EOF (e.g. the file is open but not modified | ||
383 | * while defrag is in progress). In that case, we need to copy over the | ||
384 | * number of delalloc blocks the data fork in the source inode is | ||
385 | * tracking beyond EOF so that when the fork is truncated away when the | ||
386 | * temporary inode is unlinked we don't underrun the i_delayed_blks | ||
387 | * counter on that inode. | ||
388 | */ | ||
389 | ASSERT(tip->i_delayed_blks == 0); | ||
390 | tip->i_delayed_blks = ip->i_delayed_blks; | ||
391 | ip->i_delayed_blks = 0; | ||
392 | |||
380 | ilf_fields = XFS_ILOG_CORE; | 393 | ilf_fields = XFS_ILOG_CORE; |
381 | 394 | ||
382 | switch(ip->i_d.di_format) { | 395 | switch(ip->i_d.di_format) { |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ed9990267661..c78cc6a3d87c 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -58,6 +58,7 @@ xfs_error_trap(int e) | |||
58 | int xfs_etest[XFS_NUM_INJECT_ERROR]; | 58 | int xfs_etest[XFS_NUM_INJECT_ERROR]; |
59 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; | 59 | int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; |
60 | char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; | 60 | char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; |
61 | int xfs_error_test_active; | ||
61 | 62 | ||
62 | int | 63 | int |
63 | xfs_error_test(int error_tag, int *fsidp, char *expression, | 64 | xfs_error_test(int error_tag, int *fsidp, char *expression, |
@@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp) | |||
108 | len = strlen(mp->m_fsname); | 109 | len = strlen(mp->m_fsname); |
109 | xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); | 110 | xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP); |
110 | strcpy(xfs_etest_fsname[i], mp->m_fsname); | 111 | strcpy(xfs_etest_fsname[i], mp->m_fsname); |
112 | xfs_error_test_active++; | ||
111 | return 0; | 113 | return 0; |
112 | } | 114 | } |
113 | } | 115 | } |
@@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) | |||
137 | xfs_etest_fsid[i] = 0LL; | 139 | xfs_etest_fsid[i] = 0LL; |
138 | kmem_free(xfs_etest_fsname[i]); | 140 | kmem_free(xfs_etest_fsname[i]); |
139 | xfs_etest_fsname[i] = NULL; | 141 | xfs_etest_fsname[i] = NULL; |
142 | xfs_error_test_active--; | ||
140 | } | 143 | } |
141 | } | 144 | } |
142 | 145 | ||
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index c2c1a072bb82..f338847f80b8 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
@@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level, | |||
127 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT | 127 | #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT |
128 | 128 | ||
129 | #ifdef DEBUG | 129 | #ifdef DEBUG |
130 | extern int xfs_error_test_active; | ||
130 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); | 131 | extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); |
131 | 132 | ||
132 | #define XFS_NUM_INJECT_ERROR 10 | 133 | #define XFS_NUM_INJECT_ERROR 10 |
133 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ | 134 | #define XFS_TEST_ERROR(expr, mp, tag, rf) \ |
134 | ((expr) || \ | 135 | ((expr) || (xfs_error_test_active && \ |
135 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ | 136 | xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ |
136 | (rf))) | 137 | (rf)))) |
137 | 138 | ||
138 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); | 139 | extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); |
139 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); | 140 | extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index c7ac020705df..7c8d30c453c3 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -657,18 +657,37 @@ xfs_inode_item_unlock( | |||
657 | } | 657 | } |
658 | 658 | ||
659 | /* | 659 | /* |
660 | * This is called to find out where the oldest active copy of the | 660 | * This is called to find out where the oldest active copy of the inode log |
661 | * inode log item in the on disk log resides now that the last log | 661 | * item in the on disk log resides now that the last log write of it completed |
662 | * write of it completed at the given lsn. Since we always re-log | 662 | * at the given lsn. Since we always re-log all dirty data in an inode, the |
663 | * all dirty data in an inode, the latest copy in the on disk log | 663 | * latest copy in the on disk log is the only one that matters. Therefore, |
664 | * is the only one that matters. Therefore, simply return the | 664 | * simply return the given lsn. |
665 | * given lsn. | 665 | * |
666 | * If the inode has been marked stale because the cluster is being freed, we | ||
667 | * don't want to (re-)insert this inode into the AIL. There is a race condition | ||
668 | * where the cluster buffer may be unpinned before the inode is inserted into | ||
669 | * the AIL during transaction committed processing. If the buffer is unpinned | ||
670 | * before the inode item has been committed and inserted, then it is possible | ||
671 | * for the buffer to be written and IO completions before the inode is inserted | ||
672 | * into the AIL. In that case, we'd be inserting a clean, stale inode into the | ||
673 | * AIL which will never get removed. It will, however, get reclaimed which | ||
674 | * triggers an assert in xfs_inode_free() complaining about freein an inode | ||
675 | * still in the AIL. | ||
676 | * | ||
677 | * To avoid this, return a lower LSN than the one passed in so that the | ||
678 | * transaction committed code will not move the inode forward in the AIL but | ||
679 | * will still unpin it properly. | ||
666 | */ | 680 | */ |
667 | STATIC xfs_lsn_t | 681 | STATIC xfs_lsn_t |
668 | xfs_inode_item_committed( | 682 | xfs_inode_item_committed( |
669 | struct xfs_log_item *lip, | 683 | struct xfs_log_item *lip, |
670 | xfs_lsn_t lsn) | 684 | xfs_lsn_t lsn) |
671 | { | 685 | { |
686 | struct xfs_inode_log_item *iip = INODE_ITEM(lip); | ||
687 | struct xfs_inode *ip = iip->ili_inode; | ||
688 | |||
689 | if (xfs_iflags_test(ip, XFS_ISTALE)) | ||
690 | return lsn - 1; | ||
672 | return lsn; | 691 | return lsn; |
673 | } | 692 | } |
674 | 693 | ||