diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-01-18 17:08:07 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-01-18 17:08:07 -0500 |
| commit | 1e868d8e6d2c4b8736cdf7a4bd5701e4f527f722 (patch) | |
| tree | ce9ab0c1e5b4fd1610e8bf49a089a5cd5a474566 | |
| parent | 2faae42233778676607a2a45b95aeb375bebe2c6 (diff) | |
| parent | e09f98606dcc156de1146c209d45a0d6d5f51c3f (diff) | |
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
xfs: xfs_swap_extents needs to handle dynamic fork offsets
xfs: fix missing error check in xfs_rtfree_range
xfs: fix stale inode flush avoidance
xfs: Remove inode iolock held check during allocation
xfs: reclaim all inodes by background tree walks
xfs: Avoid inodes in reclaim when flushing from inode cache
xfs: reclaim inodes under a write lock
| -rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 14 | ||||
| -rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 183 | ||||
| -rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.h | 2 | ||||
| -rw-r--r-- | fs/xfs/quota/xfs_qm_syscalls.c | 2 | ||||
| -rw-r--r-- | fs/xfs/xfs_dfrag.c | 106 | ||||
| -rw-r--r-- | fs/xfs/xfs_iget.c | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_inode.c | 21 | ||||
| -rw-r--r-- | fs/xfs/xfs_rtalloc.c | 2 |
8 files changed, 201 insertions, 130 deletions
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 09783cc444ac..77414db10dc2 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
| @@ -954,16 +954,14 @@ xfs_fs_destroy_inode( | |||
| 954 | ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); | 954 | ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM)); |
| 955 | 955 | ||
| 956 | /* | 956 | /* |
| 957 | * If we have nothing to flush with this inode then complete the | 957 | * We always use background reclaim here because even if the |
| 958 | * teardown now, otherwise delay the flush operation. | 958 | * inode is clean, it still may be under IO and hence we have |
| 959 | * to take the flush lock. The background reclaim path handles | ||
| 960 | * this more efficiently than we can here, so simply let background | ||
| 961 | * reclaim tear down all inodes. | ||
| 959 | */ | 962 | */ |
| 960 | if (!xfs_inode_clean(ip)) { | ||
| 961 | xfs_inode_set_reclaim_tag(ip); | ||
| 962 | return; | ||
| 963 | } | ||
| 964 | |||
| 965 | out_reclaim: | 963 | out_reclaim: |
| 966 | xfs_ireclaim(ip); | 964 | xfs_inode_set_reclaim_tag(ip); |
| 967 | } | 965 | } |
| 968 | 966 | ||
| 969 | /* | 967 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 6fed97a8cd3e..1f5e4bb5e970 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
| @@ -65,7 +65,6 @@ xfs_inode_ag_lookup( | |||
| 65 | * as the tree is sparse and a gang lookup walks to find | 65 | * as the tree is sparse and a gang lookup walks to find |
| 66 | * the number of objects requested. | 66 | * the number of objects requested. |
| 67 | */ | 67 | */ |
| 68 | read_lock(&pag->pag_ici_lock); | ||
| 69 | if (tag == XFS_ICI_NO_TAG) { | 68 | if (tag == XFS_ICI_NO_TAG) { |
| 70 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 69 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
| 71 | (void **)&ip, *first_index, 1); | 70 | (void **)&ip, *first_index, 1); |
| @@ -74,7 +73,7 @@ xfs_inode_ag_lookup( | |||
| 74 | (void **)&ip, *first_index, 1, tag); | 73 | (void **)&ip, *first_index, 1, tag); |
| 75 | } | 74 | } |
| 76 | if (!nr_found) | 75 | if (!nr_found) |
| 77 | goto unlock; | 76 | return NULL; |
| 78 | 77 | ||
| 79 | /* | 78 | /* |
| 80 | * Update the index for the next lookup. Catch overflows | 79 | * Update the index for the next lookup. Catch overflows |
| @@ -84,13 +83,8 @@ xfs_inode_ag_lookup( | |||
| 84 | */ | 83 | */ |
| 85 | *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | 84 | *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); |
| 86 | if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | 85 | if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) |
| 87 | goto unlock; | 86 | return NULL; |
| 88 | |||
| 89 | return ip; | 87 | return ip; |
| 90 | |||
| 91 | unlock: | ||
| 92 | read_unlock(&pag->pag_ici_lock); | ||
| 93 | return NULL; | ||
| 94 | } | 88 | } |
| 95 | 89 | ||
| 96 | STATIC int | 90 | STATIC int |
| @@ -100,7 +94,8 @@ xfs_inode_ag_walk( | |||
| 100 | int (*execute)(struct xfs_inode *ip, | 94 | int (*execute)(struct xfs_inode *ip, |
| 101 | struct xfs_perag *pag, int flags), | 95 | struct xfs_perag *pag, int flags), |
| 102 | int flags, | 96 | int flags, |
| 103 | int tag) | 97 | int tag, |
| 98 | int exclusive) | ||
| 104 | { | 99 | { |
| 105 | struct xfs_perag *pag = &mp->m_perag[ag]; | 100 | struct xfs_perag *pag = &mp->m_perag[ag]; |
| 106 | uint32_t first_index; | 101 | uint32_t first_index; |
| @@ -114,10 +109,20 @@ restart: | |||
| 114 | int error = 0; | 109 | int error = 0; |
| 115 | xfs_inode_t *ip; | 110 | xfs_inode_t *ip; |
| 116 | 111 | ||
| 112 | if (exclusive) | ||
| 113 | write_lock(&pag->pag_ici_lock); | ||
| 114 | else | ||
| 115 | read_lock(&pag->pag_ici_lock); | ||
| 117 | ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); | 116 | ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); |
| 118 | if (!ip) | 117 | if (!ip) { |
| 118 | if (exclusive) | ||
| 119 | write_unlock(&pag->pag_ici_lock); | ||
| 120 | else | ||
| 121 | read_unlock(&pag->pag_ici_lock); | ||
| 119 | break; | 122 | break; |
| 123 | } | ||
| 120 | 124 | ||
| 125 | /* execute releases pag->pag_ici_lock */ | ||
| 121 | error = execute(ip, pag, flags); | 126 | error = execute(ip, pag, flags); |
| 122 | if (error == EAGAIN) { | 127 | if (error == EAGAIN) { |
| 123 | skipped++; | 128 | skipped++; |
| @@ -125,9 +130,8 @@ restart: | |||
| 125 | } | 130 | } |
| 126 | if (error) | 131 | if (error) |
| 127 | last_error = error; | 132 | last_error = error; |
| 128 | /* | 133 | |
| 129 | * bail out if the filesystem is corrupted. | 134 | /* bail out if the filesystem is corrupted. */ |
| 130 | */ | ||
| 131 | if (error == EFSCORRUPTED) | 135 | if (error == EFSCORRUPTED) |
| 132 | break; | 136 | break; |
| 133 | 137 | ||
| @@ -148,7 +152,8 @@ xfs_inode_ag_iterator( | |||
| 148 | int (*execute)(struct xfs_inode *ip, | 152 | int (*execute)(struct xfs_inode *ip, |
| 149 | struct xfs_perag *pag, int flags), | 153 | struct xfs_perag *pag, int flags), |
| 150 | int flags, | 154 | int flags, |
| 151 | int tag) | 155 | int tag, |
| 156 | int exclusive) | ||
| 152 | { | 157 | { |
| 153 | int error = 0; | 158 | int error = 0; |
| 154 | int last_error = 0; | 159 | int last_error = 0; |
| @@ -157,7 +162,8 @@ xfs_inode_ag_iterator( | |||
| 157 | for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { | 162 | for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) { |
| 158 | if (!mp->m_perag[ag].pag_ici_init) | 163 | if (!mp->m_perag[ag].pag_ici_init) |
| 159 | continue; | 164 | continue; |
| 160 | error = xfs_inode_ag_walk(mp, ag, execute, flags, tag); | 165 | error = xfs_inode_ag_walk(mp, ag, execute, flags, tag, |
| 166 | exclusive); | ||
| 161 | if (error) { | 167 | if (error) { |
| 162 | last_error = error; | 168 | last_error = error; |
| 163 | if (error == EFSCORRUPTED) | 169 | if (error == EFSCORRUPTED) |
| @@ -174,30 +180,31 @@ xfs_sync_inode_valid( | |||
| 174 | struct xfs_perag *pag) | 180 | struct xfs_perag *pag) |
| 175 | { | 181 | { |
| 176 | struct inode *inode = VFS_I(ip); | 182 | struct inode *inode = VFS_I(ip); |
| 183 | int error = EFSCORRUPTED; | ||
| 177 | 184 | ||
| 178 | /* nothing to sync during shutdown */ | 185 | /* nothing to sync during shutdown */ |
| 179 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { | 186 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
| 180 | read_unlock(&pag->pag_ici_lock); | 187 | goto out_unlock; |
| 181 | return EFSCORRUPTED; | ||
| 182 | } | ||
| 183 | 188 | ||
| 184 | /* | 189 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ |
| 185 | * If we can't get a reference on the inode, it must be in reclaim. | 190 | error = ENOENT; |
| 186 | * Leave it for the reclaim code to flush. Also avoid inodes that | 191 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) |
| 187 | * haven't been fully initialised. | 192 | goto out_unlock; |
| 188 | */ | ||
| 189 | if (!igrab(inode)) { | ||
| 190 | read_unlock(&pag->pag_ici_lock); | ||
| 191 | return ENOENT; | ||
| 192 | } | ||
| 193 | read_unlock(&pag->pag_ici_lock); | ||
| 194 | 193 | ||
| 195 | if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) { | 194 | /* If we can't grab the inode, it must be on its way to reclaim. */ |
| 195 | if (!igrab(inode)) | ||
| 196 | goto out_unlock; | ||
| 197 | |||
| 198 | if (is_bad_inode(inode)) { | ||
| 196 | IRELE(ip); | 199 | IRELE(ip); |
| 197 | return ENOENT; | 200 | goto out_unlock; |
| 198 | } | 201 | } |
| 199 | 202 | ||
| 200 | return 0; | 203 | /* inode is valid */ |
| 204 | error = 0; | ||
| 205 | out_unlock: | ||
| 206 | read_unlock(&pag->pag_ici_lock); | ||
| 207 | return error; | ||
| 201 | } | 208 | } |
| 202 | 209 | ||
| 203 | STATIC int | 210 | STATIC int |
| @@ -282,7 +289,7 @@ xfs_sync_data( | |||
| 282 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); | 289 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); |
| 283 | 290 | ||
| 284 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, | 291 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, |
| 285 | XFS_ICI_NO_TAG); | 292 | XFS_ICI_NO_TAG, 0); |
| 286 | if (error) | 293 | if (error) |
| 287 | return XFS_ERROR(error); | 294 | return XFS_ERROR(error); |
| 288 | 295 | ||
| @@ -304,7 +311,7 @@ xfs_sync_attr( | |||
| 304 | ASSERT((flags & ~SYNC_WAIT) == 0); | 311 | ASSERT((flags & ~SYNC_WAIT) == 0); |
| 305 | 312 | ||
| 306 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, | 313 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, |
| 307 | XFS_ICI_NO_TAG); | 314 | XFS_ICI_NO_TAG, 0); |
| 308 | } | 315 | } |
| 309 | 316 | ||
| 310 | STATIC int | 317 | STATIC int |
| @@ -664,60 +671,6 @@ xfs_syncd_stop( | |||
| 664 | kthread_stop(mp->m_sync_task); | 671 | kthread_stop(mp->m_sync_task); |
| 665 | } | 672 | } |
| 666 | 673 | ||
| 667 | STATIC int | ||
| 668 | xfs_reclaim_inode( | ||
| 669 | xfs_inode_t *ip, | ||
| 670 | int sync_mode) | ||
| 671 | { | ||
| 672 | xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); | ||
| 673 | |||
| 674 | /* The hash lock here protects a thread in xfs_iget_core from | ||
| 675 | * racing with us on linking the inode back with a vnode. | ||
| 676 | * Once we have the XFS_IRECLAIM flag set it will not touch | ||
| 677 | * us. | ||
| 678 | */ | ||
| 679 | write_lock(&pag->pag_ici_lock); | ||
| 680 | spin_lock(&ip->i_flags_lock); | ||
| 681 | if (__xfs_iflags_test(ip, XFS_IRECLAIM) || | ||
| 682 | !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { | ||
| 683 | spin_unlock(&ip->i_flags_lock); | ||
| 684 | write_unlock(&pag->pag_ici_lock); | ||
| 685 | return -EAGAIN; | ||
| 686 | } | ||
| 687 | __xfs_iflags_set(ip, XFS_IRECLAIM); | ||
| 688 | spin_unlock(&ip->i_flags_lock); | ||
| 689 | write_unlock(&pag->pag_ici_lock); | ||
| 690 | xfs_put_perag(ip->i_mount, pag); | ||
| 691 | |||
| 692 | /* | ||
| 693 | * If the inode is still dirty, then flush it out. If the inode | ||
| 694 | * is not in the AIL, then it will be OK to flush it delwri as | ||
| 695 | * long as xfs_iflush() does not keep any references to the inode. | ||
| 696 | * We leave that decision up to xfs_iflush() since it has the | ||
| 697 | * knowledge of whether it's OK to simply do a delwri flush of | ||
| 698 | * the inode or whether we need to wait until the inode is | ||
| 699 | * pulled from the AIL. | ||
| 700 | * We get the flush lock regardless, though, just to make sure | ||
| 701 | * we don't free it while it is being flushed. | ||
| 702 | */ | ||
| 703 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 704 | xfs_iflock(ip); | ||
| 705 | |||
| 706 | /* | ||
| 707 | * In the case of a forced shutdown we rely on xfs_iflush() to | ||
| 708 | * wait for the inode to be unpinned before returning an error. | ||
| 709 | */ | ||
| 710 | if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { | ||
| 711 | /* synchronize with xfs_iflush_done */ | ||
| 712 | xfs_iflock(ip); | ||
| 713 | xfs_ifunlock(ip); | ||
| 714 | } | ||
| 715 | |||
| 716 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 717 | xfs_ireclaim(ip); | ||
| 718 | return 0; | ||
| 719 | } | ||
| 720 | |||
| 721 | void | 674 | void |
| 722 | __xfs_inode_set_reclaim_tag( | 675 | __xfs_inode_set_reclaim_tag( |
| 723 | struct xfs_perag *pag, | 676 | struct xfs_perag *pag, |
| @@ -760,19 +713,55 @@ __xfs_inode_clear_reclaim_tag( | |||
| 760 | } | 713 | } |
| 761 | 714 | ||
| 762 | STATIC int | 715 | STATIC int |
| 763 | xfs_reclaim_inode_now( | 716 | xfs_reclaim_inode( |
| 764 | struct xfs_inode *ip, | 717 | struct xfs_inode *ip, |
| 765 | struct xfs_perag *pag, | 718 | struct xfs_perag *pag, |
| 766 | int flags) | 719 | int sync_mode) |
| 767 | { | 720 | { |
| 768 | /* ignore if already under reclaim */ | 721 | /* |
| 769 | if (xfs_iflags_test(ip, XFS_IRECLAIM)) { | 722 | * The radix tree lock here protects a thread in xfs_iget from racing |
| 770 | read_unlock(&pag->pag_ici_lock); | 723 | * with us starting reclaim on the inode. Once we have the |
| 724 | * XFS_IRECLAIM flag set it will not touch us. | ||
| 725 | */ | ||
| 726 | spin_lock(&ip->i_flags_lock); | ||
| 727 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
| 728 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | ||
| 729 | /* ignore as it is already under reclaim */ | ||
| 730 | spin_unlock(&ip->i_flags_lock); | ||
| 731 | write_unlock(&pag->pag_ici_lock); | ||
| 771 | return 0; | 732 | return 0; |
| 772 | } | 733 | } |
| 773 | read_unlock(&pag->pag_ici_lock); | 734 | __xfs_iflags_set(ip, XFS_IRECLAIM); |
| 735 | spin_unlock(&ip->i_flags_lock); | ||
| 736 | write_unlock(&pag->pag_ici_lock); | ||
| 774 | 737 | ||
| 775 | return xfs_reclaim_inode(ip, flags); | 738 | /* |
| 739 | * If the inode is still dirty, then flush it out. If the inode | ||
| 740 | * is not in the AIL, then it will be OK to flush it delwri as | ||
| 741 | * long as xfs_iflush() does not keep any references to the inode. | ||
| 742 | * We leave that decision up to xfs_iflush() since it has the | ||
| 743 | * knowledge of whether it's OK to simply do a delwri flush of | ||
| 744 | * the inode or whether we need to wait until the inode is | ||
| 745 | * pulled from the AIL. | ||
| 746 | * We get the flush lock regardless, though, just to make sure | ||
| 747 | * we don't free it while it is being flushed. | ||
| 748 | */ | ||
| 749 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
| 750 | xfs_iflock(ip); | ||
| 751 | |||
| 752 | /* | ||
| 753 | * In the case of a forced shutdown we rely on xfs_iflush() to | ||
| 754 | * wait for the inode to be unpinned before returning an error. | ||
| 755 | */ | ||
| 756 | if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { | ||
| 757 | /* synchronize with xfs_iflush_done */ | ||
| 758 | xfs_iflock(ip); | ||
| 759 | xfs_ifunlock(ip); | ||
| 760 | } | ||
| 761 | |||
| 762 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
| 763 | xfs_ireclaim(ip); | ||
| 764 | return 0; | ||
| 776 | } | 765 | } |
| 777 | 766 | ||
| 778 | int | 767 | int |
| @@ -780,6 +769,6 @@ xfs_reclaim_inodes( | |||
| 780 | xfs_mount_t *mp, | 769 | xfs_mount_t *mp, |
| 781 | int mode) | 770 | int mode) |
| 782 | { | 771 | { |
| 783 | return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode, | 772 | return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, |
| 784 | XFS_ICI_RECLAIM_TAG); | 773 | XFS_ICI_RECLAIM_TAG, 1); |
| 785 | } | 774 | } |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index a500b4d91835..ea932b43335d 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
| @@ -54,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | |||
| 54 | int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); | 54 | int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); |
| 55 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 55 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
| 56 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), | 56 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), |
| 57 | int flags, int tag); | 57 | int flags, int tag, int write_lock); |
| 58 | 58 | ||
| 59 | #endif | 59 | #endif |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 71af76fe8a23..873e07e29074 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
| @@ -891,7 +891,7 @@ xfs_qm_dqrele_all_inodes( | |||
| 891 | uint flags) | 891 | uint flags) |
| 892 | { | 892 | { |
| 893 | ASSERT(mp->m_quotainfo); | 893 | ASSERT(mp->m_quotainfo); |
| 894 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG); | 894 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0); |
| 895 | } | 895 | } |
| 896 | 896 | ||
| 897 | /*------------------------------------------------------------------------*/ | 897 | /*------------------------------------------------------------------------*/ |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index d1483a4f71b8..84ca1cf16a1e 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
| @@ -114,10 +114,82 @@ xfs_swapext( | |||
| 114 | return error; | 114 | return error; |
| 115 | } | 115 | } |
| 116 | 116 | ||
| 117 | /* | ||
| 118 | * We need to check that the format of the data fork in the temporary inode is | ||
| 119 | * valid for the target inode before doing the swap. This is not a problem with | ||
| 120 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized | ||
| 121 | * data fork depending on the space the attribute fork is taking so we can get | ||
| 122 | * invalid formats on the target inode. | ||
| 123 | * | ||
| 124 | * E.g. target has space for 7 extents in extent format, temp inode only has | ||
| 125 | * space for 6. If we defragment down to 7 extents, then the tmp format is a | ||
| 126 | * btree, but when swapped it needs to be in extent format. Hence we can't just | ||
| 127 | * blindly swap data forks on attr2 filesystems. | ||
| 128 | * | ||
| 129 | * Note that we check the swap in both directions so that we don't end up with | ||
| 130 | * a corrupt temporary inode, either. | ||
| 131 | * | ||
| 132 | * Note that fixing the way xfs_fsr sets up the attribute fork in the source | ||
| 133 | * inode will prevent this situation from occurring, so all we do here is | ||
| 134 | * reject and log the attempt. Basically we are putting the responsibility on | ||
| 135 | * userspace to get this right. | ||
| 136 | */ | ||
| 137 | static int | ||
| 138 | xfs_swap_extents_check_format( | ||
| 139 | xfs_inode_t *ip, /* target inode */ | ||
| 140 | xfs_inode_t *tip) /* tmp inode */ | ||
| 141 | { | ||
| 142 | |||
| 143 | /* Should never get a local format */ | ||
| 144 | if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || | ||
| 145 | tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) | ||
| 146 | return EINVAL; | ||
| 147 | |||
| 148 | /* | ||
| 149 | * if the target inode has fewer extents than the temporary inode then | ||
| 150 | * why did userspace call us? | ||
| 151 | */ | ||
| 152 | if (ip->i_d.di_nextents < tip->i_d.di_nextents) | ||
| 153 | return EINVAL; | ||
| 154 | |||
| 155 | /* | ||
| 156 | * if the target inode is in extent form and the temp inode is in btree | ||
| 157 | * form then we will end up with the target inode in the wrong format | ||
| 158 | * as we already know there are fewer extents in the temp inode. | ||
| 159 | */ | ||
| 160 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
| 161 | tip->i_d.di_format == XFS_DINODE_FMT_BTREE) | ||
| 162 | return EINVAL; | ||
| 163 | |||
| 164 | /* Check temp in extent form to max in target */ | ||
| 165 | if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
| 166 | XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max) | ||
| 167 | return EINVAL; | ||
| 168 | |||
| 169 | /* Check target in extent form to max in temp */ | ||
| 170 | if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && | ||
| 171 | XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max) | ||
| 172 | return EINVAL; | ||
| 173 | |||
| 174 | /* Check root block of temp in btree form to max in target */ | ||
| 175 | if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE && | ||
| 176 | XFS_IFORK_BOFF(ip) && | ||
| 177 | tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip)) | ||
| 178 | return EINVAL; | ||
| 179 | |||
| 180 | /* Check root block of target in btree form to max in temp */ | ||
| 181 | if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && | ||
| 182 | XFS_IFORK_BOFF(tip) && | ||
| 183 | ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip)) | ||
| 184 | return EINVAL; | ||
| 185 | |||
| 186 | return 0; | ||
| 187 | } | ||
| 188 | |||
| 117 | int | 189 | int |
| 118 | xfs_swap_extents( | 190 | xfs_swap_extents( |
| 119 | xfs_inode_t *ip, | 191 | xfs_inode_t *ip, /* target inode */ |
| 120 | xfs_inode_t *tip, | 192 | xfs_inode_t *tip, /* tmp inode */ |
| 121 | xfs_swapext_t *sxp) | 193 | xfs_swapext_t *sxp) |
| 122 | { | 194 | { |
| 123 | xfs_mount_t *mp; | 195 | xfs_mount_t *mp; |
| @@ -161,13 +233,6 @@ xfs_swap_extents( | |||
| 161 | goto out_unlock; | 233 | goto out_unlock; |
| 162 | } | 234 | } |
| 163 | 235 | ||
| 164 | /* Should never get a local format */ | ||
| 165 | if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL || | ||
| 166 | tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) { | ||
| 167 | error = XFS_ERROR(EINVAL); | ||
| 168 | goto out_unlock; | ||
| 169 | } | ||
| 170 | |||
| 171 | if (VN_CACHED(VFS_I(tip)) != 0) { | 236 | if (VN_CACHED(VFS_I(tip)) != 0) { |
| 172 | error = xfs_flushinval_pages(tip, 0, -1, | 237 | error = xfs_flushinval_pages(tip, 0, -1, |
| 173 | FI_REMAPF_LOCKED); | 238 | FI_REMAPF_LOCKED); |
| @@ -189,13 +254,12 @@ xfs_swap_extents( | |||
| 189 | goto out_unlock; | 254 | goto out_unlock; |
| 190 | } | 255 | } |
| 191 | 256 | ||
| 192 | /* | 257 | /* check inode formats now that data is flushed */ |
| 193 | * If the target has extended attributes, the tmp file | 258 | error = xfs_swap_extents_check_format(ip, tip); |
| 194 | * must also in order to ensure the correct data fork | 259 | if (error) { |
| 195 | * format. | 260 | xfs_fs_cmn_err(CE_NOTE, mp, |
| 196 | */ | 261 | "%s: inode 0x%llx format is incompatible for exchanging.", |
| 197 | if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) { | 262 | __FILE__, ip->i_ino); |
| 198 | error = XFS_ERROR(EINVAL); | ||
| 199 | goto out_unlock; | 263 | goto out_unlock; |
| 200 | } | 264 | } |
| 201 | 265 | ||
| @@ -276,6 +340,16 @@ xfs_swap_extents( | |||
| 276 | *tifp = *tempifp; /* struct copy */ | 340 | *tifp = *tempifp; /* struct copy */ |
| 277 | 341 | ||
| 278 | /* | 342 | /* |
| 343 | * Fix the in-memory data fork values that are dependent on the fork | ||
| 344 | * offset in the inode. We can't assume they remain the same as attr2 | ||
| 345 | * has dynamic fork offsets. | ||
| 346 | */ | ||
| 347 | ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) / | ||
| 348 | (uint)sizeof(xfs_bmbt_rec_t); | ||
| 349 | tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) / | ||
| 350 | (uint)sizeof(xfs_bmbt_rec_t); | ||
| 351 | |||
| 352 | /* | ||
| 279 | * Fix the on-disk inode values | 353 | * Fix the on-disk inode values |
| 280 | */ | 354 | */ |
| 281 | tmp = (__uint64_t)ip->i_d.di_nblocks; | 355 | tmp = (__uint64_t)ip->i_d.di_nblocks; |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index fa402a6bbbcf..155e798f30a1 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
| @@ -73,7 +73,6 @@ xfs_inode_alloc( | |||
| 73 | ASSERT(atomic_read(&ip->i_pincount) == 0); | 73 | ASSERT(atomic_read(&ip->i_pincount) == 0); |
| 74 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); | 74 | ASSERT(!spin_is_locked(&ip->i_flags_lock)); |
| 75 | ASSERT(completion_done(&ip->i_flush)); | 75 | ASSERT(completion_done(&ip->i_flush)); |
| 76 | ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock)); | ||
| 77 | 76 | ||
| 78 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); | 77 | mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); |
| 79 | 78 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 391d36b0e68c..ef77fd88c8e3 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -2842,13 +2842,9 @@ xfs_iflush( | |||
| 2842 | 2842 | ||
| 2843 | /* | 2843 | /* |
| 2844 | * If the inode isn't dirty, then just release the inode flush lock and | 2844 | * If the inode isn't dirty, then just release the inode flush lock and |
| 2845 | * do nothing. Treat stale inodes the same; we cannot rely on the | 2845 | * do nothing. |
| 2846 | * backing buffer remaining stale in cache for the remaining life of | ||
| 2847 | * the stale inode and so xfs_itobp() below may give us a buffer that | ||
| 2848 | * no longer contains inodes below. Doing this stale check here also | ||
| 2849 | * avoids forcing the log on pinned, stale inodes. | ||
| 2850 | */ | 2846 | */ |
| 2851 | if (xfs_inode_clean(ip) || xfs_iflags_test(ip, XFS_ISTALE)) { | 2847 | if (xfs_inode_clean(ip)) { |
| 2852 | xfs_ifunlock(ip); | 2848 | xfs_ifunlock(ip); |
| 2853 | return 0; | 2849 | return 0; |
| 2854 | } | 2850 | } |
| @@ -2872,6 +2868,19 @@ xfs_iflush( | |||
| 2872 | xfs_iunpin_wait(ip); | 2868 | xfs_iunpin_wait(ip); |
| 2873 | 2869 | ||
| 2874 | /* | 2870 | /* |
| 2871 | * For stale inodes we cannot rely on the backing buffer remaining | ||
| 2872 | * stale in cache for the remaining life of the stale inode and so | ||
| 2873 | * xfs_itobp() below may give us a buffer that no longer contains | ||
| 2874 | * inodes below. We have to check this after ensuring the inode is | ||
| 2875 | * unpinned so that it is safe to reclaim the stale inode after the | ||
| 2876 | * flush call. | ||
| 2877 | */ | ||
| 2878 | if (xfs_iflags_test(ip, XFS_ISTALE)) { | ||
| 2879 | xfs_ifunlock(ip); | ||
| 2880 | return 0; | ||
| 2881 | } | ||
| 2882 | |||
| 2883 | /* | ||
| 2875 | * This may have been unpinned because the filesystem is shutting | 2884 | * This may have been unpinned because the filesystem is shutting |
| 2876 | * down forcibly. If that's the case we must not write this inode | 2885 | * down forcibly. If that's the case we must not write this inode |
| 2877 | * to disk, because the log record didn't make it to disk! | 2886 | * to disk, because the log record didn't make it to disk! |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 9e15a1185362..6be05f756d59 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
| @@ -1517,6 +1517,8 @@ xfs_rtfree_range( | |||
| 1517 | */ | 1517 | */ |
| 1518 | error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, | 1518 | error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, |
| 1519 | &postblock); | 1519 | &postblock); |
| 1520 | if (error) | ||
| 1521 | return error; | ||
| 1520 | /* | 1522 | /* |
| 1521 | * If there are blocks not being freed at the front of the | 1523 | * If there are blocks not being freed at the front of the |
| 1522 | * old extent, add summary data for them to be allocated. | 1524 | * old extent, add summary data for them to be allocated. |
