author     Dave Chinner <david@fromorbit.com>    2016-05-19 20:34:00 -0400
committer  Dave Chinner <david@fromorbit.com>    2016-05-19 20:34:00 -0400
commit     555b67e4e729ca544bb4028ab12e532c68b70ddb (patch)
tree       8bfb59ccca39cebe0210366bebfeddd8bd3ab3a3
parent     544ad71fc8e20fb3a6f50f00d487751492cd8409 (diff)
parent     ad438c4038968e5ca5248f851212634e474983e8 (diff)

Merge branch 'xfs-4.7-inode-reclaim' into for-next
-rw-r--r--  fs/xfs/libxfs/xfs_inode_fork.c |  27
-rw-r--r--  fs/xfs/xfs_icache.c            | 290
-rw-r--r--  fs/xfs/xfs_inode.c             | 104
-rw-r--r--  fs/xfs/xfs_super.c             |  28
4 files changed, 250 insertions(+), 199 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index d3d1477bfb9e..bbcc8c7a44b3 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -1519,6 +1519,24 @@ xfs_iext_indirect_to_direct(
 }
 
 /*
+ * Remove all records from the indirection array.
+ */
+STATIC void
+xfs_iext_irec_remove_all(
+	struct xfs_ifork	*ifp)
+{
+	int			nlists;
+	int			i;
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	for (i = 0; i < nlists; i++)
+		kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf);
+	kmem_free(ifp->if_u1.if_ext_irec);
+	ifp->if_flags &= ~XFS_IFEXTIREC;
+}
+
+/*
  * Free incore file extents.
  */
 void
@@ -1526,14 +1544,7 @@ xfs_iext_destroy(
 	xfs_ifork_t	*ifp)		/* inode fork pointer */
 {
 	if (ifp->if_flags & XFS_IFEXTIREC) {
-		int	erp_idx;
-		int	nlists;
-
-		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
-			xfs_iext_irec_remove(ifp, erp_idx);
-		}
-		ifp->if_flags &= ~XFS_IFEXTIREC;
+		xfs_iext_irec_remove_all(ifp);
 	} else if (ifp->if_real_bytes) {
 		kmem_free(ifp->if_u1.if_extents);
 	} else if (ifp->if_bytes) {
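
Note on the hunk above: the old teardown loop called xfs_iext_irec_remove() once per record, a helper that also re-packs the remaining indirection entries on each call, while the new xfs_iext_irec_remove_all() simply frees every extent buffer and then the array. A minimal standalone sketch of that bulk-teardown pattern follows; the names (irec_array, entry) are hypothetical stand-ins, not the XFS types.

    #include <stdlib.h>

    /* Hypothetical stand-ins for xfs_ifork / xfs_ext_irec in this sketch. */
    struct entry {
    	void	*extbuf;		/* per-record extent buffer */
    };

    struct irec_array {
    	struct entry	*entries;
    	int		nr;
    };

    /*
     * Bulk teardown: free each buffer once and then the array itself,
     * instead of calling a remove-one-record helper nr times and paying
     * for its re-packing work on every iteration.
     */
    static void irec_array_destroy_all(struct irec_array *a)
    {
    	int	i;

    	for (i = 0; i < a->nr; i++)
    		free(a->entries[i].extbuf);
    	free(a->entries);
    	a->entries = NULL;
    	a->nr = 0;
    }
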
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index bf2d60749278..99ee6eee5e0b 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -37,9 +37,6 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
-STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
-				struct xfs_perag *pag, struct xfs_inode *ip);
-
 /*
  * Allocate and initialise an xfs_inode.
  */
@@ -94,13 +91,6 @@ xfs_inode_free_callback(
 	struct inode		*inode = container_of(head, struct inode, i_rcu);
 	struct xfs_inode	*ip = XFS_I(inode);
 
-	kmem_zone_free(xfs_inode_zone, ip);
-}
-
-void
-xfs_inode_free(
-	struct xfs_inode	*ip)
-{
 	switch (VFS_I(ip)->i_mode & S_IFMT) {
 	case S_IFREG:
 	case S_IFDIR:
@@ -118,6 +108,25 @@ xfs_inode_free(
 		ip->i_itemp = NULL;
 	}
 
+	kmem_zone_free(xfs_inode_zone, ip);
+}
+
+static void
+__xfs_inode_free(
+	struct xfs_inode	*ip)
+{
+	/* asserts to verify all state is correct here */
+	ASSERT(atomic_read(&ip->i_pincount) == 0);
+	ASSERT(!xfs_isiflocked(ip));
+	XFS_STATS_DEC(ip->i_mount, vn_active);
+
+	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+}
+
+void
+xfs_inode_free(
+	struct xfs_inode	*ip)
+{
 	/*
 	 * Because we use RCU freeing we need to ensure the inode always
 	 * appears to be reclaimed with an invalid inode number when in the
@@ -129,12 +138,123 @@ xfs_inode_free(
 	ip->i_ino = 0;
 	spin_unlock(&ip->i_flags_lock);
 
-	/* asserts to verify all state is correct here */
-	ASSERT(atomic_read(&ip->i_pincount) == 0);
-	ASSERT(!xfs_isiflocked(ip));
-	XFS_STATS_DEC(ip->i_mount, vn_active);
+	__xfs_inode_free(ip);
+}
 
-	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs periodic sync default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_reclaim_work_queue(
+	struct xfs_mount	*mp)
+{
+
+	rcu_read_lock();
+	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
+			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+void
+xfs_reclaim_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_reclaim_work);
+
+	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+	xfs_reclaim_work_queue(mp);
+}
+
+static void
+xfs_perag_set_reclaim_tag(
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+
+	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	if (pag->pag_ici_reclaimable++)
+		return;
+
+	/* propagate the reclaim tag up into the perag radix tree */
+	spin_lock(&mp->m_perag_lock);
+	radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno,
+			   XFS_ICI_RECLAIM_TAG);
+	spin_unlock(&mp->m_perag_lock);
+
+	/* schedule periodic background inode reclaim */
+	xfs_reclaim_work_queue(mp);
+
+	trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+static void
+xfs_perag_clear_reclaim_tag(
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+
+	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	if (--pag->pag_ici_reclaimable)
+		return;
+
+	/* clear the reclaim tag from the perag radix tree */
+	spin_lock(&mp->m_perag_lock);
+	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno,
+			     XFS_ICI_RECLAIM_TAG);
+	spin_unlock(&mp->m_perag_lock);
+	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_perag	*pag;
+
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+	spin_lock(&pag->pag_ici_lock);
+	spin_lock(&ip->i_flags_lock);
+
+	radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino),
+			   XFS_ICI_RECLAIM_TAG);
+	xfs_perag_set_reclaim_tag(pag);
+	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+
+	spin_unlock(&ip->i_flags_lock);
+	spin_unlock(&pag->pag_ici_lock);
+	xfs_perag_put(pag);
+}
+
+STATIC void
+xfs_inode_clear_reclaim_tag(
+	struct xfs_perag	*pag,
+	xfs_ino_t		ino)
+{
+	radix_tree_tag_clear(&pag->pag_ici_root,
+			     XFS_INO_TO_AGINO(pag->pag_mount, ino),
+			     XFS_ICI_RECLAIM_TAG);
+	xfs_perag_clear_reclaim_tag(pag);
 }
 
 /*
@@ -264,7 +384,7 @@ xfs_iget_cache_hit(
 	 */
 	ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
 	ip->i_flags |= XFS_INEW;
-	__xfs_inode_clear_reclaim_tag(mp, pag, ip);
+	xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
 	inode->i_state = I_NEW;
 
 	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
@@ -723,121 +843,6 @@ xfs_inode_ag_iterator_tag(
 }
 
 /*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs periodic sync default of 30s. Perhaps this should have it's own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_reclaim_work_queue(
-	struct xfs_mount	*mp)
-{
-
-	rcu_read_lock();
-	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
-			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-	}
-	rcu_read_unlock();
-}
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-void
-xfs_reclaim_worker(
-	struct work_struct *work)
-{
-	struct xfs_mount *mp = container_of(to_delayed_work(work),
-					struct xfs_mount, m_reclaim_work);
-
-	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
-	xfs_reclaim_work_queue(mp);
-}
-
-static void
-__xfs_inode_set_reclaim_tag(
-	struct xfs_perag	*pag,
-	struct xfs_inode	*ip)
-{
-	radix_tree_tag_set(&pag->pag_ici_root,
-			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-			   XFS_ICI_RECLAIM_TAG);
-
-	if (!pag->pag_ici_reclaimable) {
-		/* propagate the reclaim tag up into the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-
-		/* schedule periodic background inode reclaim */
-		xfs_reclaim_work_queue(ip->i_mount);
-
-		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-	pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
-	xfs_inode_t	*ip)
-{
-	struct xfs_mount *mp = ip->i_mount;
-	struct xfs_perag *pag;
-
-	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-	spin_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-	__xfs_inode_set_reclaim_tag(pag, ip);
-	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-	spin_unlock(&ip->i_flags_lock);
-	spin_unlock(&pag->pag_ici_lock);
-	xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
-	xfs_perag_t	*pag,
-	xfs_inode_t	*ip)
-{
-	pag->pag_ici_reclaimable--;
-	if (!pag->pag_ici_reclaimable) {
-		/* clear the reclaim tag from the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-}
-
-STATIC void
-__xfs_inode_clear_reclaim_tag(
-	xfs_mount_t	*mp,
-	xfs_perag_t	*pag,
-	xfs_inode_t	*ip)
-{
-	radix_tree_tag_clear(&pag->pag_ici_root,
-			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
-	__xfs_inode_clear_reclaim(pag, ip);
-}
-
-/*
  * Grab the inode for reclaim exclusively.
  * Return 0 if we grabbed it, non-zero otherwise.
  */
@@ -929,6 +934,7 @@ xfs_reclaim_inode(
 	int			sync_mode)
 {
 	struct xfs_buf		*bp = NULL;
+	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */
 	int			error;
 
 restart:
@@ -993,6 +999,22 @@ restart:
 
 	xfs_iflock(ip);
 reclaim:
+	/*
+	 * Because we use RCU freeing we need to ensure the inode always appears
+	 * to be reclaimed with an invalid inode number when in the free state.
+	 * We do this as early as possible under the ILOCK and flush lock so
+	 * that xfs_iflush_cluster() can be guaranteed to detect races with us
+	 * here. By doing this, we guarantee that once xfs_iflush_cluster has
+	 * locked both the XFS_ILOCK and the flush lock that it will see either
+	 * a valid, flushable inode that will serialise correctly against the
+	 * locks below, or it will see a clean (and invalid) inode that it can
+	 * skip.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags = XFS_IRECLAIM;
+	ip->i_ino = 0;
+	spin_unlock(&ip->i_flags_lock);
+
 	xfs_ifunlock(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
@@ -1006,9 +1028,9 @@ reclaim:
 	 */
 	spin_lock(&pag->pag_ici_lock);
 	if (!radix_tree_delete(&pag->pag_ici_root,
-				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+				XFS_INO_TO_AGINO(ip->i_mount, ino)))
 		ASSERT(0);
-	__xfs_inode_clear_reclaim(pag, ip);
+	xfs_perag_clear_reclaim_tag(pag);
 	spin_unlock(&pag->pag_ici_lock);
 
 	/*
@@ -1023,7 +1045,7 @@ reclaim:
 	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
-	xfs_inode_free(ip);
+	__xfs_inode_free(ip);
 	return error;
 
 out_ifunlock:
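
Note on the xfs_icache.c changes above: the new xfs_perag_set_reclaim_tag()/xfs_perag_clear_reclaim_tag() pair centralises a counted-tag idiom in which pag_ici_reclaimable counts the tagged inodes in an AG, and only the 0 -> 1 and 1 -> 0 transitions touch the mount-wide perag radix tree. The post-increment and pre-decrement tests make those transitions exact: pag->pag_ici_reclaimable++ is non-zero only when the perag tag was already set, and --pag->pag_ici_reclaimable is zero only when the last tagged inode goes away. (The companion change, zeroing ip->i_ino in xfs_reclaim_inode() before the RCU free, is what the xfs_inode.c re-check below relies on.) A compilable toy sketch of the counted-tag idiom; names are hypothetical, and the real code holds pag_ici_lock around both operations:

    #include <assert.h>
    #include <stdbool.h>

    /* Hypothetical per-group state; stands in for struct xfs_perag. */
    struct group {
    	int	nr_tagged;	/* caller must hold the group's lock */
    	bool	parent_tagged;	/* stands in for the perag radix tree tag */
    };

    /* First tag in the group propagates up; later ones are counter-only. */
    static void group_tag_inc(struct group *g)
    {
    	if (g->nr_tagged++)		/* was already non-zero */
    		return;
    	g->parent_tagged = true;	/* 0 -> 1: tag the parent index */
    }

    /* Last clear in the group propagates up; earlier ones are counter-only. */
    static void group_tag_dec(struct group *g)
    {
    	assert(g->nr_tagged > 0);
    	if (--g->nr_tagged)		/* still non-zero */
    		return;
    	g->parent_tagged = false;	/* 1 -> 0: clear the parent index */
    }
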
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f79ea594fbf2..ee6799e0476f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3149,16 +3149,16 @@ out_release_wip:
 
 STATIC int
 xfs_iflush_cluster(
-	xfs_inode_t	*ip,
-	xfs_buf_t	*bp)
+	struct xfs_inode *ip,
+	struct xfs_buf	*bp)
 {
-	xfs_mount_t		*mp = ip->i_mount;
+	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_perag	*pag;
 	unsigned long		first_index, mask;
 	unsigned long		inodes_per_cluster;
-	int			ilist_size;
-	xfs_inode_t		**ilist;
-	xfs_inode_t		*iq;
+	int			cilist_size;
+	struct xfs_inode	**cilist;
+	struct xfs_inode	*cip;
 	int			nr_found;
 	int			clcount = 0;
 	int			bufwasdelwri;
@@ -3167,23 +3167,23 @@ xfs_iflush_cluster(
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
 
 	inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
-	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
-	if (!ilist)
+	cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+	cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
+	if (!cilist)
 		goto out_put;
 
 	mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
 	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
 	rcu_read_lock();
 	/* really need a gang lookup range call here */
-	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
 					first_index, inodes_per_cluster);
 	if (nr_found == 0)
 		goto out_free;
 
 	for (i = 0; i < nr_found; i++) {
-		iq = ilist[i];
-		if (iq == ip)
+		cip = cilist[i];
+		if (cip == ip)
 			continue;
 
 		/*
@@ -3192,20 +3192,30 @@ xfs_iflush_cluster(
 		 * We need to check under the i_flags_lock for a valid inode
 		 * here. Skip it if it is not valid or the wrong inode.
 		 */
-		spin_lock(&ip->i_flags_lock);
-		if (!ip->i_ino ||
-		    (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
-			spin_unlock(&ip->i_flags_lock);
+		spin_lock(&cip->i_flags_lock);
+		if (!cip->i_ino ||
+		    __xfs_iflags_test(cip, XFS_ISTALE)) {
+			spin_unlock(&cip->i_flags_lock);
 			continue;
 		}
-		spin_unlock(&ip->i_flags_lock);
+
+		/*
+		 * Once we fall off the end of the cluster, no point checking
+		 * any more inodes in the list because they will also all be
+		 * outside the cluster.
+		 */
+		if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) {
+			spin_unlock(&cip->i_flags_lock);
+			break;
+		}
+		spin_unlock(&cip->i_flags_lock);
 
 		/*
 		 * Do an un-protected check to see if the inode is dirty and
 		 * is a candidate for flushing.  These checks will be repeated
 		 * later after the appropriate locks are acquired.
 		 */
-		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+		if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)
 			continue;
 
 		/*
@@ -3213,15 +3223,28 @@ xfs_iflush_cluster(
 		 * then this inode cannot be flushed and is skipped.
 		 */
 
-		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+		if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED))
 			continue;
-		if (!xfs_iflock_nowait(iq)) {
-			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+		if (!xfs_iflock_nowait(cip)) {
+			xfs_iunlock(cip, XFS_ILOCK_SHARED);
 			continue;
 		}
-		if (xfs_ipincount(iq)) {
-			xfs_ifunlock(iq);
-			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+		if (xfs_ipincount(cip)) {
+			xfs_ifunlock(cip);
+			xfs_iunlock(cip, XFS_ILOCK_SHARED);
+			continue;
+		}
+
+
+		/*
+		 * Check the inode number again, just to be certain we are not
+		 * racing with freeing in xfs_reclaim_inode(). See the comments
+		 * in that function for more information as to why the initial
+		 * check is not sufficient.
+		 */
+		if (!cip->i_ino) {
+			xfs_ifunlock(cip);
+			xfs_iunlock(cip, XFS_ILOCK_SHARED);
 			continue;
 		}
 
@@ -3229,18 +3252,18 @@ xfs_iflush_cluster(
 		 * arriving here means that this inode can be flushed. First
 		 * re-check that it's dirty before flushing.
 		 */
-		if (!xfs_inode_clean(iq)) {
+		if (!xfs_inode_clean(cip)) {
 			int	error;
-			error = xfs_iflush_int(iq, bp);
+			error = xfs_iflush_int(cip, bp);
 			if (error) {
-				xfs_iunlock(iq, XFS_ILOCK_SHARED);
+				xfs_iunlock(cip, XFS_ILOCK_SHARED);
 				goto cluster_corrupt_out;
 			}
 			clcount++;
 		} else {
-			xfs_ifunlock(iq);
+			xfs_ifunlock(cip);
 		}
-		xfs_iunlock(iq, XFS_ILOCK_SHARED);
+		xfs_iunlock(cip, XFS_ILOCK_SHARED);
 	}
 
 	if (clcount) {
@@ -3250,7 +3273,7 @@ xfs_iflush_cluster(
 
 out_free:
 	rcu_read_unlock();
-	kmem_free(ilist);
+	kmem_free(cilist);
 out_put:
 	xfs_perag_put(pag);
 	return 0;
@@ -3293,8 +3316,8 @@ cluster_corrupt_out:
 	/*
 	 * Unlocks the flush lock
 	 */
-	xfs_iflush_abort(iq, false);
-	kmem_free(ilist);
+	xfs_iflush_abort(cip, false);
+	kmem_free(cilist);
 	xfs_perag_put(pag);
 	return -EFSCORRUPTED;
 }
@@ -3314,7 +3337,7 @@ xfs_iflush(
 	struct xfs_buf		**bpp)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_buf		*bp;
+	struct xfs_buf		*bp = NULL;
 	struct xfs_dinode	*dip;
 	int			error;
 
@@ -3356,14 +3379,22 @@ xfs_iflush(
 	}
 
 	/*
-	 * Get the buffer containing the on-disk inode.
+	 * Get the buffer containing the on-disk inode. We are doing a try-lock
+	 * operation here, so we may get an EAGAIN error. In that case, we
+	 * simply want to return with the inode still dirty.
+	 *
+	 * If we get any other error, we effectively have a corruption situation
+	 * and we cannot flush the inode, so we treat it the same as failing
+	 * xfs_iflush_int().
 	 */
 	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
 			       0);
-	if (error || !bp) {
+	if (error == -EAGAIN) {
 		xfs_ifunlock(ip);
 		return error;
 	}
+	if (error)
+		goto corrupt_out;
 
 	/*
 	 * First flush out the inode that xfs_iflush was called with.
@@ -3391,7 +3422,8 @@ xfs_iflush(
 	return 0;
 
 corrupt_out:
-	xfs_buf_relse(bp);
+	if (bp)
+		xfs_buf_relse(bp);
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 cluster_corrupt_out:
 	error = -EFSCORRUPTED;
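
Note on the xfs_iflush_cluster() changes above: they pair with the xfs_reclaim_inode() change in xfs_icache.c. Reclaim now zeroes ip->i_ino under i_flags_lock while it still holds the ILOCK and flush lock, so a cluster flush that wins the trylock race can detect a dying inode by re-testing i_ino once its own locks are held; the initial i_flags_lock check alone cannot catch a free that completes in between. A compilable toy of that check/lock/re-check pattern, using pthread mutexes in place of the kernel locks (all names hypothetical, and the kernel relies on lock ordering plus RCU rather than taking the flags lock for the re-check):

    #include <pthread.h>
    #include <stdbool.h>

    /* Hypothetical object; stands in for struct xfs_inode in this sketch. */
    struct obj {
    	pthread_mutex_t	flags_lock;	/* plays the role of i_flags_lock */
    	pthread_mutex_t	flush_lock;	/* plays the role of the flush lock */
    	unsigned long	id;		/* the freeing side zeroes this first */
    };

    /* Free side: invalidate while still holding flush_lock, then release. */
    static void obj_free_prepare(struct obj *o)
    {
    	/* caller already owns flush_lock here */
    	pthread_mutex_lock(&o->flags_lock);
    	o->id = 0;
    	pthread_mutex_unlock(&o->flags_lock);
    	pthread_mutex_unlock(&o->flush_lock);
    	/* the actual memory free would be RCU-deferred in the kernel */
    }

    /* Flush side: cheap validity test, heavier trylock, mandatory re-check. */
    static bool obj_grab_for_flush(struct obj *o)
    {
    	bool valid;

    	/* First check: skip objects that are already being freed. */
    	pthread_mutex_lock(&o->flags_lock);
    	valid = (o->id != 0);
    	pthread_mutex_unlock(&o->flags_lock);
    	if (!valid)
    		return false;

    	/* Trylock so a busy object is skipped, not waited on. */
    	if (pthread_mutex_trylock(&o->flush_lock))
    		return false;

    	/*
    	 * Re-check after locking: the freeing side zeroes ->id before it
    	 * drops flush_lock, so a racing free is guaranteed visible here.
    	 */
    	pthread_mutex_lock(&o->flags_lock);
    	valid = (o->id != 0);
    	pthread_mutex_unlock(&o->flags_lock);
    	if (!valid) {
    		pthread_mutex_unlock(&o->flush_lock);
    		return false;
    	}
    	return true;	/* caller flushes, then unlocks flush_lock */
    }
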
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index b412bb1c5fd3..d8424f5c5e74 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -928,7 +928,7 @@ xfs_fs_alloc_inode(
 
 /*
  * Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
+ * the linux inode, we can inactivate and reclaim the inode.
 */
 STATIC void
 xfs_fs_destroy_inode(
@@ -938,9 +938,14 @@ xfs_fs_destroy_inode(
 
 	trace_xfs_destroy_inode(ip);
 
-	XFS_STATS_INC(ip->i_mount, vn_reclaim);
+	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+	XFS_STATS_INC(ip->i_mount, vn_rele);
+	XFS_STATS_INC(ip->i_mount, vn_remove);
+
+	xfs_inactive(ip);
 
 	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+	XFS_STATS_INC(ip->i_mount, vn_reclaim);
 
 	/*
 	 * We should never get here with one of the reclaim flags already set.
@@ -987,24 +992,6 @@ xfs_fs_inode_init_once(
 			"xfsino", ip->i_ino);
 }
 
-STATIC void
-xfs_fs_evict_inode(
-	struct inode		*inode)
-{
-	xfs_inode_t		*ip = XFS_I(inode);
-
-	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-
-	trace_xfs_evict_inode(ip);
-
-	truncate_inode_pages_final(&inode->i_data);
-	clear_inode(inode);
-	XFS_STATS_INC(ip->i_mount, vn_rele);
-	XFS_STATS_INC(ip->i_mount, vn_remove);
-
-	xfs_inactive(ip);
-}
-
 /*
  * We do an unlocked check for XFS_IDONTCACHE here because we are already
  * serialised against cache hits here via the inode->i_lock and igrab() in
@@ -1673,7 +1660,6 @@ xfs_fs_free_cached_objects(
 static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
-	.evict_inode		= xfs_fs_evict_inode,
 	.drop_inode		= xfs_fs_drop_inode,
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,
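
Net effect of the xfs_super.c hunks: the XFS-specific work that used to run from ->evict_inode (the stats bumps and xfs_inactive()) now runs at the top of ->destroy_inode, and with the callback removed the generic VFS eviction path handles the page-cache truncation itself, so XFS tears an inode down in one callback instead of two. A toy C sketch of that kind of consolidation; every name below is invented for the sketch, not a kernel API:

    #include <stdio.h>

    /* Illustrative only: two-stage teardown collapsed into one callback. */
    struct super_ops {
    	void (*destroy_inode)(unsigned long ino);
    	/* the evict_inode slot is simply gone */
    };

    static void toy_inactive(unsigned long ino)
    {
    	printf("inode %lu: final inactivation (truncate/unlink work)\n", ino);
    }

    static void toy_destroy_inode(unsigned long ino)
    {
    	toy_inactive(ino);	/* formerly done from the evict callback */
    	printf("inode %lu: tag for background reclaim\n", ino);
    }

    static const struct super_ops toy_ops = {
    	.destroy_inode	= toy_destroy_inode,
    };

    int main(void)
    {
    	toy_ops.destroy_inode(42);
    	return 0;
    }
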