author		Dave Chinner <david@fromorbit.com>	2016-05-19 20:34:00 -0400
committer	Dave Chinner <david@fromorbit.com>	2016-05-19 20:34:00 -0400
commit		555b67e4e729ca544bb4028ab12e532c68b70ddb (patch)
tree		8bfb59ccca39cebe0210366bebfeddd8bd3ab3a3
parent		544ad71fc8e20fb3a6f50f00d487751492cd8409 (diff)
parent		ad438c4038968e5ca5248f851212634e474983e8 (diff)
Merge branch 'xfs-4.7-inode-reclaim' into for-next
-rw-r--r--	fs/xfs/libxfs/xfs_inode_fork.c	27
-rw-r--r--	fs/xfs/xfs_icache.c	290
-rw-r--r--	fs/xfs/xfs_inode.c	104
-rw-r--r--	fs/xfs/xfs_super.c	28
4 files changed, 250 insertions, 199 deletions
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index d3d1477bfb9e..bbcc8c7a44b3 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -1519,6 +1519,24 @@ xfs_iext_indirect_to_direct(
 }
 
 /*
+ * Remove all records from the indirection array.
+ */
+STATIC void
+xfs_iext_irec_remove_all(
+	struct xfs_ifork *ifp)
+{
+	int		nlists;
+	int		i;
+
+	ASSERT(ifp->if_flags & XFS_IFEXTIREC);
+	nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
+	for (i = 0; i < nlists; i++)
+		kmem_free(ifp->if_u1.if_ext_irec[i].er_extbuf);
+	kmem_free(ifp->if_u1.if_ext_irec);
+	ifp->if_flags &= ~XFS_IFEXTIREC;
+}
+
+/*
  * Free incore file extents.
  */
 void
@@ -1526,14 +1544,7 @@ xfs_iext_destroy(
 	xfs_ifork_t	*ifp)		/* inode fork pointer */
 {
 	if (ifp->if_flags & XFS_IFEXTIREC) {
-		int	erp_idx;
-		int	nlists;
-
-		nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ;
-		for (erp_idx = nlists - 1; erp_idx >= 0 ; erp_idx--) {
-			xfs_iext_irec_remove(ifp, erp_idx);
-		}
-		ifp->if_flags &= ~XFS_IFEXTIREC;
+		xfs_iext_irec_remove_all(ifp);
 	} else if (ifp->if_real_bytes) {
 		kmem_free(ifp->if_u1.if_extents);
 	} else if (ifp->if_bytes) {
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index bf2d60749278..99ee6eee5e0b 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -37,9 +37,6 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
-STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
-				struct xfs_perag *pag, struct xfs_inode *ip);
-
 /*
  * Allocate and initialise an xfs_inode.
  */
@@ -94,13 +91,6 @@ xfs_inode_free_callback(
 	struct inode		*inode = container_of(head, struct inode, i_rcu);
 	struct xfs_inode	*ip = XFS_I(inode);
 
-	kmem_zone_free(xfs_inode_zone, ip);
-}
-
-void
-xfs_inode_free(
-	struct xfs_inode	*ip)
-{
 	switch (VFS_I(ip)->i_mode & S_IFMT) {
 	case S_IFREG:
 	case S_IFDIR:
@@ -118,6 +108,25 @@ xfs_inode_free(
 		ip->i_itemp = NULL;
 	}
 
+	kmem_zone_free(xfs_inode_zone, ip);
+}
+
+static void
+__xfs_inode_free(
+	struct xfs_inode	*ip)
+{
+	/* asserts to verify all state is correct here */
+	ASSERT(atomic_read(&ip->i_pincount) == 0);
+	ASSERT(!xfs_isiflocked(ip));
+	XFS_STATS_DEC(ip->i_mount, vn_active);
+
+	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+}
+
+void
+xfs_inode_free(
+	struct xfs_inode	*ip)
+{
 	/*
 	 * Because we use RCU freeing we need to ensure the inode always
 	 * appears to be reclaimed with an invalid inode number when in the
@@ -129,12 +138,123 @@ xfs_inode_free(
 	ip->i_ino = 0;
 	spin_unlock(&ip->i_flags_lock);
 
-	/* asserts to verify all state is correct here */
-	ASSERT(atomic_read(&ip->i_pincount) == 0);
-	ASSERT(!xfs_isiflocked(ip));
-	XFS_STATS_DEC(ip->i_mount, vn_active);
-
-	call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
+	__xfs_inode_free(ip);
+}
+
+/*
+ * Queue a new inode reclaim pass if there are reclaimable inodes and there
+ * isn't a reclaim pass already in progress. By default it runs every 5s based
+ * on the xfs periodic sync default of 30s. Perhaps this should have it's own
+ * tunable, but that can be done if this method proves to be ineffective or too
+ * aggressive.
+ */
+static void
+xfs_reclaim_work_queue(
+	struct xfs_mount	*mp)
+{
+
+	rcu_read_lock();
+	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
+		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
+			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
+	}
+	rcu_read_unlock();
+}
+
+/*
+ * This is a fast pass over the inode cache to try to get reclaim moving on as
+ * many inodes as possible in a short period of time. It kicks itself every few
+ * seconds, as well as being kicked by the inode cache shrinker when memory
+ * goes low. It scans as quickly as possible avoiding locked inodes or those
+ * already being flushed, and once done schedules a future pass.
+ */
+void
+xfs_reclaim_worker(
+	struct work_struct *work)
+{
+	struct xfs_mount *mp = container_of(to_delayed_work(work),
+					struct xfs_mount, m_reclaim_work);
+
+	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
+	xfs_reclaim_work_queue(mp);
+}
+
+static void
+xfs_perag_set_reclaim_tag(
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+
+	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	if (pag->pag_ici_reclaimable++)
+		return;
+
+	/* propagate the reclaim tag up into the perag radix tree */
+	spin_lock(&mp->m_perag_lock);
+	radix_tree_tag_set(&mp->m_perag_tree, pag->pag_agno,
+			   XFS_ICI_RECLAIM_TAG);
+	spin_unlock(&mp->m_perag_lock);
+
+	/* schedule periodic background inode reclaim */
+	xfs_reclaim_work_queue(mp);
+
+	trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+static void
+xfs_perag_clear_reclaim_tag(
+	struct xfs_perag	*pag)
+{
+	struct xfs_mount	*mp = pag->pag_mount;
+
+	ASSERT(spin_is_locked(&pag->pag_ici_lock));
+	if (--pag->pag_ici_reclaimable)
+		return;
+
+	/* clear the reclaim tag from the perag radix tree */
+	spin_lock(&mp->m_perag_lock);
+	radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno,
+			     XFS_ICI_RECLAIM_TAG);
+	spin_unlock(&mp->m_perag_lock);
+	trace_xfs_perag_clear_reclaim(mp, pag->pag_agno, -1, _RET_IP_);
+}
+
+
+/*
+ * We set the inode flag atomically with the radix tree tag.
+ * Once we get tag lookups on the radix tree, this inode flag
+ * can go away.
+ */
+void
+xfs_inode_set_reclaim_tag(
+	struct xfs_inode	*ip)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_perag	*pag;
+
+	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
+	spin_lock(&pag->pag_ici_lock);
+	spin_lock(&ip->i_flags_lock);
+
+	radix_tree_tag_set(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino),
+			   XFS_ICI_RECLAIM_TAG);
+	xfs_perag_set_reclaim_tag(pag);
+	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
+
+	spin_unlock(&ip->i_flags_lock);
+	spin_unlock(&pag->pag_ici_lock);
+	xfs_perag_put(pag);
+}
+
+STATIC void
+xfs_inode_clear_reclaim_tag(
+	struct xfs_perag	*pag,
+	xfs_ino_t		ino)
+{
+	radix_tree_tag_clear(&pag->pag_ici_root,
+			     XFS_INO_TO_AGINO(pag->pag_mount, ino),
+			     XFS_ICI_RECLAIM_TAG);
+	xfs_perag_clear_reclaim_tag(pag);
 }
 
 /*
@@ -264,7 +384,7 @@ xfs_iget_cache_hit(
 		 */
 		ip->i_flags &= ~XFS_IRECLAIM_RESET_FLAGS;
 		ip->i_flags |= XFS_INEW;
-		__xfs_inode_clear_reclaim_tag(mp, pag, ip);
+		xfs_inode_clear_reclaim_tag(pag, ip->i_ino);
 		inode->i_state = I_NEW;
 
 		ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
@@ -723,121 +843,6 @@ xfs_inode_ag_iterator_tag(
 }
 
 /*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs periodic sync default of 30s. Perhaps this should have it's own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_reclaim_work_queue(
-	struct xfs_mount	*mp)
-{
-
-	rcu_read_lock();
-	if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
-		queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
-			msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
-	}
-	rcu_read_unlock();
-}
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-void
-xfs_reclaim_worker(
-	struct work_struct *work)
-{
-	struct xfs_mount *mp = container_of(to_delayed_work(work),
-					struct xfs_mount, m_reclaim_work);
-
-	xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
-	xfs_reclaim_work_queue(mp);
-}
-
-static void
-__xfs_inode_set_reclaim_tag(
-	struct xfs_perag	*pag,
-	struct xfs_inode	*ip)
-{
-	radix_tree_tag_set(&pag->pag_ici_root,
-			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
-			   XFS_ICI_RECLAIM_TAG);
-
-	if (!pag->pag_ici_reclaimable) {
-		/* propagate the reclaim tag up into the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_set(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-
-		/* schedule periodic background inode reclaim */
-		xfs_reclaim_work_queue(ip->i_mount);
-
-		trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-	pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
-	xfs_inode_t	*ip)
-{
-	struct xfs_mount *mp = ip->i_mount;
-	struct xfs_perag *pag;
-
-	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-	spin_lock(&pag->pag_ici_lock);
-	spin_lock(&ip->i_flags_lock);
-	__xfs_inode_set_reclaim_tag(pag, ip);
-	__xfs_iflags_set(ip, XFS_IRECLAIMABLE);
-	spin_unlock(&ip->i_flags_lock);
-	spin_unlock(&pag->pag_ici_lock);
-	xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
-	xfs_perag_t	*pag,
-	xfs_inode_t	*ip)
-{
-	pag->pag_ici_reclaimable--;
-	if (!pag->pag_ici_reclaimable) {
-		/* clear the reclaim tag from the perag radix tree */
-		spin_lock(&ip->i_mount->m_perag_lock);
-		radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
-				XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
-				XFS_ICI_RECLAIM_TAG);
-		spin_unlock(&ip->i_mount->m_perag_lock);
-		trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
-							-1, _RET_IP_);
-	}
-}
-
-STATIC void
-__xfs_inode_clear_reclaim_tag(
-	xfs_mount_t	*mp,
-	xfs_perag_t	*pag,
-	xfs_inode_t	*ip)
-{
-	radix_tree_tag_clear(&pag->pag_ici_root,
-			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
-	__xfs_inode_clear_reclaim(pag, ip);
-}
-
-/*
  * Grab the inode for reclaim exclusively.
  * Return 0 if we grabbed it, non-zero otherwise.
  */
@@ -929,6 +934,7 @@ xfs_reclaim_inode(
 	int			sync_mode)
 {
 	struct xfs_buf		*bp = NULL;
+	xfs_ino_t		ino = ip->i_ino; /* for radix_tree_delete */
 	int			error;
 
 restart:
@@ -993,6 +999,22 @@ restart:
 
 	xfs_iflock(ip);
 reclaim:
+	/*
+	 * Because we use RCU freeing we need to ensure the inode always appears
+	 * to be reclaimed with an invalid inode number when in the free state.
+	 * We do this as early as possible under the ILOCK and flush lock so
+	 * that xfs_iflush_cluster() can be guaranteed to detect races with us
+	 * here. By doing this, we guarantee that once xfs_iflush_cluster has
+	 * locked both the XFS_ILOCK and the flush lock that it will see either
+	 * a valid, flushable inode that will serialise correctly against the
+	 * locks below, or it will see a clean (and invalid) inode that it can
+	 * skip.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ip->i_flags = XFS_IRECLAIM;
+	ip->i_ino = 0;
+	spin_unlock(&ip->i_flags_lock);
+
 	xfs_ifunlock(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
@@ -1006,9 +1028,9 @@ reclaim:
 	 */
 	spin_lock(&pag->pag_ici_lock);
 	if (!radix_tree_delete(&pag->pag_ici_root,
-				XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+				XFS_INO_TO_AGINO(ip->i_mount, ino)))
 		ASSERT(0);
-	__xfs_inode_clear_reclaim(pag, ip);
+	xfs_perag_clear_reclaim_tag(pag);
 	spin_unlock(&pag->pag_ici_lock);
 
 	/*
@@ -1023,7 +1045,7 @@ reclaim:
 	xfs_qm_dqdetach(ip);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 
-	xfs_inode_free(ip);
+	__xfs_inode_free(ip);
 	return error;
 
 out_ifunlock:
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index f79ea594fbf2..ee6799e0476f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3149,16 +3149,16 @@ out_release_wip:
 
 STATIC int
 xfs_iflush_cluster(
-	xfs_inode_t	*ip,
-	xfs_buf_t	*bp)
+	struct xfs_inode *ip,
+	struct xfs_buf	*bp)
 {
-	xfs_mount_t		*mp = ip->i_mount;
+	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_perag	*pag;
 	unsigned long		first_index, mask;
 	unsigned long		inodes_per_cluster;
-	int			ilist_size;
-	xfs_inode_t		**ilist;
-	xfs_inode_t		*iq;
+	int			cilist_size;
+	struct xfs_inode	**cilist;
+	struct xfs_inode	*cip;
 	int			nr_found;
 	int			clcount = 0;
 	int			bufwasdelwri;
@@ -3167,23 +3167,23 @@ xfs_iflush_cluster(
 	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
 
 	inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;
-	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
-	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);
-	if (!ilist)
+	cilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);
+	cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
+	if (!cilist)
 		goto out_put;
 
 	mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);
 	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
 	rcu_read_lock();
 	/* really need a gang lookup range call here */
-	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)ilist,
+	nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
 					first_index, inodes_per_cluster);
 	if (nr_found == 0)
 		goto out_free;
 
 	for (i = 0; i < nr_found; i++) {
-		iq = ilist[i];
-		if (iq == ip)
+		cip = cilist[i];
+		if (cip == ip)
 			continue;
 
 		/*
@@ -3192,20 +3192,30 @@ xfs_iflush_cluster(
 		 * We need to check under the i_flags_lock for a valid inode
 		 * here. Skip it if it is not valid or the wrong inode.
 		 */
-		spin_lock(&ip->i_flags_lock);
-		if (!ip->i_ino ||
-		    (XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
-			spin_unlock(&ip->i_flags_lock);
+		spin_lock(&cip->i_flags_lock);
+		if (!cip->i_ino ||
+		    __xfs_iflags_test(cip, XFS_ISTALE)) {
+			spin_unlock(&cip->i_flags_lock);
 			continue;
 		}
-		spin_unlock(&ip->i_flags_lock);
+
+		/*
+		 * Once we fall off the end of the cluster, no point checking
+		 * any more inodes in the list because they will also all be
+		 * outside the cluster.
+		 */
+		if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) {
+			spin_unlock(&cip->i_flags_lock);
+			break;
+		}
+		spin_unlock(&cip->i_flags_lock);
 
 		/*
 		 * Do an un-protected check to see if the inode is dirty and
 		 * is a candidate for flushing. These checks will be repeated
 		 * later after the appropriate locks are acquired.
 		 */
-		if (xfs_inode_clean(iq) && xfs_ipincount(iq) == 0)
+		if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)
 			continue;
 
 		/*
@@ -3213,15 +3223,28 @@ xfs_iflush_cluster(
 		 * then this inode cannot be flushed and is skipped.
 		 */
 
-		if (!xfs_ilock_nowait(iq, XFS_ILOCK_SHARED))
+		if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED))
 			continue;
-		if (!xfs_iflock_nowait(iq)) {
-			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+		if (!xfs_iflock_nowait(cip)) {
+			xfs_iunlock(cip, XFS_ILOCK_SHARED);
 			continue;
 		}
-		if (xfs_ipincount(iq)) {
-			xfs_ifunlock(iq);
-			xfs_iunlock(iq, XFS_ILOCK_SHARED);
+		if (xfs_ipincount(cip)) {
+			xfs_ifunlock(cip);
+			xfs_iunlock(cip, XFS_ILOCK_SHARED);
+			continue;
+		}
+
+
+		/*
+		 * Check the inode number again, just to be certain we are not
+		 * racing with freeing in xfs_reclaim_inode(). See the comments
+		 * in that function for more information as to why the initial
+		 * check is not sufficient.
+		 */
+		if (!cip->i_ino) {
+			xfs_ifunlock(cip);
+			xfs_iunlock(cip, XFS_ILOCK_SHARED);
 			continue;
 		}
 
@@ -3229,18 +3252,18 @@ xfs_iflush_cluster(
 		 * arriving here means that this inode can be flushed. First
 		 * re-check that it's dirty before flushing.
 		 */
-		if (!xfs_inode_clean(iq)) {
+		if (!xfs_inode_clean(cip)) {
 			int	error;
-			error = xfs_iflush_int(iq, bp);
+			error = xfs_iflush_int(cip, bp);
 			if (error) {
-				xfs_iunlock(iq, XFS_ILOCK_SHARED);
+				xfs_iunlock(cip, XFS_ILOCK_SHARED);
 				goto cluster_corrupt_out;
 			}
 			clcount++;
 		} else {
-			xfs_ifunlock(iq);
+			xfs_ifunlock(cip);
 		}
-		xfs_iunlock(iq, XFS_ILOCK_SHARED);
+		xfs_iunlock(cip, XFS_ILOCK_SHARED);
 	}
 
 	if (clcount) {
@@ -3250,7 +3273,7 @@ xfs_iflush_cluster(
 
 out_free:
 	rcu_read_unlock();
-	kmem_free(ilist);
+	kmem_free(cilist);
 out_put:
 	xfs_perag_put(pag);
 	return 0;
@@ -3293,8 +3316,8 @@ cluster_corrupt_out:
 	/*
 	 * Unlocks the flush lock
 	 */
-	xfs_iflush_abort(iq, false);
-	kmem_free(ilist);
+	xfs_iflush_abort(cip, false);
+	kmem_free(cilist);
 	xfs_perag_put(pag);
 	return -EFSCORRUPTED;
 }
@@ -3314,7 +3337,7 @@ xfs_iflush(
 	struct xfs_buf		**bpp)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	struct xfs_buf		*bp;
+	struct xfs_buf		*bp = NULL;
 	struct xfs_dinode	*dip;
 	int			error;
 
@@ -3356,14 +3379,22 @@ xfs_iflush(
 	}
 
 	/*
-	 * Get the buffer containing the on-disk inode.
+	 * Get the buffer containing the on-disk inode. We are doing a try-lock
+	 * operation here, so we may get an EAGAIN error. In that case, we
+	 * simply want to return with the inode still dirty.
+	 *
+	 * If we get any other error, we effectively have a corruption situation
+	 * and we cannot flush the inode, so we treat it the same as failing
+	 * xfs_iflush_int().
 	 */
 	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
 			       0);
-	if (error || !bp) {
+	if (error == -EAGAIN) {
 		xfs_ifunlock(ip);
 		return error;
 	}
+	if (error)
+		goto corrupt_out;
 
 	/*
 	 * First flush out the inode that xfs_iflush was called with.
@@ -3391,7 +3422,8 @@ xfs_iflush(
 	return 0;
 
 corrupt_out:
-	xfs_buf_relse(bp);
+	if (bp)
+		xfs_buf_relse(bp);
 	xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
 cluster_corrupt_out:
 	error = -EFSCORRUPTED;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index b412bb1c5fd3..d8424f5c5e74 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -928,7 +928,7 @@ xfs_fs_alloc_inode(
 
 /*
  * Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
+ * the linux inode, we can inactivate and reclaim the inode.
  */
 STATIC void
 xfs_fs_destroy_inode(
@@ -938,9 +938,14 @@ xfs_fs_destroy_inode(
 
 	trace_xfs_destroy_inode(ip);
 
-	XFS_STATS_INC(ip->i_mount, vn_reclaim);
+	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+	XFS_STATS_INC(ip->i_mount, vn_rele);
+	XFS_STATS_INC(ip->i_mount, vn_remove);
+
+	xfs_inactive(ip);
 
 	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+	XFS_STATS_INC(ip->i_mount, vn_reclaim);
 
 	/*
 	 * We should never get here with one of the reclaim flags already set.
@@ -987,24 +992,6 @@ xfs_fs_inode_init_once(
987 "xfsino", ip->i_ino); 992 "xfsino", ip->i_ino);
988} 993}
989 994
990STATIC void
991xfs_fs_evict_inode(
992 struct inode *inode)
993{
994 xfs_inode_t *ip = XFS_I(inode);
995
996 ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
997
998 trace_xfs_evict_inode(ip);
999
1000 truncate_inode_pages_final(&inode->i_data);
1001 clear_inode(inode);
1002 XFS_STATS_INC(ip->i_mount, vn_rele);
1003 XFS_STATS_INC(ip->i_mount, vn_remove);
1004
1005 xfs_inactive(ip);
1006}
1007
1008/* 995/*
1009 * We do an unlocked check for XFS_IDONTCACHE here because we are already 996 * We do an unlocked check for XFS_IDONTCACHE here because we are already
1010 * serialised against cache hits here via the inode->i_lock and igrab() in 997 * serialised against cache hits here via the inode->i_lock and igrab() in
@@ -1673,7 +1660,6 @@ xfs_fs_free_cached_objects(
 static const struct super_operations xfs_super_operations = {
 	.alloc_inode		= xfs_fs_alloc_inode,
 	.destroy_inode		= xfs_fs_destroy_inode,
-	.evict_inode		= xfs_fs_evict_inode,
 	.drop_inode		= xfs_fs_drop_inode,
 	.put_super		= xfs_fs_put_super,
 	.sync_fs		= xfs_fs_sync_fs,