Diffstat (limited to 'fs/xfs/linux-2.6/xfs_sync.c')
 fs/xfs/linux-2.6/xfs_sync.c | 413 +++++++++++++++++++++++++-----------------
 1 file changed, 239 insertions(+), 174 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 81976ffed7d6..37d33254981d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -39,42 +39,39 @@
 #include <linux/kthread.h>
 #include <linux/freezer.h>
 
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH	32
 
-STATIC xfs_inode_t *
-xfs_inode_ag_lookup(
-	struct xfs_mount	*mp,
-	struct xfs_perag	*pag,
-	uint32_t		*first_index,
-	int			tag)
+STATIC int
+xfs_inode_ag_walk_grab(
+	struct xfs_inode	*ip)
 {
-	int			nr_found;
-	struct xfs_inode	*ip;
+	struct inode		*inode = VFS_I(ip);
 
-	/*
-	 * use a gang lookup to find the next inode in the tree
-	 * as the tree is sparse and a gang lookup walks to find
-	 * the number of objects requested.
-	 */
-	if (tag == XFS_ICI_NO_TAG) {
-		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
-				(void **)&ip, *first_index, 1);
-	} else {
-		nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
-				(void **)&ip, *first_index, 1, tag);
+	/* nothing to sync during shutdown */
+	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+		return EFSCORRUPTED;
+
+	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+	if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+		return ENOENT;
+
+	/* If we can't grab the inode, it must be on its way to reclaim. */
+	if (!igrab(inode))
+		return ENOENT;
+
+	if (is_bad_inode(inode)) {
+		IRELE(ip);
+		return ENOENT;
 	}
-	if (!nr_found)
-		return NULL;
 
-	/*
-	 * Update the index for the next lookup. Catch overflows
-	 * into the next AG range which can occur if we have inodes
-	 * in the last block of the AG and we are currently
-	 * pointing to the last inode.
-	 */
-	*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
-	if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
-		return NULL;
-	return ip;
+	/* inode is valid */
+	return 0;
 }
 
 STATIC int
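The new xfs_inode_ag_walk_grab() above folds the validity checks of the old xfs_sync_inode_valid() (removed later in this patch) into a helper that never touches pag_ici_lock: bail out under a forced shutdown, skip new or reclaim-bound inodes, pin the VFS inode with igrab(), and drop the reference again if the inode turns out to be bad. A minimal userspace sketch of that grab-or-skip shape; all types, flags and helpers below are demo stand-ins, not kernel APIs:

/*
 * Userspace sketch of the grab-or-skip pattern. OBJ_* flags, struct obj
 * and walk_grab() are inventions for this demo, not kernel interfaces.
 * Build: cc -o grab grab.c
 */
#include <stdio.h>

enum { OBJ_NEW = 1 << 0, OBJ_RECLAIMABLE = 1 << 1, OBJ_RECLAIM = 1 << 2 };

struct obj {
	int	flags;
	int	refcount;	/* 0 mimics igrab() failing: object is dying */
};

/* Return 0 and hold a reference on success, nonzero to skip the object. */
static int walk_grab(struct obj *o)
{
	/* new or reclaim-bound objects are left for the reclaim path */
	if (o->flags & (OBJ_NEW | OBJ_RECLAIMABLE | OBJ_RECLAIM))
		return 1;
	if (o->refcount == 0)
		return 1;
	o->refcount++;		/* reference stays pinned across the callback */
	return 0;
}

int main(void)
{
	struct obj live = { 0, 1 };
	struct obj dying = { OBJ_RECLAIM, 1 };

	printf("live:  %s\n", walk_grab(&live) ? "skipped" : "grabbed");
	printf("dying: %s\n", walk_grab(&dying) ? "skipped" : "grabbed");
	return 0;
}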
@@ -83,49 +80,75 @@ xfs_inode_ag_walk(
 	struct xfs_perag	*pag,
 	int			(*execute)(struct xfs_inode *ip,
 					   struct xfs_perag *pag, int flags),
-	int			flags,
-	int			tag,
-	int			exclusive,
-	int			*nr_to_scan)
+	int			flags)
 {
 	uint32_t		first_index;
 	int			last_error = 0;
 	int			skipped;
+	int			done;
+	int			nr_found;
 
 restart:
+	done = 0;
 	skipped = 0;
 	first_index = 0;
+	nr_found = 0;
 	do {
+		struct xfs_inode *batch[XFS_LOOKUP_BATCH];
 		int		error = 0;
-		xfs_inode_t	*ip;
+		int		i;
 
-		if (exclusive)
-			write_lock(&pag->pag_ici_lock);
-		else
-			read_lock(&pag->pag_ici_lock);
-		ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
-		if (!ip) {
-			if (exclusive)
-				write_unlock(&pag->pag_ici_lock);
-			else
-				read_unlock(&pag->pag_ici_lock);
+		read_lock(&pag->pag_ici_lock);
+		nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+					(void **)batch, first_index,
+					XFS_LOOKUP_BATCH);
+		if (!nr_found) {
+			read_unlock(&pag->pag_ici_lock);
 			break;
 		}
 
-		/* execute releases pag->pag_ici_lock */
-		error = execute(ip, pag, flags);
-		if (error == EAGAIN) {
-			skipped++;
-			continue;
+		/*
+		 * Grab the inodes before we drop the lock. If we found
+		 * nothing, nr == 0 and the loop will be skipped.
+		 */
+		for (i = 0; i < nr_found; i++) {
+			struct xfs_inode *ip = batch[i];
+
+			if (done || xfs_inode_ag_walk_grab(ip))
+				batch[i] = NULL;
+
+			/*
+			 * Update the index for the next lookup. Catch overflows
+			 * into the next AG range which can occur if we have inodes
+			 * in the last block of the AG and we are currently
+			 * pointing to the last inode.
+			 */
+			first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+			if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+				done = 1;
+		}
+
+		/* unlock now that we've grabbed the inodes. */
+		read_unlock(&pag->pag_ici_lock);
+
+		for (i = 0; i < nr_found; i++) {
+			if (!batch[i])
+				continue;
+			error = execute(batch[i], pag, flags);
+			IRELE(batch[i]);
+			if (error == EAGAIN) {
+				skipped++;
+				continue;
+			}
+			if (error && last_error != EFSCORRUPTED)
+				last_error = error;
 		}
-		if (error)
-			last_error = error;
 
 		/* bail out if the filesystem is corrupted. */
 		if (error == EFSCORRUPTED)
 			break;
 
-	} while ((*nr_to_scan)--);
+	} while (nr_found && !done);
 
 	if (skipped) {
 		delay(1);
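The rewritten walk above batches up to XFS_LOOKUP_BATCH inodes per radix tree lookup and advances a 32-bit cursor, and the in-code comment explains why a wrapped cursor must end the walk. The self-contained program below isolates that cursor-advance-and-wrap check; gang_lookup() here is a demo helper over a plain array, not the kernel's radix_tree_gang_lookup():

/*
 * Self-contained demo of the batched lookup cursor.
 * Build: cc -o batch batch.c
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define LOOKUP_BATCH	4

/* Copy up to max entries with index >= first into batch; return the count. */
static int gang_lookup(const uint32_t *set, int n, uint32_t first,
		       uint32_t *batch, int max)
{
	int found = 0;

	for (int i = 0; i < n && found < max; i++)
		if (set[i] >= first)
			batch[found++] = set[i];
	return found;
}

int main(void)
{
	/* the last entry is the last representable index, like an inode
	 * in the final block of an AG */
	const uint32_t set[] = { 3, 9, 17, 0xFFFFFFFEu, 0xFFFFFFFFu };
	uint32_t first = 0;
	int done = 0;

	do {
		uint32_t batch[LOOKUP_BATCH];
		int nr = gang_lookup(set, 5, first, batch, LOOKUP_BATCH);

		if (!nr)
			break;
		for (int i = 0; i < nr; i++) {
			printf("visit %" PRIu32 "\n", batch[i]);
			/*
			 * Advance the cursor; if the increment wrapped we
			 * just visited the last index, so end the walk
			 * instead of restarting from 0.
			 */
			first = batch[i] + 1;
			if (first < batch[i])
				done = 1;
		}
	} while (!done);
	return 0;
}

Dropping the first < batch[i] test makes the loop restart at index 0 after visiting 0xFFFFFFFF, which is exactly the overflow the diff's comment warns about.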
@@ -134,110 +157,32 @@ restart:
 	return last_error;
 }
 
-/*
- * Select the next per-ag structure to iterate during the walk. The reclaim
- * walk is optimised only to walk AGs with reclaimable inodes in them.
- */
-static struct xfs_perag *
-xfs_inode_ag_iter_next_pag(
-	struct xfs_mount	*mp,
-	xfs_agnumber_t		*first,
-	int			tag)
-{
-	struct xfs_perag	*pag = NULL;
-
-	if (tag == XFS_ICI_RECLAIM_TAG) {
-		int found;
-		int ref;
-
-		spin_lock(&mp->m_perag_lock);
-		found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
-				(void **)&pag, *first, 1, tag);
-		if (found <= 0) {
-			spin_unlock(&mp->m_perag_lock);
-			return NULL;
-		}
-		*first = pag->pag_agno + 1;
-		/* open coded pag reference increment */
-		ref = atomic_inc_return(&pag->pag_ref);
-		spin_unlock(&mp->m_perag_lock);
-		trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
-	} else {
-		pag = xfs_perag_get(mp, *first);
-		(*first)++;
-	}
-	return pag;
-}
-
 int
 xfs_inode_ag_iterator(
 	struct xfs_mount	*mp,
 	int			(*execute)(struct xfs_inode *ip,
 					   struct xfs_perag *pag, int flags),
-	int			flags,
-	int			tag,
-	int			exclusive,
-	int			*nr_to_scan)
+	int			flags)
 {
 	struct xfs_perag	*pag;
 	int			error = 0;
 	int			last_error = 0;
 	xfs_agnumber_t		ag;
-	int			nr;
 
-	nr = nr_to_scan ? *nr_to_scan : INT_MAX;
 	ag = 0;
-	while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
-		error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
-						exclusive, &nr);
+	while ((pag = xfs_perag_get(mp, ag))) {
+		ag = pag->pag_agno + 1;
+		error = xfs_inode_ag_walk(mp, pag, execute, flags);
 		xfs_perag_put(pag);
 		if (error) {
 			last_error = error;
 			if (error == EFSCORRUPTED)
 				break;
 		}
-		if (nr <= 0)
-			break;
 	}
-	if (nr_to_scan)
-		*nr_to_scan = nr;
 	return XFS_ERROR(last_error);
 }
 
-/* must be called with pag_ici_lock held and releases it */
-int
-xfs_sync_inode_valid(
-	struct xfs_inode	*ip,
-	struct xfs_perag	*pag)
-{
-	struct inode		*inode = VFS_I(ip);
-	int			error = EFSCORRUPTED;
-
-	/* nothing to sync during shutdown */
-	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
-		goto out_unlock;
-
-	/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
-	error = ENOENT;
-	if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
-		goto out_unlock;
-
-	/* If we can't grab the inode, it must be on its way to reclaim. */
-	if (!igrab(inode))
-		goto out_unlock;
-
-	if (is_bad_inode(inode)) {
-		IRELE(ip);
-		goto out_unlock;
-	}
-
-	/* inode is valid */
-	error = 0;
-out_unlock:
-	read_unlock(&pag->pag_ici_lock);
-	return error;
-}
-
 STATIC int
 xfs_sync_inode_data(
 	struct xfs_inode	*ip,
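With the tag, exclusive and nr_to_scan parameters gone, xfs_inode_ag_iterator() above reduces to a plain loop: get a referenced per-AG structure, record the next AG number before walking, walk, put the reference, and remember the last error. A runnable sketch of that loop shape; struct group, group_get() and group_put() are inventions for this demo, not kernel calls:

/*
 * Runnable sketch of the get/advance/put iteration.
 * Build: cc -o iter iter.c
 */
#include <stdio.h>

#define NGROUPS	4

struct group {
	int	id;
	int	refs;
};

static struct group groups[NGROUPS] = {
	{ 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 },
};

static struct group *group_get(int id)
{
	if (id >= NGROUPS)
		return NULL;	/* ran off the end: iteration complete */
	groups[id].refs++;
	return &groups[id];
}

static void group_put(struct group *g)
{
	g->refs--;
}

static int walk_group(struct group *g)
{
	printf("walking group %d\n", g->id);
	return 0;	/* a nonzero value would become last_error */
}

int main(void)
{
	struct group *g;
	int id = 0, error, last_error = 0;

	while ((g = group_get(id))) {
		/* advance before the walk so the reference can be dropped */
		id = g->id + 1;
		error = walk_group(g);
		group_put(g);
		if (error)
			last_error = error;
	}
	return last_error;
}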
@@ -248,10 +193,6 @@ xfs_sync_inode_data(
 	struct address_space *mapping = inode->i_mapping;
 	int			error = 0;
 
-	error = xfs_sync_inode_valid(ip, pag);
-	if (error)
-		return error;
-
 	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
 		goto out_wait;
 
@@ -268,7 +209,6 @@ xfs_sync_inode_data(
  out_wait:
 	if (flags & SYNC_WAIT)
 		xfs_ioend_wait(ip);
-	IRELE(ip);
 	return error;
 }
 
@@ -280,10 +220,6 @@ xfs_sync_inode_attr(
 {
 	int			error = 0;
 
-	error = xfs_sync_inode_valid(ip, pag);
-	if (error)
-		return error;
-
 	xfs_ilock(ip, XFS_ILOCK_SHARED);
 	if (xfs_inode_clean(ip))
 		goto out_unlock;
@@ -302,7 +238,6 @@ xfs_sync_inode_attr(
 
  out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
-	IRELE(ip);
 	return error;
 }
 
@@ -318,8 +253,7 @@ xfs_sync_data(
 
 	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
-	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-				      XFS_ICI_NO_TAG, 0, NULL);
+	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -337,8 +271,7 @@ xfs_sync_attr(
 {
 	ASSERT((flags & ~SYNC_WAIT) == 0);
 
-	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-				     XFS_ICI_NO_TAG, 0, NULL);
+	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
 }
 
 STATIC int
@@ -698,6 +631,43 @@ __xfs_inode_clear_reclaim_tag(
 }
 
 /*
+ * Grab the inode for reclaim exclusively.
+ * Return 0 if we grabbed it, non-zero otherwise.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+	struct xfs_inode	*ip,
+	int			flags)
+{
+
+	/*
+	 * do some unlocked checks first to avoid unnecessary lock traffic.
+	 * The first is a flush lock check, the second is an already-in-reclaim
+	 * check. Only do these checks if we are not going to block on locks.
+	 */
+	if ((flags & SYNC_TRYLOCK) &&
+	    (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+		return 1;
+	}
+
+	/*
+	 * The radix tree lock here protects a thread in xfs_iget from racing
+	 * with us starting reclaim on the inode. Once we have the
+	 * XFS_IRECLAIM flag set it will not touch us.
+	 */
+	spin_lock(&ip->i_flags_lock);
+	ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+	if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+		/* ignore as it is already under reclaim */
+		spin_unlock(&ip->i_flags_lock);
+		return 1;
+	}
+	__xfs_iflags_set(ip, XFS_IRECLAIM);
+	spin_unlock(&ip->i_flags_lock);
+	return 0;
+}
+
+/*
  * Inodes in different states need to be treated differently, and the return
  * value of xfs_iflush is not sufficient to get this right. The following table
  * lists the inode states and the reclaim actions necessary for non-blocking
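xfs_reclaim_inode_grab() above is a two-stage test-and-set: optional unlocked peeks (only safe as a skip hint, hence the SYNC_TRYLOCK guard), then the authoritative check-and-claim under i_flags_lock. The sketch below restates the pattern with a pthread mutex standing in for the spinlock; the struct and field names are invented for the demo:

/*
 * Two-stage test-and-set with a pthread mutex standing in for the
 * i_flags_lock spinlock.
 * Build: cc -o grab2 grab2.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

struct node {
	pthread_mutex_t	flags_lock;
	int		in_reclaim;	/* set once a thread owns reclaim */
	int		flush_done;	/* clear while flush I/O is in flight */
};

/* Return 0 if the caller now owns reclaim of the node, 1 otherwise. */
static int reclaim_grab(struct node *n, int nonblocking)
{
	/*
	 * Unlocked peek: in the non-blocking path, skip busy or already
	 * claimed nodes without paying for the lock. Only the locked
	 * check below is authoritative, so a stale read merely skips.
	 */
	if (nonblocking && (!n->flush_done || n->in_reclaim))
		return 1;

	pthread_mutex_lock(&n->flags_lock);
	if (n->in_reclaim) {
		pthread_mutex_unlock(&n->flags_lock);
		return 1;	/* lost the race: already under reclaim */
	}
	n->in_reclaim = 1;	/* claimed; racers now fail the check */
	pthread_mutex_unlock(&n->flags_lock);
	return 0;
}

int main(void)
{
	struct node n = { PTHREAD_MUTEX_INITIALIZER, 0, 1 };

	printf("first grab:  %d\n", reclaim_grab(&n, 1));	/* 0: won */
	printf("second grab: %d\n", reclaim_grab(&n, 1));	/* 1: lost */
	return 0;
}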
@@ -755,23 +725,6 @@ xfs_reclaim_inode(
 {
 	int	error = 0;
 
-	/*
-	 * The radix tree lock here protects a thread in xfs_iget from racing
-	 * with us starting reclaim on the inode. Once we have the
-	 * XFS_IRECLAIM flag set it will not touch us.
-	 */
-	spin_lock(&ip->i_flags_lock);
-	ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
-	if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
-		/* ignore as it is already under reclaim */
-		spin_unlock(&ip->i_flags_lock);
-		write_unlock(&pag->pag_ici_lock);
-		return 0;
-	}
-	__xfs_iflags_set(ip, XFS_IRECLAIM);
-	spin_unlock(&ip->i_flags_lock);
-	write_unlock(&pag->pag_ici_lock);
-
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	if (!xfs_iflock_nowait(ip)) {
 		if (!(sync_mode & SYNC_WAIT))
@@ -868,13 +821,126 @@ reclaim:
 
 }
 
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shutdown during a filesystem unmount reclaim walk will leak all
+ * the unreclaimed inodes.
+ */
+int
+xfs_reclaim_inodes_ag(
+	struct xfs_mount	*mp,
+	int			flags,
+	int			*nr_to_scan)
+{
+	struct xfs_perag	*pag;
+	int			error = 0;
+	int			last_error = 0;
+	xfs_agnumber_t		ag;
+	int			trylock = flags & SYNC_TRYLOCK;
+	int			skipped;
+
+restart:
+	ag = 0;
+	skipped = 0;
+	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+		unsigned long	first_index = 0;
+		int		done = 0;
+		int		nr_found = 0;
+
+		ag = pag->pag_agno + 1;
+
+		if (trylock) {
+			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+				skipped++;
+				continue;
+			}
+			first_index = pag->pag_ici_reclaim_cursor;
+		} else
+			mutex_lock(&pag->pag_ici_reclaim_lock);
+
+		do {
+			struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+			int	i;
+
+			write_lock(&pag->pag_ici_lock);
+			nr_found = radix_tree_gang_lookup_tag(
+					&pag->pag_ici_root,
+					(void **)batch, first_index,
+					XFS_LOOKUP_BATCH,
+					XFS_ICI_RECLAIM_TAG);
+			if (!nr_found) {
+				write_unlock(&pag->pag_ici_lock);
+				break;
+			}
+
+			/*
+			 * Grab the inodes before we drop the lock. If we found
+			 * nothing, nr == 0 and the loop will be skipped.
+			 */
+			for (i = 0; i < nr_found; i++) {
+				struct xfs_inode *ip = batch[i];
+
+				if (done || xfs_reclaim_inode_grab(ip, flags))
+					batch[i] = NULL;
+
+				/*
+				 * Update the index for the next lookup. Catch
+				 * overflows into the next AG range which can
+				 * occur if we have inodes in the last block of
+				 * the AG and we are currently pointing to the
+				 * last inode.
+				 */
+				first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+				if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+					done = 1;
+			}
+
+			/* unlock now that we've grabbed the inodes. */
+			write_unlock(&pag->pag_ici_lock);
+
+			for (i = 0; i < nr_found; i++) {
+				if (!batch[i])
+					continue;
+				error = xfs_reclaim_inode(batch[i], pag, flags);
+				if (error && last_error != EFSCORRUPTED)
+					last_error = error;
+			}
+
+			*nr_to_scan -= XFS_LOOKUP_BATCH;
+
+		} while (nr_found && !done && *nr_to_scan > 0);
+
+		if (trylock && !done)
+			pag->pag_ici_reclaim_cursor = first_index;
+		else
+			pag->pag_ici_reclaim_cursor = 0;
+		mutex_unlock(&pag->pag_ici_reclaim_lock);
+		xfs_perag_put(pag);
+	}
+
+	/*
+	 * if we skipped any AG, and we still have scan count remaining, do
+	 * another pass this time using blocking reclaim semantics (i.e.
+	 * waiting on the reclaim locks and ignoring the reclaim cursors). This
+	 * ensures that when we get more reclaimers than AGs we block rather
+	 * than spin trying to execute reclaim.
+	 */
+	if (trylock && skipped && *nr_to_scan > 0) {
+		trylock = 0;
+		goto restart;
+	}
+	return XFS_ERROR(last_error);
+}
+
 int
 xfs_reclaim_inodes(
 	xfs_mount_t	*mp,
 	int		mode)
 {
-	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
-					XFS_ICI_RECLAIM_TAG, 1, NULL);
+	int		nr_to_scan = INT_MAX;
+
+	return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
 }
 
 /*
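The new reclaim walk takes a per-AG mutex: a SYNC_TRYLOCK pass skips contended AGs, parks its position in pag_ici_reclaim_cursor, and, if any AG was skipped while scan budget remains, restarts with blocking lock acquisition so surplus reclaimers queue instead of spinning. The runnable sketch below reduces the walk to just that two-pass locking policy; all names are stand-ins, and in a single-threaded demo the trylock always wins, but the control flow is the point:

/*
 * Trylock-then-blocking two-pass scan policy.
 * Build: cc -o twopass twopass.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

#define NGROUPS	3

static pthread_mutex_t locks[NGROUPS] = {
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER,
	PTHREAD_MUTEX_INITIALIZER,
};

static void scan_group(int g, int *nr_to_scan)
{
	printf("scanning group %d\n", g);
	*nr_to_scan -= 1;	/* pretend one batch was processed */
}

static void scan_all(int *nr_to_scan)
{
	int trylock = 1;	/* first pass: never block on a lock */
	int skipped;

restart:
	skipped = 0;
	for (int g = 0; g < NGROUPS && *nr_to_scan > 0; g++) {
		if (trylock) {
			if (pthread_mutex_trylock(&locks[g])) {
				skipped++;	/* contended: retry later */
				continue;
			}
		} else {
			pthread_mutex_lock(&locks[g]);
		}
		scan_group(g, nr_to_scan);
		pthread_mutex_unlock(&locks[g]);
	}
	/*
	 * Work left and groups skipped: switch to blocking semantics so
	 * surplus scanners queue on the locks instead of spinning.
	 */
	if (trylock && skipped && *nr_to_scan > 0) {
		trylock = 0;
		goto restart;
	}
}

int main(void)
{
	int nr_to_scan = 8;

	scan_all(&nr_to_scan);
	return 0;
}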
@@ -896,17 +962,16 @@ xfs_reclaim_inode_shrink(
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
 
-		xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
-					XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
-		/* if we don't exhaust the scan, don't bother coming back */
+		xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+		/* terminate if we don't exhaust the scan */
 		if (nr_to_scan > 0)
 			return -1;
 	}
 
 	reclaimable = 0;
 	ag = 0;
-	while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
-			XFS_ICI_RECLAIM_TAG))) {
+	while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+		ag = pag->pag_agno + 1;
 		reclaimable += pag->pag_ici_reclaimable;
 		xfs_perag_put(pag);
 	}
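In the shrinker hunk above, leftover scan budget after xfs_reclaim_inodes_ag() means the reclaim-tagged set was exhausted, so the callback returns -1 rather than asking to be called again; otherwise it reports the summed per-AG reclaimable count. A small self-contained model of that nr_to_scan budget protocol; the names are illustrative only, nothing here is a kernel interface:

/*
 * Model of the shrinker's nr_to_scan budget protocol.
 * Build: cc -o shrink shrink.c
 */
#include <stdio.h>

#define SCAN_BATCH	32

/* Consume budget in batch-sized steps while reclaimable items remain. */
static void reclaim_scan(int *items, int *nr_to_scan)
{
	while (*items > 0 && *nr_to_scan > 0) {
		int n = *items < SCAN_BATCH ? *items : SCAN_BATCH;

		*items -= n;
		*nr_to_scan -= SCAN_BATCH;	/* one batch of budget per pass */
	}
}

static int shrinker(int *items, int nr_to_scan)
{
	if (nr_to_scan) {
		reclaim_scan(items, &nr_to_scan);
		/* leftover budget: the scan ran out of work, not budget */
		if (nr_to_scan > 0)
			return -1;
	}
	return *items;	/* estimate of the remaining reclaimable items */
}

int main(void)
{
	int items = 100;

	printf("shrink -> %d\n", shrinker(&items, 64));	/* 36: come back */
	printf("shrink -> %d\n", shrinker(&items, 96));	/* -1: exhausted */
	return 0;
}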