aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Dave Chinner <dchinner@redhat.com> 2010-09-24 04:40:15 -0400
committer: Alex Elder <aelder@sgi.com> 2010-10-18 16:07:52 -0400
commit: 65d0f20533c503b50bd5e7e86434512af7761eea (patch)
tree: 0e04d6898fc0c729886067a3daf79b10a7a19992 /fs
parent: 69d6cc76cff3573ceefda178b75e20878866fdc3 (diff)
xfs: split inode AG walking into separate code for reclaim
The reclaim walk requires different locking and has a slightly different walk algorithm, so separate it out so that it can be optimised separately.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/xfs/linux-2.6/xfs_sync.c 202
-rw-r--r-- fs/xfs/linux-2.6/xfs_sync.h 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_trace.h 2
-rw-r--r-- fs/xfs/quota/xfs_qm_syscalls.c 3
-rw-r--r-- fs/xfs/xfs_mount.c 26
-rw-r--r-- fs/xfs/xfs_mount.h 2
6 files changed, 122 insertions, 115 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 3a1d229b4784..b5cdf0ef39ec 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -40,78 +40,46 @@
40#include <linux/freezer.h> 40#include <linux/freezer.h>
41 41
42 42
43STATIC xfs_inode_t *
44xfs_inode_ag_lookup(
45 struct xfs_mount *mp,
46 struct xfs_perag *pag,
47 uint32_t *first_index,
48 int tag)
49{
50 int nr_found;
51 struct xfs_inode *ip;
52
53 /*
54 * use a gang lookup to find the next inode in the tree
55 * as the tree is sparse and a gang lookup walks to find
56 * the number of objects requested.
57 */
58 if (tag == XFS_ICI_NO_TAG) {
59 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
60 (void **)&ip, *first_index, 1);
61 } else {
62 nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
63 (void **)&ip, *first_index, 1, tag);
64 }
65 if (!nr_found)
66 return NULL;
67
68 /*
69 * Update the index for the next lookup. Catch overflows
70 * into the next AG range which can occur if we have inodes
71 * in the last block of the AG and we are currently
72 * pointing to the last inode.
73 */
74 *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
75 if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
76 return NULL;
77 return ip;
78}
79
80STATIC int 43STATIC int
81xfs_inode_ag_walk( 44xfs_inode_ag_walk(
82 struct xfs_mount *mp, 45 struct xfs_mount *mp,
83 struct xfs_perag *pag, 46 struct xfs_perag *pag,
84 int (*execute)(struct xfs_inode *ip, 47 int (*execute)(struct xfs_inode *ip,
85 struct xfs_perag *pag, int flags), 48 struct xfs_perag *pag, int flags),
86 int flags, 49 int flags)
87 int tag,
88 int exclusive,
89 int *nr_to_scan)
90{ 50{
91 uint32_t first_index; 51 uint32_t first_index;
92 int last_error = 0; 52 int last_error = 0;
93 int skipped; 53 int skipped;
54 int done;
94 55
95restart: 56restart:
57 done = 0;
96 skipped = 0; 58 skipped = 0;
97 first_index = 0; 59 first_index = 0;
98 do { 60 do {
99 int error = 0; 61 int error = 0;
62 int nr_found;
100 xfs_inode_t *ip; 63 xfs_inode_t *ip;
101 64
102 if (exclusive) 65 read_lock(&pag->pag_ici_lock);
103 write_lock(&pag->pag_ici_lock); 66 nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
104 else 67 (void **)&ip, first_index, 1);
105 read_lock(&pag->pag_ici_lock); 68 if (!nr_found) {
106 ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); 69 read_unlock(&pag->pag_ici_lock);
107 if (!ip) {
108 if (exclusive)
109 write_unlock(&pag->pag_ici_lock);
110 else
111 read_unlock(&pag->pag_ici_lock);
112 break; 70 break;
113 } 71 }
114 72
73 /*
74 * Update the index for the next lookup. Catch overflows
75 * into the next AG range which can occur if we have inodes
76 * in the last block of the AG and we are currently
77 * pointing to the last inode.
78 */
79 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
80 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
81 done = 1;
82
115 /* execute releases pag->pag_ici_lock */ 83 /* execute releases pag->pag_ici_lock */
116 error = execute(ip, pag, flags); 84 error = execute(ip, pag, flags);
117 if (error == EAGAIN) { 85 if (error == EAGAIN) {
@@ -125,7 +93,7 @@ restart:
125 if (error == EFSCORRUPTED) 93 if (error == EFSCORRUPTED)
126 break; 94 break;
127 95
128 } while ((*nr_to_scan)--); 96 } while (!done);
129 97
130 if (skipped) { 98 if (skipped) {
131 delay(1); 99 delay(1);
@@ -134,73 +102,29 @@ restart:
134 return last_error; 102 return last_error;
135} 103}
136 104
137/*
138 * Select the next per-ag structure to iterate during the walk. The reclaim
139 * walk is optimised only to walk AGs with reclaimable inodes in them.
140 */
141static struct xfs_perag *
142xfs_inode_ag_iter_next_pag(
143 struct xfs_mount *mp,
144 xfs_agnumber_t *first,
145 int tag)
146{
147 struct xfs_perag *pag = NULL;
148
149 if (tag == XFS_ICI_RECLAIM_TAG) {
150 int found;
151 int ref;
152
153 rcu_read_lock();
154 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
155 (void **)&pag, *first, 1, tag);
156 if (found <= 0) {
157 rcu_read_unlock();
158 return NULL;
159 }
160 *first = pag->pag_agno + 1;
161 /* open coded pag reference increment */
162 ref = atomic_inc_return(&pag->pag_ref);
163 rcu_read_unlock();
164 trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
165 } else {
166 pag = xfs_perag_get(mp, *first);
167 (*first)++;
168 }
169 return pag;
170}
171
172int 105int
173xfs_inode_ag_iterator( 106xfs_inode_ag_iterator(
174 struct xfs_mount *mp, 107 struct xfs_mount *mp,
175 int (*execute)(struct xfs_inode *ip, 108 int (*execute)(struct xfs_inode *ip,
176 struct xfs_perag *pag, int flags), 109 struct xfs_perag *pag, int flags),
177 int flags, 110 int flags)
178 int tag,
179 int exclusive,
180 int *nr_to_scan)
181{ 111{
182 struct xfs_perag *pag; 112 struct xfs_perag *pag;
183 int error = 0; 113 int error = 0;
184 int last_error = 0; 114 int last_error = 0;
185 xfs_agnumber_t ag; 115 xfs_agnumber_t ag;
186 int nr;
187 116
188 nr = nr_to_scan ? *nr_to_scan : INT_MAX;
189 ag = 0; 117 ag = 0;
190 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { 118 while ((pag = xfs_perag_get(mp, ag))) {
191 error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, 119 ag = pag->pag_agno + 1;
192 exclusive, &nr); 120 error = xfs_inode_ag_walk(mp, pag, execute, flags);
193 xfs_perag_put(pag); 121 xfs_perag_put(pag);
194 if (error) { 122 if (error) {
195 last_error = error; 123 last_error = error;
196 if (error == EFSCORRUPTED) 124 if (error == EFSCORRUPTED)
197 break; 125 break;
198 } 126 }
199 if (nr <= 0)
200 break;
201 } 127 }
202 if (nr_to_scan)
203 *nr_to_scan = nr;
204 return XFS_ERROR(last_error); 128 return XFS_ERROR(last_error);
205} 129}
206 130
@@ -318,8 +242,7 @@ xfs_sync_data(
318 242
319 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); 243 ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
320 244
321 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, 245 error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
322 XFS_ICI_NO_TAG, 0, NULL);
323 if (error) 246 if (error)
324 return XFS_ERROR(error); 247 return XFS_ERROR(error);
325 248
@@ -337,8 +260,7 @@ xfs_sync_attr(
337{ 260{
338 ASSERT((flags & ~SYNC_WAIT) == 0); 261 ASSERT((flags & ~SYNC_WAIT) == 0);
339 262
340 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, 263 return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
341 XFS_ICI_NO_TAG, 0, NULL);
342} 264}
343 265
344STATIC int 266STATIC int
@@ -868,13 +790,72 @@ reclaim:
868 790
869} 791}
870 792
793/*
794 * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
795 * corrupted, we still want to try to reclaim all the inodes. If we don't,
796 * then a shut down during filesystem unmount reclaim walk leak all the
797 * unreclaimed inodes.
798 */
799int
800xfs_reclaim_inodes_ag(
801 struct xfs_mount *mp,
802 int flags,
803 int *nr_to_scan)
804{
805 struct xfs_perag *pag;
806 int error = 0;
807 int last_error = 0;
808 xfs_agnumber_t ag;
809
810 ag = 0;
811 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
812 unsigned long first_index = 0;
813 int done = 0;
814
815 ag = pag->pag_agno + 1;
816
817 do {
818 struct xfs_inode *ip;
819 int nr_found;
820
821 write_lock(&pag->pag_ici_lock);
822 nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
823 (void **)&ip, first_index, 1,
824 XFS_ICI_RECLAIM_TAG);
825 if (!nr_found) {
826 write_unlock(&pag->pag_ici_lock);
827 break;
828 }
829
830 /*
831 * Update the index for the next lookup. Catch overflows
832 * into the next AG range which can occur if we have inodes
833 * in the last block of the AG and we are currently
834 * pointing to the last inode.
835 */
836 first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
837 if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
838 done = 1;
839
840 error = xfs_reclaim_inode(ip, pag, flags);
841 if (error && last_error != EFSCORRUPTED)
842 last_error = error;
843
844 } while (!done && (*nr_to_scan)--);
845
846 xfs_perag_put(pag);
847 }
848 return XFS_ERROR(last_error);
849}
850
871int 851int
872xfs_reclaim_inodes( 852xfs_reclaim_inodes(
873 xfs_mount_t *mp, 853 xfs_mount_t *mp,
874 int mode) 854 int mode)
875{ 855{
876 return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, 856 int nr_to_scan = INT_MAX;
877 XFS_ICI_RECLAIM_TAG, 1, NULL); 857
858 return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
878} 859}
879 860
880/* 861/*
@@ -896,17 +877,16 @@ xfs_reclaim_inode_shrink(
896 if (!(gfp_mask & __GFP_FS)) 877 if (!(gfp_mask & __GFP_FS))
897 return -1; 878 return -1;
898 879
899 xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, 880 xfs_reclaim_inodes_ag(mp, 0, &nr_to_scan);
900 XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); 881 /* terminate if we don't exhaust the scan */
901 /* if we don't exhaust the scan, don't bother coming back */
902 if (nr_to_scan > 0) 882 if (nr_to_scan > 0)
903 return -1; 883 return -1;
904 } 884 }
905 885
906 reclaimable = 0; 886 reclaimable = 0;
907 ag = 0; 887 ag = 0;
908 while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, 888 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
909 XFS_ICI_RECLAIM_TAG))) { 889 ag = pag->pag_agno + 1;
910 reclaimable += pag->pag_ici_reclaimable; 890 reclaimable += pag->pag_ici_reclaimable;
911 xfs_perag_put(pag); 891 xfs_perag_put(pag);
912 } 892 }
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index fe78726196f8..e8a352896d20 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -50,7 +50,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
50int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); 50int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
51int xfs_inode_ag_iterator(struct xfs_mount *mp, 51int xfs_inode_ag_iterator(struct xfs_mount *mp,
52 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 52 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
53 int flags, int tag, int write_lock, int *nr_to_scan); 53 int flags);
54 54
55void xfs_inode_shrinker_register(struct xfs_mount *mp); 55void xfs_inode_shrinker_register(struct xfs_mount *mp);
56void xfs_inode_shrinker_unregister(struct xfs_mount *mp); 56void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2a1d4fbd9ed8..286dc201c5b9 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \
124 unsigned long caller_ip), \ 124 unsigned long caller_ip), \
125 TP_ARGS(mp, agno, refcount, caller_ip)) 125 TP_ARGS(mp, agno, refcount, caller_ip))
126DEFINE_PERAG_REF_EVENT(xfs_perag_get); 126DEFINE_PERAG_REF_EVENT(xfs_perag_get);
127DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); 127DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
128DEFINE_PERAG_REF_EVENT(xfs_perag_put); 128DEFINE_PERAG_REF_EVENT(xfs_perag_put);
129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); 129DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); 130DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 7a71336f7922..ac11fbef37fc 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -918,8 +918,7 @@ xfs_qm_dqrele_all_inodes(
918 uint flags) 918 uint flags)
919{ 919{
920 ASSERT(mp->m_quotainfo); 920 ASSERT(mp->m_quotainfo);
921 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, 921 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
922 XFS_ICI_NO_TAG, 0, NULL);
923} 922}
924 923
925/*------------------------------------------------------------------------*/ 924/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 912101d280bf..d66e87c7c3a6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -219,6 +219,32 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
219 return pag; 219 return pag;
220} 220}
221 221
222/*
223 * search from @first to find the next perag with the given tag set.
224 */
225struct xfs_perag *
226xfs_perag_get_tag(
227 struct xfs_mount *mp,
228 xfs_agnumber_t first,
229 int tag)
230{
231 struct xfs_perag *pag;
232 int found;
233 int ref;
234
235 rcu_read_lock();
236 found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
237 (void **)&pag, first, 1, tag);
238 if (found <= 0) {
239 rcu_read_unlock();
240 return NULL;
241 }
242 ref = atomic_inc_return(&pag->pag_ref);
243 rcu_read_unlock();
244 trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_);
245 return pag;
246}
247
222void 248void
223xfs_perag_put(struct xfs_perag *pag) 249xfs_perag_put(struct xfs_perag *pag)
224{ 250{
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 622da2179a57..7ab240930ba5 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -327,6 +327,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
327 * perag get/put wrappers for ref counting 327 * perag get/put wrappers for ref counting
328 */ 328 */
329struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); 329struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
330struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno,
331 int tag);
330void xfs_perag_put(struct xfs_perag *pag); 332void xfs_perag_put(struct xfs_perag *pag);
331 333
332/* 334/*