diff options
author | Dave Chinner <dchinner@redhat.com> | 2010-09-24 04:40:15 -0400 |
---|---|---|
committer | Alex Elder <aelder@sgi.com> | 2010-10-18 16:07:52 -0400 |
commit | 65d0f20533c503b50bd5e7e86434512af7761eea (patch) | |
tree | 0e04d6898fc0c729886067a3daf79b10a7a19992 | |
parent | 69d6cc76cff3573ceefda178b75e20878866fdc3 (diff) |
xfs: split inode AG walking into separate code for reclaim
The reclaim walk requires different locking and has a slightly
different walk algorithm, so separate it out so that it can be
optimised separately.
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Alex Elder <aelder@sgi.com>
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 202 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.h | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.h | 2 | ||||
-rw-r--r-- | fs/xfs/quota/xfs_qm_syscalls.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 26 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 2 |
6 files changed, 122 insertions, 115 deletions
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 3a1d229b4784..b5cdf0ef39ec 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -40,78 +40,46 @@ | |||
40 | #include <linux/freezer.h> | 40 | #include <linux/freezer.h> |
41 | 41 | ||
42 | 42 | ||
43 | STATIC xfs_inode_t * | ||
44 | xfs_inode_ag_lookup( | ||
45 | struct xfs_mount *mp, | ||
46 | struct xfs_perag *pag, | ||
47 | uint32_t *first_index, | ||
48 | int tag) | ||
49 | { | ||
50 | int nr_found; | ||
51 | struct xfs_inode *ip; | ||
52 | |||
53 | /* | ||
54 | * use a gang lookup to find the next inode in the tree | ||
55 | * as the tree is sparse and a gang lookup walks to find | ||
56 | * the number of objects requested. | ||
57 | */ | ||
58 | if (tag == XFS_ICI_NO_TAG) { | ||
59 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | ||
60 | (void **)&ip, *first_index, 1); | ||
61 | } else { | ||
62 | nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, | ||
63 | (void **)&ip, *first_index, 1, tag); | ||
64 | } | ||
65 | if (!nr_found) | ||
66 | return NULL; | ||
67 | |||
68 | /* | ||
69 | * Update the index for the next lookup. Catch overflows | ||
70 | * into the next AG range which can occur if we have inodes | ||
71 | * in the last block of the AG and we are currently | ||
72 | * pointing to the last inode. | ||
73 | */ | ||
74 | *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
75 | if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
76 | return NULL; | ||
77 | return ip; | ||
78 | } | ||
79 | |||
80 | STATIC int | 43 | STATIC int |
81 | xfs_inode_ag_walk( | 44 | xfs_inode_ag_walk( |
82 | struct xfs_mount *mp, | 45 | struct xfs_mount *mp, |
83 | struct xfs_perag *pag, | 46 | struct xfs_perag *pag, |
84 | int (*execute)(struct xfs_inode *ip, | 47 | int (*execute)(struct xfs_inode *ip, |
85 | struct xfs_perag *pag, int flags), | 48 | struct xfs_perag *pag, int flags), |
86 | int flags, | 49 | int flags) |
87 | int tag, | ||
88 | int exclusive, | ||
89 | int *nr_to_scan) | ||
90 | { | 50 | { |
91 | uint32_t first_index; | 51 | uint32_t first_index; |
92 | int last_error = 0; | 52 | int last_error = 0; |
93 | int skipped; | 53 | int skipped; |
54 | int done; | ||
94 | 55 | ||
95 | restart: | 56 | restart: |
57 | done = 0; | ||
96 | skipped = 0; | 58 | skipped = 0; |
97 | first_index = 0; | 59 | first_index = 0; |
98 | do { | 60 | do { |
99 | int error = 0; | 61 | int error = 0; |
62 | int nr_found; | ||
100 | xfs_inode_t *ip; | 63 | xfs_inode_t *ip; |
101 | 64 | ||
102 | if (exclusive) | 65 | read_lock(&pag->pag_ici_lock); |
103 | write_lock(&pag->pag_ici_lock); | 66 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
104 | else | 67 | (void **)&ip, first_index, 1); |
105 | read_lock(&pag->pag_ici_lock); | 68 | if (!nr_found) { |
106 | ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); | 69 | read_unlock(&pag->pag_ici_lock); |
107 | if (!ip) { | ||
108 | if (exclusive) | ||
109 | write_unlock(&pag->pag_ici_lock); | ||
110 | else | ||
111 | read_unlock(&pag->pag_ici_lock); | ||
112 | break; | 70 | break; |
113 | } | 71 | } |
114 | 72 | ||
73 | /* | ||
74 | * Update the index for the next lookup. Catch overflows | ||
75 | * into the next AG range which can occur if we have inodes | ||
76 | * in the last block of the AG and we are currently | ||
77 | * pointing to the last inode. | ||
78 | */ | ||
79 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
80 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
81 | done = 1; | ||
82 | |||
115 | /* execute releases pag->pag_ici_lock */ | 83 | /* execute releases pag->pag_ici_lock */ |
116 | error = execute(ip, pag, flags); | 84 | error = execute(ip, pag, flags); |
117 | if (error == EAGAIN) { | 85 | if (error == EAGAIN) { |
@@ -125,7 +93,7 @@ restart: | |||
125 | if (error == EFSCORRUPTED) | 93 | if (error == EFSCORRUPTED) |
126 | break; | 94 | break; |
127 | 95 | ||
128 | } while ((*nr_to_scan)--); | 96 | } while (!done); |
129 | 97 | ||
130 | if (skipped) { | 98 | if (skipped) { |
131 | delay(1); | 99 | delay(1); |
@@ -134,73 +102,29 @@ restart: | |||
134 | return last_error; | 102 | return last_error; |
135 | } | 103 | } |
136 | 104 | ||
137 | /* | ||
138 | * Select the next per-ag structure to iterate during the walk. The reclaim | ||
139 | * walk is optimised only to walk AGs with reclaimable inodes in them. | ||
140 | */ | ||
141 | static struct xfs_perag * | ||
142 | xfs_inode_ag_iter_next_pag( | ||
143 | struct xfs_mount *mp, | ||
144 | xfs_agnumber_t *first, | ||
145 | int tag) | ||
146 | { | ||
147 | struct xfs_perag *pag = NULL; | ||
148 | |||
149 | if (tag == XFS_ICI_RECLAIM_TAG) { | ||
150 | int found; | ||
151 | int ref; | ||
152 | |||
153 | rcu_read_lock(); | ||
154 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | ||
155 | (void **)&pag, *first, 1, tag); | ||
156 | if (found <= 0) { | ||
157 | rcu_read_unlock(); | ||
158 | return NULL; | ||
159 | } | ||
160 | *first = pag->pag_agno + 1; | ||
161 | /* open coded pag reference increment */ | ||
162 | ref = atomic_inc_return(&pag->pag_ref); | ||
163 | rcu_read_unlock(); | ||
164 | trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_); | ||
165 | } else { | ||
166 | pag = xfs_perag_get(mp, *first); | ||
167 | (*first)++; | ||
168 | } | ||
169 | return pag; | ||
170 | } | ||
171 | |||
172 | int | 105 | int |
173 | xfs_inode_ag_iterator( | 106 | xfs_inode_ag_iterator( |
174 | struct xfs_mount *mp, | 107 | struct xfs_mount *mp, |
175 | int (*execute)(struct xfs_inode *ip, | 108 | int (*execute)(struct xfs_inode *ip, |
176 | struct xfs_perag *pag, int flags), | 109 | struct xfs_perag *pag, int flags), |
177 | int flags, | 110 | int flags) |
178 | int tag, | ||
179 | int exclusive, | ||
180 | int *nr_to_scan) | ||
181 | { | 111 | { |
182 | struct xfs_perag *pag; | 112 | struct xfs_perag *pag; |
183 | int error = 0; | 113 | int error = 0; |
184 | int last_error = 0; | 114 | int last_error = 0; |
185 | xfs_agnumber_t ag; | 115 | xfs_agnumber_t ag; |
186 | int nr; | ||
187 | 116 | ||
188 | nr = nr_to_scan ? *nr_to_scan : INT_MAX; | ||
189 | ag = 0; | 117 | ag = 0; |
190 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { | 118 | while ((pag = xfs_perag_get(mp, ag))) { |
191 | error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, | 119 | ag = pag->pag_agno + 1; |
192 | exclusive, &nr); | 120 | error = xfs_inode_ag_walk(mp, pag, execute, flags); |
193 | xfs_perag_put(pag); | 121 | xfs_perag_put(pag); |
194 | if (error) { | 122 | if (error) { |
195 | last_error = error; | 123 | last_error = error; |
196 | if (error == EFSCORRUPTED) | 124 | if (error == EFSCORRUPTED) |
197 | break; | 125 | break; |
198 | } | 126 | } |
199 | if (nr <= 0) | ||
200 | break; | ||
201 | } | 127 | } |
202 | if (nr_to_scan) | ||
203 | *nr_to_scan = nr; | ||
204 | return XFS_ERROR(last_error); | 128 | return XFS_ERROR(last_error); |
205 | } | 129 | } |
206 | 130 | ||
@@ -318,8 +242,7 @@ xfs_sync_data( | |||
318 | 242 | ||
319 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); | 243 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); |
320 | 244 | ||
321 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, | 245 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); |
322 | XFS_ICI_NO_TAG, 0, NULL); | ||
323 | if (error) | 246 | if (error) |
324 | return XFS_ERROR(error); | 247 | return XFS_ERROR(error); |
325 | 248 | ||
@@ -337,8 +260,7 @@ xfs_sync_attr( | |||
337 | { | 260 | { |
338 | ASSERT((flags & ~SYNC_WAIT) == 0); | 261 | ASSERT((flags & ~SYNC_WAIT) == 0); |
339 | 262 | ||
340 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, | 263 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); |
341 | XFS_ICI_NO_TAG, 0, NULL); | ||
342 | } | 264 | } |
343 | 265 | ||
344 | STATIC int | 266 | STATIC int |
@@ -868,13 +790,72 @@ reclaim: | |||
868 | 790 | ||
869 | } | 791 | } |
870 | 792 | ||
793 | /* | ||
794 | * Walk the AGs and reclaim the inodes in them. Even if the filesystem is | ||
795 | * corrupted, we still want to try to reclaim all the inodes. If we don't, | ||
796 | * then a shut down during filesystem unmount reclaim walk will leak all the | ||
797 | * unreclaimed inodes. | ||
798 | */ | ||
799 | int | ||
800 | xfs_reclaim_inodes_ag( | ||
801 | struct xfs_mount *mp, | ||
802 | int flags, | ||
803 | int *nr_to_scan) | ||
804 | { | ||
805 | struct xfs_perag *pag; | ||
806 | int error = 0; | ||
807 | int last_error = 0; | ||
808 | xfs_agnumber_t ag; | ||
809 | |||
810 | ag = 0; | ||
811 | while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { | ||
812 | unsigned long first_index = 0; | ||
813 | int done = 0; | ||
814 | |||
815 | ag = pag->pag_agno + 1; | ||
816 | |||
817 | do { | ||
818 | struct xfs_inode *ip; | ||
819 | int nr_found; | ||
820 | |||
821 | write_lock(&pag->pag_ici_lock); | ||
822 | nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, | ||
823 | (void **)&ip, first_index, 1, | ||
824 | XFS_ICI_RECLAIM_TAG); | ||
825 | if (!nr_found) { | ||
826 | write_unlock(&pag->pag_ici_lock); | ||
827 | break; | ||
828 | } | ||
829 | |||
830 | /* | ||
831 | * Update the index for the next lookup. Catch overflows | ||
832 | * into the next AG range which can occur if we have inodes | ||
833 | * in the last block of the AG and we are currently | ||
834 | * pointing to the last inode. | ||
835 | */ | ||
836 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
837 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
838 | done = 1; | ||
839 | |||
840 | error = xfs_reclaim_inode(ip, pag, flags); | ||
841 | if (error && last_error != EFSCORRUPTED) | ||
842 | last_error = error; | ||
843 | |||
844 | } while (!done && (*nr_to_scan)--); | ||
845 | |||
846 | xfs_perag_put(pag); | ||
847 | } | ||
848 | return XFS_ERROR(last_error); | ||
849 | } | ||
850 | |||
871 | int | 851 | int |
872 | xfs_reclaim_inodes( | 852 | xfs_reclaim_inodes( |
873 | xfs_mount_t *mp, | 853 | xfs_mount_t *mp, |
874 | int mode) | 854 | int mode) |
875 | { | 855 | { |
876 | return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, | 856 | int nr_to_scan = INT_MAX; |
877 | XFS_ICI_RECLAIM_TAG, 1, NULL); | 857 | |
858 | return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); | ||
878 | } | 859 | } |
879 | 860 | ||
880 | /* | 861 | /* |
@@ -896,17 +877,16 @@ xfs_reclaim_inode_shrink( | |||
896 | if (!(gfp_mask & __GFP_FS)) | 877 | if (!(gfp_mask & __GFP_FS)) |
897 | return -1; | 878 | return -1; |
898 | 879 | ||
899 | xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, | 880 | xfs_reclaim_inodes_ag(mp, 0, &nr_to_scan); |
900 | XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); | 881 | /* terminate if we don't exhaust the scan */ |
901 | /* if we don't exhaust the scan, don't bother coming back */ | ||
902 | if (nr_to_scan > 0) | 882 | if (nr_to_scan > 0) |
903 | return -1; | 883 | return -1; |
904 | } | 884 | } |
905 | 885 | ||
906 | reclaimable = 0; | 886 | reclaimable = 0; |
907 | ag = 0; | 887 | ag = 0; |
908 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, | 888 | while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { |
909 | XFS_ICI_RECLAIM_TAG))) { | 889 | ag = pag->pag_agno + 1; |
910 | reclaimable += pag->pag_ici_reclaimable; | 890 | reclaimable += pag->pag_ici_reclaimable; |
911 | xfs_perag_put(pag); | 891 | xfs_perag_put(pag); |
912 | } | 892 | } |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index fe78726196f8..e8a352896d20 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -50,7 +50,7 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | |||
50 | int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); | 50 | int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); |
51 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 51 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
52 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), | 52 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), |
53 | int flags, int tag, int write_lock, int *nr_to_scan); | 53 | int flags); |
54 | 54 | ||
55 | void xfs_inode_shrinker_register(struct xfs_mount *mp); | 55 | void xfs_inode_shrinker_register(struct xfs_mount *mp); |
56 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); | 56 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 2a1d4fbd9ed8..286dc201c5b9 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \ | |||
124 | unsigned long caller_ip), \ | 124 | unsigned long caller_ip), \ |
125 | TP_ARGS(mp, agno, refcount, caller_ip)) | 125 | TP_ARGS(mp, agno, refcount, caller_ip)) |
126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); | 126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); |
127 | DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); | 127 | DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); |
128 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); | 128 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); |
129 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); | 129 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); |
130 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); | 130 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 7a71336f7922..ac11fbef37fc 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -918,8 +918,7 @@ xfs_qm_dqrele_all_inodes( | |||
918 | uint flags) | 918 | uint flags) |
919 | { | 919 | { |
920 | ASSERT(mp->m_quotainfo); | 920 | ASSERT(mp->m_quotainfo); |
921 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, | 921 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); |
922 | XFS_ICI_NO_TAG, 0, NULL); | ||
923 | } | 922 | } |
924 | 923 | ||
925 | /*------------------------------------------------------------------------*/ | 924 | /*------------------------------------------------------------------------*/ |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 912101d280bf..d66e87c7c3a6 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -219,6 +219,32 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) | |||
219 | return pag; | 219 | return pag; |
220 | } | 220 | } |
221 | 221 | ||
222 | /* | ||
223 | * search from @first to find the next perag with the given tag set. | ||
224 | */ | ||
225 | struct xfs_perag * | ||
226 | xfs_perag_get_tag( | ||
227 | struct xfs_mount *mp, | ||
228 | xfs_agnumber_t first, | ||
229 | int tag) | ||
230 | { | ||
231 | struct xfs_perag *pag; | ||
232 | int found; | ||
233 | int ref; | ||
234 | |||
235 | rcu_read_lock(); | ||
236 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | ||
237 | (void **)&pag, first, 1, tag); | ||
238 | if (found <= 0) { | ||
239 | rcu_read_unlock(); | ||
240 | return NULL; | ||
241 | } | ||
242 | ref = atomic_inc_return(&pag->pag_ref); | ||
243 | rcu_read_unlock(); | ||
244 | trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); | ||
245 | return pag; | ||
246 | } | ||
247 | |||
222 | void | 248 | void |
223 | xfs_perag_put(struct xfs_perag *pag) | 249 | xfs_perag_put(struct xfs_perag *pag) |
224 | { | 250 | { |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 622da2179a57..7ab240930ba5 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -327,6 +327,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) | |||
327 | * perag get/put wrappers for ref counting | 327 | * perag get/put wrappers for ref counting |
328 | */ | 328 | */ |
329 | struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); | 329 | struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); |
330 | struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
331 | int tag); | ||
330 | void xfs_perag_put(struct xfs_perag *pag); | 332 | void xfs_perag_put(struct xfs_perag *pag); |
331 | 333 | ||
332 | /* | 334 | /* |