diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-07-17 17:57:20 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-17 17:57:20 -0400 |
commit | 393d81aa026e19b6ede6f5f11955c97ee62e5df5 (patch) | |
tree | a1d9511e488e19d41089ff0a736f6ce52a81c6e5 /fs/ubifs/shrinker.c | |
parent | 93a0886e2368eafb9df5e2021fb185195cee88b2 (diff) | |
parent | 5b664cb235e97afbf34db9c4d77f08ebd725335e (diff) |
Merge branch 'linus' into xen-64bit
Diffstat (limited to 'fs/ubifs/shrinker.c')
-rw-r--r-- | fs/ubifs/shrinker.c | 322 |
1 files changed, 322 insertions, 0 deletions
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c new file mode 100644 index 000000000000..f248533841a2 --- /dev/null +++ b/fs/ubifs/shrinker.c | |||
@@ -0,0 +1,322 @@ | |||
1 | /* | ||
2 | * This file is part of UBIFS. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License version 2 as published by | ||
8 | * the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License along with | ||
16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | * | ||
19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
20 | * Adrian Hunter | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * This file implements UBIFS shrinker which evicts clean znodes from the TNC | ||
25 | * tree when Linux VM needs more RAM. | ||
26 | * | ||
27 | * We do not implement any LRU lists to find oldest znodes to free because it | ||
28 | * would add additional overhead to the file system fast paths. So the shrinker | ||
29 | * just walks the TNC tree when searching for znodes to free. | ||
30 | * | ||
31 | * If the root of a TNC sub-tree is clean and old enough, then the children are | ||
32 | * also clean and old enough. So the shrinker walks the TNC in level order and | ||
33 | * dumps entire sub-trees. | ||
34 | * | ||
35 | * The age of znodes is just the time-stamp when they were last looked at. | ||
36 | * The current shrinker first tries to evict old znodes, then young ones. | ||
37 | * | ||
38 | * Since the shrinker is global, it has to protect against races with FS | ||
39 | * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. | ||
40 | */ | ||
41 | |||
42 | #include "ubifs.h" | ||
43 | |||
44 | /* List of all UBIFS file-system instances */ | ||
45 | LIST_HEAD(ubifs_infos); | ||
46 | |||
47 | /* | ||
48 | * We number each shrinker run and record the number on the ubifs_info structure | ||
49 | * so that we can easily work out which ubifs_info structures have already been | ||
50 | * done by the current run. | ||
51 | */ | ||
52 | static unsigned int shrinker_run_no; | ||
53 | |||
54 | /* Protects 'ubifs_infos' list */ | ||
55 | DEFINE_SPINLOCK(ubifs_infos_lock); | ||
56 | |||
57 | /* Global clean znode counter (for all mounted UBIFS instances) */ | ||
58 | atomic_long_t ubifs_clean_zn_cnt; | ||
59 | |||
60 | /** | ||
61 | * shrink_tnc - shrink TNC tree. | ||
62 | * @c: UBIFS file-system description object | ||
63 | * @nr: number of znodes to free | ||
64 | * @age: the age of znodes to free | ||
65 | * @contention: if any contention, this is set to %1 | ||
66 | * | ||
67 | * This function traverses TNC tree and frees clean znodes. It does not free | ||
68 | * clean znodes which younger then @age. Returns number of freed znodes. | ||
69 | */ | ||
70 | static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) | ||
71 | { | ||
72 | int total_freed = 0; | ||
73 | struct ubifs_znode *znode, *zprev; | ||
74 | int time = get_seconds(); | ||
75 | |||
76 | ubifs_assert(mutex_is_locked(&c->umount_mutex)); | ||
77 | ubifs_assert(mutex_is_locked(&c->tnc_mutex)); | ||
78 | |||
79 | if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) | ||
80 | return 0; | ||
81 | |||
82 | /* | ||
83 | * Traverse the TNC tree in levelorder manner, so that it is possible | ||
84 | * to destroy large sub-trees. Indeed, if a znode is old, then all its | ||
85 | * children are older or of the same age. | ||
86 | * | ||
87 | * Note, we are holding 'c->tnc_mutex', so we do not have to lock the | ||
88 | * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is | ||
89 | * changed only when the 'c->tnc_mutex' is held. | ||
90 | */ | ||
91 | zprev = NULL; | ||
92 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); | ||
93 | while (znode && total_freed < nr && | ||
94 | atomic_long_read(&c->clean_zn_cnt) > 0) { | ||
95 | int freed; | ||
96 | |||
97 | /* | ||
98 | * If the znode is clean, but it is in the 'c->cnext' list, this | ||
99 | * means that this znode has just been written to flash as a | ||
100 | * part of commit and was marked clean. They will be removed | ||
101 | * from the list at end commit. We cannot change the list, | ||
102 | * because it is not protected by any mutex (design decision to | ||
103 | * make commit really independent and parallel to main I/O). So | ||
104 | * we just skip these znodes. | ||
105 | * | ||
106 | * Note, the 'clean_zn_cnt' counters are not updated until | ||
107 | * after the commit, so the UBIFS shrinker does not report | ||
108 | * the znodes which are in the 'c->cnext' list as freeable. | ||
109 | * | ||
110 | * Also note, if the root of a sub-tree is not in 'c->cnext', | ||
111 | * then the whole sub-tree is not in 'c->cnext' as well, so it | ||
112 | * is safe to dump whole sub-tree. | ||
113 | */ | ||
114 | |||
115 | if (znode->cnext) { | ||
116 | /* | ||
117 | * Very soon these znodes will be removed from the list | ||
118 | * and become freeable. | ||
119 | */ | ||
120 | *contention = 1; | ||
121 | } else if (!ubifs_zn_dirty(znode) && | ||
122 | abs(time - znode->time) >= age) { | ||
123 | if (znode->parent) | ||
124 | znode->parent->zbranch[znode->iip].znode = NULL; | ||
125 | else | ||
126 | c->zroot.znode = NULL; | ||
127 | |||
128 | freed = ubifs_destroy_tnc_subtree(znode); | ||
129 | atomic_long_sub(freed, &ubifs_clean_zn_cnt); | ||
130 | atomic_long_sub(freed, &c->clean_zn_cnt); | ||
131 | ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); | ||
132 | total_freed += freed; | ||
133 | znode = zprev; | ||
134 | } | ||
135 | |||
136 | if (unlikely(!c->zroot.znode)) | ||
137 | break; | ||
138 | |||
139 | zprev = znode; | ||
140 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); | ||
141 | cond_resched(); | ||
142 | } | ||
143 | |||
144 | return total_freed; | ||
145 | } | ||
146 | |||
147 | /** | ||
148 | * shrink_tnc_trees - shrink UBIFS TNC trees. | ||
149 | * @nr: number of znodes to free | ||
150 | * @age: the age of znodes to free | ||
151 | * @contention: if any contention, this is set to %1 | ||
152 | * | ||
153 | * This function walks the list of mounted UBIFS file-systems and frees clean | ||
154 | * znodes which are older then @age, until at least @nr znodes are freed. | ||
155 | * Returns the number of freed znodes. | ||
156 | */ | ||
157 | static int shrink_tnc_trees(int nr, int age, int *contention) | ||
158 | { | ||
159 | struct ubifs_info *c; | ||
160 | struct list_head *p; | ||
161 | unsigned int run_no; | ||
162 | int freed = 0; | ||
163 | |||
164 | spin_lock(&ubifs_infos_lock); | ||
165 | do { | ||
166 | run_no = ++shrinker_run_no; | ||
167 | } while (run_no == 0); | ||
168 | /* Iterate over all mounted UBIFS file-systems and try to shrink them */ | ||
169 | p = ubifs_infos.next; | ||
170 | while (p != &ubifs_infos) { | ||
171 | c = list_entry(p, struct ubifs_info, infos_list); | ||
172 | /* | ||
173 | * We move the ones we do to the end of the list, so we stop | ||
174 | * when we see one we have already done. | ||
175 | */ | ||
176 | if (c->shrinker_run_no == run_no) | ||
177 | break; | ||
178 | if (!mutex_trylock(&c->umount_mutex)) { | ||
179 | /* Some un-mount is in progress, try next FS */ | ||
180 | *contention = 1; | ||
181 | p = p->next; | ||
182 | continue; | ||
183 | } | ||
184 | /* | ||
185 | * We're holding 'c->umount_mutex', so the file-system won't go | ||
186 | * away. | ||
187 | */ | ||
188 | if (!mutex_trylock(&c->tnc_mutex)) { | ||
189 | mutex_unlock(&c->umount_mutex); | ||
190 | *contention = 1; | ||
191 | p = p->next; | ||
192 | continue; | ||
193 | } | ||
194 | spin_unlock(&ubifs_infos_lock); | ||
195 | /* | ||
196 | * OK, now we have TNC locked, the file-system cannot go away - | ||
197 | * it is safe to reap the cache. | ||
198 | */ | ||
199 | c->shrinker_run_no = run_no; | ||
200 | freed += shrink_tnc(c, nr, age, contention); | ||
201 | mutex_unlock(&c->tnc_mutex); | ||
202 | spin_lock(&ubifs_infos_lock); | ||
203 | /* Get the next list element before we move this one */ | ||
204 | p = p->next; | ||
205 | /* | ||
206 | * Move this one to the end of the list to provide some | ||
207 | * fairness. | ||
208 | */ | ||
209 | list_del(&c->infos_list); | ||
210 | list_add_tail(&c->infos_list, &ubifs_infos); | ||
211 | mutex_unlock(&c->umount_mutex); | ||
212 | if (freed >= nr) | ||
213 | break; | ||
214 | } | ||
215 | spin_unlock(&ubifs_infos_lock); | ||
216 | return freed; | ||
217 | } | ||
218 | |||
219 | /** | ||
220 | * kick_a_thread - kick a background thread to start commit. | ||
221 | * | ||
222 | * This function kicks a background thread to start background commit. Returns | ||
223 | * %-1 if a thread was kicked or there is another reason to assume the memory | ||
224 | * will soon be freed or become freeable. If there are no dirty znodes, returns | ||
225 | * %0. | ||
226 | */ | ||
227 | static int kick_a_thread(void) | ||
228 | { | ||
229 | int i; | ||
230 | struct ubifs_info *c; | ||
231 | |||
232 | /* | ||
233 | * Iterate over all mounted UBIFS file-systems and find out if there is | ||
234 | * already an ongoing commit operation there. If no, then iterate for | ||
235 | * the second time and initiate background commit. | ||
236 | */ | ||
237 | spin_lock(&ubifs_infos_lock); | ||
238 | for (i = 0; i < 2; i++) { | ||
239 | list_for_each_entry(c, &ubifs_infos, infos_list) { | ||
240 | long dirty_zn_cnt; | ||
241 | |||
242 | if (!mutex_trylock(&c->umount_mutex)) { | ||
243 | /* | ||
244 | * Some un-mount is in progress, it will | ||
245 | * certainly free memory, so just return. | ||
246 | */ | ||
247 | spin_unlock(&ubifs_infos_lock); | ||
248 | return -1; | ||
249 | } | ||
250 | |||
251 | dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); | ||
252 | |||
253 | if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || | ||
254 | c->ro_media) { | ||
255 | mutex_unlock(&c->umount_mutex); | ||
256 | continue; | ||
257 | } | ||
258 | |||
259 | if (c->cmt_state != COMMIT_RESTING) { | ||
260 | spin_unlock(&ubifs_infos_lock); | ||
261 | mutex_unlock(&c->umount_mutex); | ||
262 | return -1; | ||
263 | } | ||
264 | |||
265 | if (i == 1) { | ||
266 | list_del(&c->infos_list); | ||
267 | list_add_tail(&c->infos_list, &ubifs_infos); | ||
268 | spin_unlock(&ubifs_infos_lock); | ||
269 | |||
270 | ubifs_request_bg_commit(c); | ||
271 | mutex_unlock(&c->umount_mutex); | ||
272 | return -1; | ||
273 | } | ||
274 | mutex_unlock(&c->umount_mutex); | ||
275 | } | ||
276 | } | ||
277 | spin_unlock(&ubifs_infos_lock); | ||
278 | |||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | int ubifs_shrinker(int nr, gfp_t gfp_mask) | ||
283 | { | ||
284 | int freed, contention = 0; | ||
285 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | ||
286 | |||
287 | if (nr == 0) | ||
288 | return clean_zn_cnt; | ||
289 | |||
290 | if (!clean_zn_cnt) { | ||
291 | /* | ||
292 | * No clean znodes, nothing to reap. All we can do in this case | ||
293 | * is to kick background threads to start commit, which will | ||
294 | * probably make clean znodes which, in turn, will be freeable. | ||
295 | * And we return -1 which means will make VM call us again | ||
296 | * later. | ||
297 | */ | ||
298 | dbg_tnc("no clean znodes, kick a thread"); | ||
299 | return kick_a_thread(); | ||
300 | } | ||
301 | |||
302 | freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); | ||
303 | if (freed >= nr) | ||
304 | goto out; | ||
305 | |||
306 | dbg_tnc("not enough old znodes, try to free young ones"); | ||
307 | freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); | ||
308 | if (freed >= nr) | ||
309 | goto out; | ||
310 | |||
311 | dbg_tnc("not enough young znodes, free all"); | ||
312 | freed += shrink_tnc_trees(nr - freed, 0, &contention); | ||
313 | |||
314 | if (!freed && contention) { | ||
315 | dbg_tnc("freed nothing, but contention"); | ||
316 | return -1; | ||
317 | } | ||
318 | |||
319 | out: | ||
320 | dbg_tnc("%d znodes were freed, requested %d", freed, nr); | ||
321 | return freed; | ||
322 | } | ||