diff options
Diffstat (limited to 'fs/ubifs/shrinker.c')
| -rw-r--r-- | fs/ubifs/shrinker.c | 322 |
1 files changed, 322 insertions, 0 deletions
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c new file mode 100644 index 000000000000..f248533841a2 --- /dev/null +++ b/fs/ubifs/shrinker.c | |||
| @@ -0,0 +1,322 @@ | |||
| 1 | /* | ||
| 2 | * This file is part of UBIFS. | ||
| 3 | * | ||
| 4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or modify it | ||
| 7 | * under the terms of the GNU General Public License version 2 as published by | ||
| 8 | * the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
| 11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
| 12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
| 13 | * more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public License along with | ||
| 16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
| 17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
| 18 | * | ||
| 19 | * Authors: Artem Bityutskiy (Битюцкий Артём) | ||
| 20 | * Adrian Hunter | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* | ||
| 24 | * This file implements UBIFS shrinker which evicts clean znodes from the TNC | ||
| 25 | * tree when Linux VM needs more RAM. | ||
| 26 | * | ||
| 27 | * We do not implement any LRU lists to find oldest znodes to free because it | ||
| 28 | * would add additional overhead to the file system fast paths. So the shrinker | ||
| 29 | * just walks the TNC tree when searching for znodes to free. | ||
| 30 | * | ||
| 31 | * If the root of a TNC sub-tree is clean and old enough, then the children are | ||
| 32 | * also clean and old enough. So the shrinker walks the TNC in level order and | ||
| 33 | * dumps entire sub-trees. | ||
| 34 | * | ||
| 35 | * The age of znodes is just the time-stamp when they were last looked at. | ||
| 36 | * The current shrinker first tries to evict old znodes, then young ones. | ||
| 37 | * | ||
| 38 | * Since the shrinker is global, it has to protect against races with FS | ||
| 39 | * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'. | ||
| 40 | */ | ||
| 41 | |||
| 42 | #include "ubifs.h" | ||
| 43 | |||
| 44 | /* List of all UBIFS file-system instances */ | ||
| 45 | LIST_HEAD(ubifs_infos); | ||
| 46 | |||
| 47 | /* | ||
| 48 | * We number each shrinker run and record the number on the ubifs_info structure | ||
| 49 | * so that we can easily work out which ubifs_info structures have already been | ||
| 50 | * done by the current run. | ||
| 51 | */ | ||
| 52 | static unsigned int shrinker_run_no; | ||
| 53 | |||
| 54 | /* Protects 'ubifs_infos' list */ | ||
| 55 | DEFINE_SPINLOCK(ubifs_infos_lock); | ||
| 56 | |||
| 57 | /* Global clean znode counter (for all mounted UBIFS instances) */ | ||
| 58 | atomic_long_t ubifs_clean_zn_cnt; | ||
| 59 | |||
| 60 | /** | ||
| 61 | * shrink_tnc - shrink TNC tree. | ||
| 62 | * @c: UBIFS file-system description object | ||
| 63 | * @nr: number of znodes to free | ||
| 64 | * @age: the age of znodes to free | ||
| 65 | * @contention: if any contention, this is set to %1 | ||
| 66 | * | ||
| 67 | * This function traverses TNC tree and frees clean znodes. It does not free | ||
| 68 | * clean znodes which younger then @age. Returns number of freed znodes. | ||
| 69 | */ | ||
| 70 | static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention) | ||
| 71 | { | ||
| 72 | int total_freed = 0; | ||
| 73 | struct ubifs_znode *znode, *zprev; | ||
| 74 | int time = get_seconds(); | ||
| 75 | |||
| 76 | ubifs_assert(mutex_is_locked(&c->umount_mutex)); | ||
| 77 | ubifs_assert(mutex_is_locked(&c->tnc_mutex)); | ||
| 78 | |||
| 79 | if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0) | ||
| 80 | return 0; | ||
| 81 | |||
| 82 | /* | ||
| 83 | * Traverse the TNC tree in levelorder manner, so that it is possible | ||
| 84 | * to destroy large sub-trees. Indeed, if a znode is old, then all its | ||
| 85 | * children are older or of the same age. | ||
| 86 | * | ||
| 87 | * Note, we are holding 'c->tnc_mutex', so we do not have to lock the | ||
| 88 | * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is | ||
| 89 | * changed only when the 'c->tnc_mutex' is held. | ||
| 90 | */ | ||
| 91 | zprev = NULL; | ||
| 92 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); | ||
| 93 | while (znode && total_freed < nr && | ||
| 94 | atomic_long_read(&c->clean_zn_cnt) > 0) { | ||
| 95 | int freed; | ||
| 96 | |||
| 97 | /* | ||
| 98 | * If the znode is clean, but it is in the 'c->cnext' list, this | ||
| 99 | * means that this znode has just been written to flash as a | ||
| 100 | * part of commit and was marked clean. They will be removed | ||
| 101 | * from the list at end commit. We cannot change the list, | ||
| 102 | * because it is not protected by any mutex (design decision to | ||
| 103 | * make commit really independent and parallel to main I/O). So | ||
| 104 | * we just skip these znodes. | ||
| 105 | * | ||
| 106 | * Note, the 'clean_zn_cnt' counters are not updated until | ||
| 107 | * after the commit, so the UBIFS shrinker does not report | ||
| 108 | * the znodes which are in the 'c->cnext' list as freeable. | ||
| 109 | * | ||
| 110 | * Also note, if the root of a sub-tree is not in 'c->cnext', | ||
| 111 | * then the whole sub-tree is not in 'c->cnext' as well, so it | ||
| 112 | * is safe to dump whole sub-tree. | ||
| 113 | */ | ||
| 114 | |||
| 115 | if (znode->cnext) { | ||
| 116 | /* | ||
| 117 | * Very soon these znodes will be removed from the list | ||
| 118 | * and become freeable. | ||
| 119 | */ | ||
| 120 | *contention = 1; | ||
| 121 | } else if (!ubifs_zn_dirty(znode) && | ||
| 122 | abs(time - znode->time) >= age) { | ||
| 123 | if (znode->parent) | ||
| 124 | znode->parent->zbranch[znode->iip].znode = NULL; | ||
| 125 | else | ||
| 126 | c->zroot.znode = NULL; | ||
| 127 | |||
| 128 | freed = ubifs_destroy_tnc_subtree(znode); | ||
| 129 | atomic_long_sub(freed, &ubifs_clean_zn_cnt); | ||
| 130 | atomic_long_sub(freed, &c->clean_zn_cnt); | ||
| 131 | ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0); | ||
| 132 | total_freed += freed; | ||
| 133 | znode = zprev; | ||
| 134 | } | ||
| 135 | |||
| 136 | if (unlikely(!c->zroot.znode)) | ||
| 137 | break; | ||
| 138 | |||
| 139 | zprev = znode; | ||
| 140 | znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode); | ||
| 141 | cond_resched(); | ||
| 142 | } | ||
| 143 | |||
| 144 | return total_freed; | ||
| 145 | } | ||
| 146 | |||
| 147 | /** | ||
| 148 | * shrink_tnc_trees - shrink UBIFS TNC trees. | ||
| 149 | * @nr: number of znodes to free | ||
| 150 | * @age: the age of znodes to free | ||
| 151 | * @contention: if any contention, this is set to %1 | ||
| 152 | * | ||
| 153 | * This function walks the list of mounted UBIFS file-systems and frees clean | ||
| 154 | * znodes which are older then @age, until at least @nr znodes are freed. | ||
| 155 | * Returns the number of freed znodes. | ||
| 156 | */ | ||
| 157 | static int shrink_tnc_trees(int nr, int age, int *contention) | ||
| 158 | { | ||
| 159 | struct ubifs_info *c; | ||
| 160 | struct list_head *p; | ||
| 161 | unsigned int run_no; | ||
| 162 | int freed = 0; | ||
| 163 | |||
| 164 | spin_lock(&ubifs_infos_lock); | ||
| 165 | do { | ||
| 166 | run_no = ++shrinker_run_no; | ||
| 167 | } while (run_no == 0); | ||
| 168 | /* Iterate over all mounted UBIFS file-systems and try to shrink them */ | ||
| 169 | p = ubifs_infos.next; | ||
| 170 | while (p != &ubifs_infos) { | ||
| 171 | c = list_entry(p, struct ubifs_info, infos_list); | ||
| 172 | /* | ||
| 173 | * We move the ones we do to the end of the list, so we stop | ||
| 174 | * when we see one we have already done. | ||
| 175 | */ | ||
| 176 | if (c->shrinker_run_no == run_no) | ||
| 177 | break; | ||
| 178 | if (!mutex_trylock(&c->umount_mutex)) { | ||
| 179 | /* Some un-mount is in progress, try next FS */ | ||
| 180 | *contention = 1; | ||
| 181 | p = p->next; | ||
| 182 | continue; | ||
| 183 | } | ||
| 184 | /* | ||
| 185 | * We're holding 'c->umount_mutex', so the file-system won't go | ||
| 186 | * away. | ||
| 187 | */ | ||
| 188 | if (!mutex_trylock(&c->tnc_mutex)) { | ||
| 189 | mutex_unlock(&c->umount_mutex); | ||
| 190 | *contention = 1; | ||
| 191 | p = p->next; | ||
| 192 | continue; | ||
| 193 | } | ||
| 194 | spin_unlock(&ubifs_infos_lock); | ||
| 195 | /* | ||
| 196 | * OK, now we have TNC locked, the file-system cannot go away - | ||
| 197 | * it is safe to reap the cache. | ||
| 198 | */ | ||
| 199 | c->shrinker_run_no = run_no; | ||
| 200 | freed += shrink_tnc(c, nr, age, contention); | ||
| 201 | mutex_unlock(&c->tnc_mutex); | ||
| 202 | spin_lock(&ubifs_infos_lock); | ||
| 203 | /* Get the next list element before we move this one */ | ||
| 204 | p = p->next; | ||
| 205 | /* | ||
| 206 | * Move this one to the end of the list to provide some | ||
| 207 | * fairness. | ||
| 208 | */ | ||
| 209 | list_del(&c->infos_list); | ||
| 210 | list_add_tail(&c->infos_list, &ubifs_infos); | ||
| 211 | mutex_unlock(&c->umount_mutex); | ||
| 212 | if (freed >= nr) | ||
| 213 | break; | ||
| 214 | } | ||
| 215 | spin_unlock(&ubifs_infos_lock); | ||
| 216 | return freed; | ||
| 217 | } | ||
| 218 | |||
| 219 | /** | ||
| 220 | * kick_a_thread - kick a background thread to start commit. | ||
| 221 | * | ||
| 222 | * This function kicks a background thread to start background commit. Returns | ||
| 223 | * %-1 if a thread was kicked or there is another reason to assume the memory | ||
| 224 | * will soon be freed or become freeable. If there are no dirty znodes, returns | ||
| 225 | * %0. | ||
| 226 | */ | ||
| 227 | static int kick_a_thread(void) | ||
| 228 | { | ||
| 229 | int i; | ||
| 230 | struct ubifs_info *c; | ||
| 231 | |||
| 232 | /* | ||
| 233 | * Iterate over all mounted UBIFS file-systems and find out if there is | ||
| 234 | * already an ongoing commit operation there. If no, then iterate for | ||
| 235 | * the second time and initiate background commit. | ||
| 236 | */ | ||
| 237 | spin_lock(&ubifs_infos_lock); | ||
| 238 | for (i = 0; i < 2; i++) { | ||
| 239 | list_for_each_entry(c, &ubifs_infos, infos_list) { | ||
| 240 | long dirty_zn_cnt; | ||
| 241 | |||
| 242 | if (!mutex_trylock(&c->umount_mutex)) { | ||
| 243 | /* | ||
| 244 | * Some un-mount is in progress, it will | ||
| 245 | * certainly free memory, so just return. | ||
| 246 | */ | ||
| 247 | spin_unlock(&ubifs_infos_lock); | ||
| 248 | return -1; | ||
| 249 | } | ||
| 250 | |||
| 251 | dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); | ||
| 252 | |||
| 253 | if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || | ||
| 254 | c->ro_media) { | ||
| 255 | mutex_unlock(&c->umount_mutex); | ||
| 256 | continue; | ||
| 257 | } | ||
| 258 | |||
| 259 | if (c->cmt_state != COMMIT_RESTING) { | ||
| 260 | spin_unlock(&ubifs_infos_lock); | ||
| 261 | mutex_unlock(&c->umount_mutex); | ||
| 262 | return -1; | ||
| 263 | } | ||
| 264 | |||
| 265 | if (i == 1) { | ||
| 266 | list_del(&c->infos_list); | ||
| 267 | list_add_tail(&c->infos_list, &ubifs_infos); | ||
| 268 | spin_unlock(&ubifs_infos_lock); | ||
| 269 | |||
| 270 | ubifs_request_bg_commit(c); | ||
| 271 | mutex_unlock(&c->umount_mutex); | ||
| 272 | return -1; | ||
| 273 | } | ||
| 274 | mutex_unlock(&c->umount_mutex); | ||
| 275 | } | ||
| 276 | } | ||
| 277 | spin_unlock(&ubifs_infos_lock); | ||
| 278 | |||
| 279 | return 0; | ||
| 280 | } | ||
| 281 | |||
| 282 | int ubifs_shrinker(int nr, gfp_t gfp_mask) | ||
| 283 | { | ||
| 284 | int freed, contention = 0; | ||
| 285 | long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); | ||
| 286 | |||
| 287 | if (nr == 0) | ||
| 288 | return clean_zn_cnt; | ||
| 289 | |||
| 290 | if (!clean_zn_cnt) { | ||
| 291 | /* | ||
| 292 | * No clean znodes, nothing to reap. All we can do in this case | ||
| 293 | * is to kick background threads to start commit, which will | ||
| 294 | * probably make clean znodes which, in turn, will be freeable. | ||
| 295 | * And we return -1 which means will make VM call us again | ||
| 296 | * later. | ||
| 297 | */ | ||
| 298 | dbg_tnc("no clean znodes, kick a thread"); | ||
| 299 | return kick_a_thread(); | ||
| 300 | } | ||
| 301 | |||
| 302 | freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention); | ||
| 303 | if (freed >= nr) | ||
| 304 | goto out; | ||
| 305 | |||
| 306 | dbg_tnc("not enough old znodes, try to free young ones"); | ||
| 307 | freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention); | ||
| 308 | if (freed >= nr) | ||
| 309 | goto out; | ||
| 310 | |||
| 311 | dbg_tnc("not enough young znodes, free all"); | ||
| 312 | freed += shrink_tnc_trees(nr - freed, 0, &contention); | ||
| 313 | |||
| 314 | if (!freed && contention) { | ||
| 315 | dbg_tnc("freed nothing, but contention"); | ||
| 316 | return -1; | ||
| 317 | } | ||
| 318 | |||
| 319 | out: | ||
| 320 | dbg_tnc("%d znodes were freed, requested %d", freed, nr); | ||
| 321 | return freed; | ||
| 322 | } | ||
