diff options
author | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2008-07-14 12:08:37 -0400 |
---|---|---|
committer | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2008-07-15 10:35:15 -0400 |
commit | 1e51764a3c2ac05a23a22b2a95ddee4d9bffb16d (patch) | |
tree | 919debdd48aef9eee9ff0e8f465ef2649325b993 /fs/ubifs/replay.c | |
parent | e56a99d5a42dcb91e622ae7a0289d8fb2ddabffb (diff) |
UBIFS: add new flash file system
This is a new flash file system. See
http://www.linux-mtd.infradead.org/doc/ubifs.html
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
Diffstat (limited to 'fs/ubifs/replay.c')
-rw-r--r-- | fs/ubifs/replay.c | 1075 |
1 files changed, 1075 insertions, 0 deletions
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c new file mode 100644 index 000000000000..7399692af859 --- /dev/null +++ b/fs/ubifs/replay.c | |||
@@ -0,0 +1,1075 @@ | |||
1 | /* | ||
2 | * This file is part of UBIFS. | ||
3 | * | ||
4 | * Copyright (C) 2006-2008 Nokia Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify it | ||
7 | * under the terms of the GNU General Public License version 2 as published by | ||
8 | * the Free Software Foundation. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
11 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
12 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
13 | * more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License along with | ||
16 | * this program; if not, write to the Free Software Foundation, Inc., 51 | ||
17 | * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
18 | * | ||
19 | * Authors: Adrian Hunter | ||
20 | * Artem Bityutskiy (Битюцкий Артём) | ||
21 | */ | ||
22 | |||
23 | /* | ||
24 | * This file contains journal replay code. It runs when the file-system is being | ||
25 | * mounted and requires no locking. | ||
26 | * | ||
27 | * The larger is the journal, the longer it takes to scan it, so the longer it | ||
28 | * takes to mount UBIFS. This is why the journal has limited size which may be | ||
29 | * changed depending on the system requirements. But a larger journal gives | ||
30 | * faster I/O speed because it writes the index less frequently. So this is a | ||
31 | * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the | ||
32 | * larger is the journal, the more memory its index may consume. | ||
33 | */ | ||
34 | |||
35 | #include "ubifs.h" | ||
36 | |||
37 | /* | ||
38 | * Replay flags. | ||
39 | * | ||
40 | * REPLAY_DELETION: node was deleted | ||
41 | * REPLAY_REF: node is a reference node | ||
42 | */ | ||
43 | enum { | ||
44 | REPLAY_DELETION = 1, | ||
45 | REPLAY_REF = 2, | ||
46 | }; | ||
47 | |||
48 | /** | ||
49 | * struct replay_entry - replay tree entry. | ||
50 | * @lnum: logical eraseblock number of the node | ||
51 | * @offs: node offset | ||
52 | * @len: node length | ||
53 | * @sqnum: node sequence number | ||
54 | * @flags: replay flags | ||
55 | * @rb: links the replay tree | ||
56 | * @key: node key | ||
57 | * @nm: directory entry name | ||
58 | * @old_size: truncation old size | ||
59 | * @new_size: truncation new size | ||
60 | * @free: amount of free space in a bud | ||
61 | * @dirty: amount of dirty space in a bud from padding and deletion nodes | ||
62 | * | ||
63 | * UBIFS journal replay must compare node sequence numbers, which means it must | ||
64 | * build a tree of node information to insert into the TNC. | ||
65 | */ | ||
66 | struct replay_entry { | ||
67 | int lnum; | ||
68 | int offs; | ||
69 | int len; | ||
70 | unsigned long long sqnum; | ||
71 | int flags; | ||
72 | struct rb_node rb; | ||
73 | union ubifs_key key; | ||
74 | union { | ||
75 | struct qstr nm; | ||
76 | struct { | ||
77 | loff_t old_size; | ||
78 | loff_t new_size; | ||
79 | }; | ||
80 | struct { | ||
81 | int free; | ||
82 | int dirty; | ||
83 | }; | ||
84 | }; | ||
85 | }; | ||
86 | |||
87 | /** | ||
88 | * struct bud_entry - entry in the list of buds to replay. | ||
89 | * @list: next bud in the list | ||
90 | * @bud: bud description object | ||
91 | * @free: free bytes in the bud | ||
92 | * @sqnum: reference node sequence number | ||
93 | */ | ||
94 | struct bud_entry { | ||
95 | struct list_head list; | ||
96 | struct ubifs_bud *bud; | ||
97 | int free; | ||
98 | unsigned long long sqnum; | ||
99 | }; | ||
100 | |||
101 | /** | ||
102 | * set_bud_lprops - set free and dirty space used by a bud. | ||
103 | * @c: UBIFS file-system description object | ||
104 | * @r: replay entry of bud | ||
105 | */ | ||
106 | static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) | ||
107 | { | ||
108 | const struct ubifs_lprops *lp; | ||
109 | int err = 0, dirty; | ||
110 | |||
111 | ubifs_get_lprops(c); | ||
112 | |||
113 | lp = ubifs_lpt_lookup_dirty(c, r->lnum); | ||
114 | if (IS_ERR(lp)) { | ||
115 | err = PTR_ERR(lp); | ||
116 | goto out; | ||
117 | } | ||
118 | |||
119 | dirty = lp->dirty; | ||
120 | if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { | ||
121 | /* | ||
122 | * The LEB was added to the journal with a starting offset of | ||
123 | * zero which means the LEB must have been empty. The LEB | ||
124 | * property values should be lp->free == c->leb_size and | ||
125 | * lp->dirty == 0, but that is not the case. The reason is that | ||
126 | * the LEB was garbage collected. The garbage collector resets | ||
127 | * the free and dirty space without recording it anywhere except | ||
128 | * lprops, so if there is not a commit then lprops does not have | ||
129 | * that information next time the file system is mounted. | ||
130 | * | ||
131 | * We do not need to adjust free space because the scan has told | ||
132 | * us the exact value which is recorded in the replay entry as | ||
133 | * r->free. | ||
134 | * | ||
135 | * However we do need to subtract from the dirty space the | ||
136 | * amount of space that the garbage collector reclaimed, which | ||
137 | * is the whole LEB minus the amount of space that was free. | ||
138 | */ | ||
139 | dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, | ||
140 | lp->free, lp->dirty); | ||
141 | dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, | ||
142 | lp->free, lp->dirty); | ||
143 | dirty -= c->leb_size - lp->free; | ||
144 | /* | ||
145 | * If the replay order was perfect the dirty space would now be | ||
146 | * zero. The order is not perfect because the the journal heads | ||
147 | * race with eachother. This is not a problem but is does mean | ||
148 | * that the dirty space may temporarily exceed c->leb_size | ||
149 | * during the replay. | ||
150 | */ | ||
151 | if (dirty != 0) | ||
152 | dbg_msg("LEB %d lp: %d free %d dirty " | ||
153 | "replay: %d free %d dirty", r->lnum, lp->free, | ||
154 | lp->dirty, r->free, r->dirty); | ||
155 | } | ||
156 | lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, | ||
157 | lp->flags | LPROPS_TAKEN, 0); | ||
158 | if (IS_ERR(lp)) { | ||
159 | err = PTR_ERR(lp); | ||
160 | goto out; | ||
161 | } | ||
162 | out: | ||
163 | ubifs_release_lprops(c); | ||
164 | return err; | ||
165 | } | ||
166 | |||
167 | /** | ||
168 | * trun_remove_range - apply a replay entry for a truncation to the TNC. | ||
169 | * @c: UBIFS file-system description object | ||
170 | * @r: replay entry of truncation | ||
171 | */ | ||
172 | static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) | ||
173 | { | ||
174 | unsigned min_blk, max_blk; | ||
175 | union ubifs_key min_key, max_key; | ||
176 | ino_t ino; | ||
177 | |||
178 | min_blk = r->new_size / UBIFS_BLOCK_SIZE; | ||
179 | if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) | ||
180 | min_blk += 1; | ||
181 | |||
182 | max_blk = r->old_size / UBIFS_BLOCK_SIZE; | ||
183 | if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) | ||
184 | max_blk -= 1; | ||
185 | |||
186 | ino = key_inum(c, &r->key); | ||
187 | |||
188 | data_key_init(c, &min_key, ino, min_blk); | ||
189 | data_key_init(c, &max_key, ino, max_blk); | ||
190 | |||
191 | return ubifs_tnc_remove_range(c, &min_key, &max_key); | ||
192 | } | ||
193 | |||
194 | /** | ||
195 | * apply_replay_entry - apply a replay entry to the TNC. | ||
196 | * @c: UBIFS file-system description object | ||
197 | * @r: replay entry to apply | ||
198 | * | ||
199 | * Apply a replay entry to the TNC. | ||
200 | */ | ||
201 | static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) | ||
202 | { | ||
203 | int err, deletion = ((r->flags & REPLAY_DELETION) != 0); | ||
204 | |||
205 | dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, | ||
206 | r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); | ||
207 | |||
208 | /* Set c->replay_sqnum to help deal with dangling branches. */ | ||
209 | c->replay_sqnum = r->sqnum; | ||
210 | |||
211 | if (r->flags & REPLAY_REF) | ||
212 | err = set_bud_lprops(c, r); | ||
213 | else if (is_hash_key(c, &r->key)) { | ||
214 | if (deletion) | ||
215 | err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); | ||
216 | else | ||
217 | err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, | ||
218 | r->len, &r->nm); | ||
219 | } else { | ||
220 | if (deletion) | ||
221 | switch (key_type(c, &r->key)) { | ||
222 | case UBIFS_INO_KEY: | ||
223 | { | ||
224 | ino_t inum = key_inum(c, &r->key); | ||
225 | |||
226 | err = ubifs_tnc_remove_ino(c, inum); | ||
227 | break; | ||
228 | } | ||
229 | case UBIFS_TRUN_KEY: | ||
230 | err = trun_remove_range(c, r); | ||
231 | break; | ||
232 | default: | ||
233 | err = ubifs_tnc_remove(c, &r->key); | ||
234 | break; | ||
235 | } | ||
236 | else | ||
237 | err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, | ||
238 | r->len); | ||
239 | if (err) | ||
240 | return err; | ||
241 | |||
242 | if (c->need_recovery) | ||
243 | err = ubifs_recover_size_accum(c, &r->key, deletion, | ||
244 | r->new_size); | ||
245 | } | ||
246 | |||
247 | return err; | ||
248 | } | ||
249 | |||
250 | /** | ||
251 | * destroy_replay_tree - destroy the replay. | ||
252 | * @c: UBIFS file-system description object | ||
253 | * | ||
254 | * Destroy the replay tree. | ||
255 | */ | ||
256 | static void destroy_replay_tree(struct ubifs_info *c) | ||
257 | { | ||
258 | struct rb_node *this = c->replay_tree.rb_node; | ||
259 | struct replay_entry *r; | ||
260 | |||
261 | while (this) { | ||
262 | if (this->rb_left) { | ||
263 | this = this->rb_left; | ||
264 | continue; | ||
265 | } else if (this->rb_right) { | ||
266 | this = this->rb_right; | ||
267 | continue; | ||
268 | } | ||
269 | r = rb_entry(this, struct replay_entry, rb); | ||
270 | this = rb_parent(this); | ||
271 | if (this) { | ||
272 | if (this->rb_left == &r->rb) | ||
273 | this->rb_left = NULL; | ||
274 | else | ||
275 | this->rb_right = NULL; | ||
276 | } | ||
277 | if (is_hash_key(c, &r->key)) | ||
278 | kfree(r->nm.name); | ||
279 | kfree(r); | ||
280 | } | ||
281 | c->replay_tree = RB_ROOT; | ||
282 | } | ||
283 | |||
284 | /** | ||
285 | * apply_replay_tree - apply the replay tree to the TNC. | ||
286 | * @c: UBIFS file-system description object | ||
287 | * | ||
288 | * Apply the replay tree. | ||
289 | * Returns zero in case of success and a negative error code in case of | ||
290 | * failure. | ||
291 | */ | ||
292 | static int apply_replay_tree(struct ubifs_info *c) | ||
293 | { | ||
294 | struct rb_node *this = rb_first(&c->replay_tree); | ||
295 | |||
296 | while (this) { | ||
297 | struct replay_entry *r; | ||
298 | int err; | ||
299 | |||
300 | cond_resched(); | ||
301 | |||
302 | r = rb_entry(this, struct replay_entry, rb); | ||
303 | err = apply_replay_entry(c, r); | ||
304 | if (err) | ||
305 | return err; | ||
306 | this = rb_next(this); | ||
307 | } | ||
308 | return 0; | ||
309 | } | ||
310 | |||
311 | /** | ||
312 | * insert_node - insert a node to the replay tree. | ||
313 | * @c: UBIFS file-system description object | ||
314 | * @lnum: node logical eraseblock number | ||
315 | * @offs: node offset | ||
316 | * @len: node length | ||
317 | * @key: node key | ||
318 | * @sqnum: sequence number | ||
319 | * @deletion: non-zero if this is a deletion | ||
320 | * @used: number of bytes in use in a LEB | ||
321 | * @old_size: truncation old size | ||
322 | * @new_size: truncation new size | ||
323 | * | ||
324 | * This function inserts a scanned non-direntry node to the replay tree. The | ||
325 | * replay tree is an RB-tree containing @struct replay_entry elements which are | ||
326 | * indexed by the sequence number. The replay tree is applied at the very end | ||
327 | * of the replay process. Since the tree is sorted in sequence number order, | ||
328 | * the older modifications are applied first. This function returns zero in | ||
329 | * case of success and a negative error code in case of failure. | ||
330 | */ | ||
331 | static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | ||
332 | union ubifs_key *key, unsigned long long sqnum, | ||
333 | int deletion, int *used, loff_t old_size, | ||
334 | loff_t new_size) | ||
335 | { | ||
336 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
337 | struct replay_entry *r; | ||
338 | |||
339 | if (key_inum(c, key) >= c->highest_inum) | ||
340 | c->highest_inum = key_inum(c, key); | ||
341 | |||
342 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
343 | while (*p) { | ||
344 | parent = *p; | ||
345 | r = rb_entry(parent, struct replay_entry, rb); | ||
346 | if (sqnum < r->sqnum) { | ||
347 | p = &(*p)->rb_left; | ||
348 | continue; | ||
349 | } else if (sqnum > r->sqnum) { | ||
350 | p = &(*p)->rb_right; | ||
351 | continue; | ||
352 | } | ||
353 | ubifs_err("duplicate sqnum in replay"); | ||
354 | return -EINVAL; | ||
355 | } | ||
356 | |||
357 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | ||
358 | if (!r) | ||
359 | return -ENOMEM; | ||
360 | |||
361 | if (!deletion) | ||
362 | *used += ALIGN(len, 8); | ||
363 | r->lnum = lnum; | ||
364 | r->offs = offs; | ||
365 | r->len = len; | ||
366 | r->sqnum = sqnum; | ||
367 | r->flags = (deletion ? REPLAY_DELETION : 0); | ||
368 | r->old_size = old_size; | ||
369 | r->new_size = new_size; | ||
370 | key_copy(c, key, &r->key); | ||
371 | |||
372 | rb_link_node(&r->rb, parent, p); | ||
373 | rb_insert_color(&r->rb, &c->replay_tree); | ||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | /** | ||
378 | * insert_dent - insert a directory entry node into the replay tree. | ||
379 | * @c: UBIFS file-system description object | ||
380 | * @lnum: node logical eraseblock number | ||
381 | * @offs: node offset | ||
382 | * @len: node length | ||
383 | * @key: node key | ||
384 | * @name: directory entry name | ||
385 | * @nlen: directory entry name length | ||
386 | * @sqnum: sequence number | ||
387 | * @deletion: non-zero if this is a deletion | ||
388 | * @used: number of bytes in use in a LEB | ||
389 | * | ||
390 | * This function inserts a scanned directory entry node to the replay tree. | ||
391 | * Returns zero in case of success and a negative error code in case of | ||
392 | * failure. | ||
393 | * | ||
394 | * This function is also used for extended attribute entries because they are | ||
395 | * implemented as directory entry nodes. | ||
396 | */ | ||
397 | static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, | ||
398 | union ubifs_key *key, const char *name, int nlen, | ||
399 | unsigned long long sqnum, int deletion, int *used) | ||
400 | { | ||
401 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
402 | struct replay_entry *r; | ||
403 | char *nbuf; | ||
404 | |||
405 | if (key_inum(c, key) >= c->highest_inum) | ||
406 | c->highest_inum = key_inum(c, key); | ||
407 | |||
408 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
409 | while (*p) { | ||
410 | parent = *p; | ||
411 | r = rb_entry(parent, struct replay_entry, rb); | ||
412 | if (sqnum < r->sqnum) { | ||
413 | p = &(*p)->rb_left; | ||
414 | continue; | ||
415 | } | ||
416 | if (sqnum > r->sqnum) { | ||
417 | p = &(*p)->rb_right; | ||
418 | continue; | ||
419 | } | ||
420 | ubifs_err("duplicate sqnum in replay"); | ||
421 | return -EINVAL; | ||
422 | } | ||
423 | |||
424 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | ||
425 | if (!r) | ||
426 | return -ENOMEM; | ||
427 | nbuf = kmalloc(nlen + 1, GFP_KERNEL); | ||
428 | if (!nbuf) { | ||
429 | kfree(r); | ||
430 | return -ENOMEM; | ||
431 | } | ||
432 | |||
433 | if (!deletion) | ||
434 | *used += ALIGN(len, 8); | ||
435 | r->lnum = lnum; | ||
436 | r->offs = offs; | ||
437 | r->len = len; | ||
438 | r->sqnum = sqnum; | ||
439 | r->nm.len = nlen; | ||
440 | memcpy(nbuf, name, nlen); | ||
441 | nbuf[nlen] = '\0'; | ||
442 | r->nm.name = nbuf; | ||
443 | r->flags = (deletion ? REPLAY_DELETION : 0); | ||
444 | key_copy(c, key, &r->key); | ||
445 | |||
446 | ubifs_assert(!*p); | ||
447 | rb_link_node(&r->rb, parent, p); | ||
448 | rb_insert_color(&r->rb, &c->replay_tree); | ||
449 | return 0; | ||
450 | } | ||
451 | |||
452 | /** | ||
453 | * ubifs_validate_entry - validate directory or extended attribute entry node. | ||
454 | * @c: UBIFS file-system description object | ||
455 | * @dent: the node to validate | ||
456 | * | ||
457 | * This function validates directory or extended attribute entry node @dent. | ||
458 | * Returns zero if the node is all right and a %-EINVAL if not. | ||
459 | */ | ||
460 | int ubifs_validate_entry(struct ubifs_info *c, | ||
461 | const struct ubifs_dent_node *dent) | ||
462 | { | ||
463 | int key_type = key_type_flash(c, dent->key); | ||
464 | int nlen = le16_to_cpu(dent->nlen); | ||
465 | |||
466 | if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || | ||
467 | dent->type >= UBIFS_ITYPES_CNT || | ||
468 | nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || | ||
469 | strnlen(dent->name, nlen) != nlen || | ||
470 | le64_to_cpu(dent->inum) > MAX_INUM) { | ||
471 | ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? | ||
472 | "directory entry" : "extended attribute entry"); | ||
473 | return -EINVAL; | ||
474 | } | ||
475 | |||
476 | if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { | ||
477 | ubifs_err("bad key type %d", key_type); | ||
478 | return -EINVAL; | ||
479 | } | ||
480 | |||
481 | return 0; | ||
482 | } | ||
483 | |||
484 | /** | ||
485 | * replay_bud - replay a bud logical eraseblock. | ||
486 | * @c: UBIFS file-system description object | ||
487 | * @lnum: bud logical eraseblock number to replay | ||
488 | * @offs: bud start offset | ||
489 | * @jhead: journal head to which this bud belongs | ||
490 | * @free: amount of free space in the bud is returned here | ||
491 | * @dirty: amount of dirty space from padding and deletion nodes is returned | ||
492 | * here | ||
493 | * | ||
494 | * This function returns zero in case of success and a negative error code in | ||
495 | * case of failure. | ||
496 | */ | ||
497 | static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, | ||
498 | int *free, int *dirty) | ||
499 | { | ||
500 | int err = 0, used = 0; | ||
501 | struct ubifs_scan_leb *sleb; | ||
502 | struct ubifs_scan_node *snod; | ||
503 | struct ubifs_bud *bud; | ||
504 | |||
505 | dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); | ||
506 | if (c->need_recovery) | ||
507 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); | ||
508 | else | ||
509 | sleb = ubifs_scan(c, lnum, offs, c->sbuf); | ||
510 | if (IS_ERR(sleb)) | ||
511 | return PTR_ERR(sleb); | ||
512 | |||
513 | /* | ||
514 | * The bud does not have to start from offset zero - the beginning of | ||
515 | * the 'lnum' LEB may contain previously committed data. One of the | ||
516 | * things we have to do in replay is to correctly update lprops with | ||
517 | * newer information about this LEB. | ||
518 | * | ||
519 | * At this point lprops thinks that this LEB has 'c->leb_size - offs' | ||
520 | * bytes of free space because it only contain information about | ||
521 | * committed data. | ||
522 | * | ||
523 | * But we know that real amount of free space is 'c->leb_size - | ||
524 | * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and | ||
525 | * 'sleb->endpt' is used by bud data. We have to correctly calculate | ||
526 | * how much of these data are dirty and update lprops with this | ||
527 | * information. | ||
528 | * | ||
529 | * The dirt in that LEB region is comprised of padding nodes, deletion | ||
530 | * nodes, truncation nodes and nodes which are obsoleted by subsequent | ||
531 | * nodes in this LEB. So instead of calculating clean space, we | ||
532 | * calculate used space ('used' variable). | ||
533 | */ | ||
534 | |||
535 | list_for_each_entry(snod, &sleb->nodes, list) { | ||
536 | int deletion = 0; | ||
537 | |||
538 | cond_resched(); | ||
539 | |||
540 | if (snod->sqnum >= SQNUM_WATERMARK) { | ||
541 | ubifs_err("file system's life ended"); | ||
542 | goto out_dump; | ||
543 | } | ||
544 | |||
545 | if (snod->sqnum > c->max_sqnum) | ||
546 | c->max_sqnum = snod->sqnum; | ||
547 | |||
548 | switch (snod->type) { | ||
549 | case UBIFS_INO_NODE: | ||
550 | { | ||
551 | struct ubifs_ino_node *ino = snod->node; | ||
552 | loff_t new_size = le64_to_cpu(ino->size); | ||
553 | |||
554 | if (le32_to_cpu(ino->nlink) == 0) | ||
555 | deletion = 1; | ||
556 | err = insert_node(c, lnum, snod->offs, snod->len, | ||
557 | &snod->key, snod->sqnum, deletion, | ||
558 | &used, 0, new_size); | ||
559 | break; | ||
560 | } | ||
561 | case UBIFS_DATA_NODE: | ||
562 | { | ||
563 | struct ubifs_data_node *dn = snod->node; | ||
564 | loff_t new_size = le32_to_cpu(dn->size) + | ||
565 | key_block(c, &snod->key) * | ||
566 | UBIFS_BLOCK_SIZE; | ||
567 | |||
568 | err = insert_node(c, lnum, snod->offs, snod->len, | ||
569 | &snod->key, snod->sqnum, deletion, | ||
570 | &used, 0, new_size); | ||
571 | break; | ||
572 | } | ||
573 | case UBIFS_DENT_NODE: | ||
574 | case UBIFS_XENT_NODE: | ||
575 | { | ||
576 | struct ubifs_dent_node *dent = snod->node; | ||
577 | |||
578 | err = ubifs_validate_entry(c, dent); | ||
579 | if (err) | ||
580 | goto out_dump; | ||
581 | |||
582 | err = insert_dent(c, lnum, snod->offs, snod->len, | ||
583 | &snod->key, dent->name, | ||
584 | le16_to_cpu(dent->nlen), snod->sqnum, | ||
585 | !le64_to_cpu(dent->inum), &used); | ||
586 | break; | ||
587 | } | ||
588 | case UBIFS_TRUN_NODE: | ||
589 | { | ||
590 | struct ubifs_trun_node *trun = snod->node; | ||
591 | loff_t old_size = le64_to_cpu(trun->old_size); | ||
592 | loff_t new_size = le64_to_cpu(trun->new_size); | ||
593 | union ubifs_key key; | ||
594 | |||
595 | /* Validate truncation node */ | ||
596 | if (old_size < 0 || old_size > c->max_inode_sz || | ||
597 | new_size < 0 || new_size > c->max_inode_sz || | ||
598 | old_size <= new_size) { | ||
599 | ubifs_err("bad truncation node"); | ||
600 | goto out_dump; | ||
601 | } | ||
602 | |||
603 | /* | ||
604 | * Create a fake truncation key just to use the same | ||
605 | * functions which expect nodes to have keys. | ||
606 | */ | ||
607 | trun_key_init(c, &key, le32_to_cpu(trun->inum)); | ||
608 | err = insert_node(c, lnum, snod->offs, snod->len, | ||
609 | &key, snod->sqnum, 1, &used, | ||
610 | old_size, new_size); | ||
611 | break; | ||
612 | } | ||
613 | default: | ||
614 | ubifs_err("unexpected node type %d in bud LEB %d:%d", | ||
615 | snod->type, lnum, snod->offs); | ||
616 | err = -EINVAL; | ||
617 | goto out_dump; | ||
618 | } | ||
619 | if (err) | ||
620 | goto out; | ||
621 | } | ||
622 | |||
623 | bud = ubifs_search_bud(c, lnum); | ||
624 | if (!bud) | ||
625 | BUG(); | ||
626 | |||
627 | ubifs_assert(sleb->endpt - offs >= used); | ||
628 | ubifs_assert(sleb->endpt % c->min_io_size == 0); | ||
629 | |||
630 | if (sleb->endpt + c->min_io_size <= c->leb_size && | ||
631 | !(c->vfs_sb->s_flags & MS_RDONLY)) | ||
632 | err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, | ||
633 | sleb->endpt, UBI_SHORTTERM); | ||
634 | |||
635 | *dirty = sleb->endpt - offs - used; | ||
636 | *free = c->leb_size - sleb->endpt; | ||
637 | |||
638 | out: | ||
639 | ubifs_scan_destroy(sleb); | ||
640 | return err; | ||
641 | |||
642 | out_dump: | ||
643 | ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); | ||
644 | dbg_dump_node(c, snod->node); | ||
645 | ubifs_scan_destroy(sleb); | ||
646 | return -EINVAL; | ||
647 | } | ||
648 | |||
649 | /** | ||
650 | * insert_ref_node - insert a reference node to the replay tree. | ||
651 | * @c: UBIFS file-system description object | ||
652 | * @lnum: node logical eraseblock number | ||
653 | * @offs: node offset | ||
654 | * @sqnum: sequence number | ||
655 | * @free: amount of free space in bud | ||
656 | * @dirty: amount of dirty space from padding and deletion nodes | ||
657 | * | ||
658 | * This function inserts a reference node to the replay tree and returns zero | ||
659 | * in case of success ort a negative error code in case of failure. | ||
660 | */ | ||
661 | static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, | ||
662 | unsigned long long sqnum, int free, int dirty) | ||
663 | { | ||
664 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
665 | struct replay_entry *r; | ||
666 | |||
667 | dbg_mnt("add ref LEB %d:%d", lnum, offs); | ||
668 | while (*p) { | ||
669 | parent = *p; | ||
670 | r = rb_entry(parent, struct replay_entry, rb); | ||
671 | if (sqnum < r->sqnum) { | ||
672 | p = &(*p)->rb_left; | ||
673 | continue; | ||
674 | } else if (sqnum > r->sqnum) { | ||
675 | p = &(*p)->rb_right; | ||
676 | continue; | ||
677 | } | ||
678 | ubifs_err("duplicate sqnum in replay tree"); | ||
679 | return -EINVAL; | ||
680 | } | ||
681 | |||
682 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | ||
683 | if (!r) | ||
684 | return -ENOMEM; | ||
685 | |||
686 | r->lnum = lnum; | ||
687 | r->offs = offs; | ||
688 | r->sqnum = sqnum; | ||
689 | r->flags = REPLAY_REF; | ||
690 | r->free = free; | ||
691 | r->dirty = dirty; | ||
692 | |||
693 | rb_link_node(&r->rb, parent, p); | ||
694 | rb_insert_color(&r->rb, &c->replay_tree); | ||
695 | return 0; | ||
696 | } | ||
697 | |||
698 | /** | ||
699 | * replay_buds - replay all buds. | ||
700 | * @c: UBIFS file-system description object | ||
701 | * | ||
702 | * This function returns zero in case of success and a negative error code in | ||
703 | * case of failure. | ||
704 | */ | ||
705 | static int replay_buds(struct ubifs_info *c) | ||
706 | { | ||
707 | struct bud_entry *b; | ||
708 | int err, uninitialized_var(free), uninitialized_var(dirty); | ||
709 | |||
710 | list_for_each_entry(b, &c->replay_buds, list) { | ||
711 | err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, | ||
712 | &free, &dirty); | ||
713 | if (err) | ||
714 | return err; | ||
715 | err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, | ||
716 | free, dirty); | ||
717 | if (err) | ||
718 | return err; | ||
719 | } | ||
720 | |||
721 | return 0; | ||
722 | } | ||
723 | |||
724 | /** | ||
725 | * destroy_bud_list - destroy the list of buds to replay. | ||
726 | * @c: UBIFS file-system description object | ||
727 | */ | ||
728 | static void destroy_bud_list(struct ubifs_info *c) | ||
729 | { | ||
730 | struct bud_entry *b; | ||
731 | |||
732 | while (!list_empty(&c->replay_buds)) { | ||
733 | b = list_entry(c->replay_buds.next, struct bud_entry, list); | ||
734 | list_del(&b->list); | ||
735 | kfree(b); | ||
736 | } | ||
737 | } | ||
738 | |||
739 | /** | ||
740 | * add_replay_bud - add a bud to the list of buds to replay. | ||
741 | * @c: UBIFS file-system description object | ||
742 | * @lnum: bud logical eraseblock number to replay | ||
743 | * @offs: bud start offset | ||
744 | * @jhead: journal head to which this bud belongs | ||
745 | * @sqnum: reference node sequence number | ||
746 | * | ||
747 | * This function returns zero in case of success and a negative error code in | ||
748 | * case of failure. | ||
749 | */ | ||
750 | static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, | ||
751 | unsigned long long sqnum) | ||
752 | { | ||
753 | struct ubifs_bud *bud; | ||
754 | struct bud_entry *b; | ||
755 | |||
756 | dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); | ||
757 | |||
758 | bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); | ||
759 | if (!bud) | ||
760 | return -ENOMEM; | ||
761 | |||
762 | b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); | ||
763 | if (!b) { | ||
764 | kfree(bud); | ||
765 | return -ENOMEM; | ||
766 | } | ||
767 | |||
768 | bud->lnum = lnum; | ||
769 | bud->start = offs; | ||
770 | bud->jhead = jhead; | ||
771 | ubifs_add_bud(c, bud); | ||
772 | |||
773 | b->bud = bud; | ||
774 | b->sqnum = sqnum; | ||
775 | list_add_tail(&b->list, &c->replay_buds); | ||
776 | |||
777 | return 0; | ||
778 | } | ||
779 | |||
780 | /** | ||
781 | * validate_ref - validate a reference node. | ||
782 | * @c: UBIFS file-system description object | ||
783 | * @ref: the reference node to validate | ||
784 | * @ref_lnum: LEB number of the reference node | ||
785 | * @ref_offs: reference node offset | ||
786 | * | ||
787 | * This function returns %1 if a bud reference already exists for the LEB. %0 is | ||
788 | * returned if the reference node is new, otherwise %-EINVAL is returned if | ||
789 | * validation failed. | ||
790 | */ | ||
791 | static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) | ||
792 | { | ||
793 | struct ubifs_bud *bud; | ||
794 | int lnum = le32_to_cpu(ref->lnum); | ||
795 | unsigned int offs = le32_to_cpu(ref->offs); | ||
796 | unsigned int jhead = le32_to_cpu(ref->jhead); | ||
797 | |||
798 | /* | ||
799 | * ref->offs may point to the end of LEB when the journal head points | ||
800 | * to the end of LEB and we write reference node for it during commit. | ||
801 | * So this is why we require 'offs > c->leb_size'. | ||
802 | */ | ||
803 | if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || | ||
804 | lnum < c->main_first || offs > c->leb_size || | ||
805 | offs & (c->min_io_size - 1)) | ||
806 | return -EINVAL; | ||
807 | |||
808 | /* Make sure we have not already looked at this bud */ | ||
809 | bud = ubifs_search_bud(c, lnum); | ||
810 | if (bud) { | ||
811 | if (bud->jhead == jhead && bud->start <= offs) | ||
812 | return 1; | ||
813 | ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); | ||
814 | return -EINVAL; | ||
815 | } | ||
816 | |||
817 | return 0; | ||
818 | } | ||
819 | |||
820 | /** | ||
821 | * replay_log_leb - replay a log logical eraseblock. | ||
822 | * @c: UBIFS file-system description object | ||
823 | * @lnum: log logical eraseblock to replay | ||
824 | * @offs: offset to start replaying from | ||
825 | * @sbuf: scan buffer | ||
826 | * | ||
827 | * This function replays a log LEB and returns zero in case of success, %1 if | ||
828 | * this is the last LEB in the log, and a negative error code in case of | ||
829 | * failure. | ||
830 | */ | ||
831 | static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) | ||
832 | { | ||
833 | int err; | ||
834 | struct ubifs_scan_leb *sleb; | ||
835 | struct ubifs_scan_node *snod; | ||
836 | const struct ubifs_cs_node *node; | ||
837 | |||
838 | dbg_mnt("replay log LEB %d:%d", lnum, offs); | ||
839 | sleb = ubifs_scan(c, lnum, offs, sbuf); | ||
840 | if (IS_ERR(sleb)) { | ||
841 | if (c->need_recovery) | ||
842 | sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); | ||
843 | if (IS_ERR(sleb)) | ||
844 | return PTR_ERR(sleb); | ||
845 | } | ||
846 | |||
847 | if (sleb->nodes_cnt == 0) { | ||
848 | err = 1; | ||
849 | goto out; | ||
850 | } | ||
851 | |||
852 | node = sleb->buf; | ||
853 | |||
854 | snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); | ||
855 | if (c->cs_sqnum == 0) { | ||
856 | /* | ||
857 | * This is the first log LEB we are looking at, make sure that | ||
858 | * the first node is a commit start node. Also record its | ||
859 | * sequence number so that UBIFS can determine where the log | ||
860 | * ends, because all nodes which were have higher sequence | ||
861 | * numbers. | ||
862 | */ | ||
863 | if (snod->type != UBIFS_CS_NODE) { | ||
864 | dbg_err("first log node at LEB %d:%d is not CS node", | ||
865 | lnum, offs); | ||
866 | goto out_dump; | ||
867 | } | ||
868 | if (le64_to_cpu(node->cmt_no) != c->cmt_no) { | ||
869 | dbg_err("first CS node at LEB %d:%d has wrong " | ||
870 | "commit number %llu expected %llu", | ||
871 | lnum, offs, | ||
872 | (unsigned long long)le64_to_cpu(node->cmt_no), | ||
873 | c->cmt_no); | ||
874 | goto out_dump; | ||
875 | } | ||
876 | |||
877 | c->cs_sqnum = le64_to_cpu(node->ch.sqnum); | ||
878 | dbg_mnt("commit start sqnum %llu", c->cs_sqnum); | ||
879 | } | ||
880 | |||
881 | if (snod->sqnum < c->cs_sqnum) { | ||
882 | /* | ||
883 | * This means that we reached end of log and now | ||
884 | * look to the older log data, which was already | ||
885 | * committed but the eraseblock was not erased (UBIFS | ||
886 | * only unmaps it). So this basically means we have to | ||
887 | * exit with "end of log" code. | ||
888 | */ | ||
889 | err = 1; | ||
890 | goto out; | ||
891 | } | ||
892 | |||
893 | /* Make sure the first node sits at offset zero of the LEB */ | ||
894 | if (snod->offs != 0) { | ||
895 | dbg_err("first node is not at zero offset"); | ||
896 | goto out_dump; | ||
897 | } | ||
898 | |||
899 | list_for_each_entry(snod, &sleb->nodes, list) { | ||
900 | |||
901 | cond_resched(); | ||
902 | |||
903 | if (snod->sqnum >= SQNUM_WATERMARK) { | ||
904 | ubifs_err("file system's life ended"); | ||
905 | goto out_dump; | ||
906 | } | ||
907 | |||
908 | if (snod->sqnum < c->cs_sqnum) { | ||
909 | dbg_err("bad sqnum %llu, commit sqnum %llu", | ||
910 | snod->sqnum, c->cs_sqnum); | ||
911 | goto out_dump; | ||
912 | } | ||
913 | |||
914 | if (snod->sqnum > c->max_sqnum) | ||
915 | c->max_sqnum = snod->sqnum; | ||
916 | |||
917 | switch (snod->type) { | ||
918 | case UBIFS_REF_NODE: { | ||
919 | const struct ubifs_ref_node *ref = snod->node; | ||
920 | |||
921 | err = validate_ref(c, ref); | ||
922 | if (err == 1) | ||
923 | break; /* Already have this bud */ | ||
924 | if (err) | ||
925 | goto out_dump; | ||
926 | |||
927 | err = add_replay_bud(c, le32_to_cpu(ref->lnum), | ||
928 | le32_to_cpu(ref->offs), | ||
929 | le32_to_cpu(ref->jhead), | ||
930 | snod->sqnum); | ||
931 | if (err) | ||
932 | goto out; | ||
933 | |||
934 | break; | ||
935 | } | ||
936 | case UBIFS_CS_NODE: | ||
937 | /* Make sure it sits at the beginning of LEB */ | ||
938 | if (snod->offs != 0) { | ||
939 | ubifs_err("unexpected node in log"); | ||
940 | goto out_dump; | ||
941 | } | ||
942 | break; | ||
943 | default: | ||
944 | ubifs_err("unexpected node in log"); | ||
945 | goto out_dump; | ||
946 | } | ||
947 | } | ||
948 | |||
949 | if (sleb->endpt || c->lhead_offs >= c->leb_size) { | ||
950 | c->lhead_lnum = lnum; | ||
951 | c->lhead_offs = sleb->endpt; | ||
952 | } | ||
953 | |||
954 | err = !sleb->endpt; | ||
955 | out: | ||
956 | ubifs_scan_destroy(sleb); | ||
957 | return err; | ||
958 | |||
959 | out_dump: | ||
960 | ubifs_err("log error detected while replying the log at LEB %d:%d", | ||
961 | lnum, offs + snod->offs); | ||
962 | dbg_dump_node(c, snod->node); | ||
963 | ubifs_scan_destroy(sleb); | ||
964 | return -EINVAL; | ||
965 | } | ||
966 | |||
967 | /** | ||
968 | * take_ihead - update the status of the index head in lprops to 'taken'. | ||
969 | * @c: UBIFS file-system description object | ||
970 | * | ||
971 | * This function returns the amount of free space in the index head LEB or a | ||
972 | * negative error code. | ||
973 | */ | ||
974 | static int take_ihead(struct ubifs_info *c) | ||
975 | { | ||
976 | const struct ubifs_lprops *lp; | ||
977 | int err, free; | ||
978 | |||
979 | ubifs_get_lprops(c); | ||
980 | |||
981 | lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); | ||
982 | if (IS_ERR(lp)) { | ||
983 | err = PTR_ERR(lp); | ||
984 | goto out; | ||
985 | } | ||
986 | |||
987 | free = lp->free; | ||
988 | |||
989 | lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, | ||
990 | lp->flags | LPROPS_TAKEN, 0); | ||
991 | if (IS_ERR(lp)) { | ||
992 | err = PTR_ERR(lp); | ||
993 | goto out; | ||
994 | } | ||
995 | |||
996 | err = free; | ||
997 | out: | ||
998 | ubifs_release_lprops(c); | ||
999 | return err; | ||
1000 | } | ||
1001 | |||
1002 | /** | ||
1003 | * ubifs_replay_journal - replay journal. | ||
1004 | * @c: UBIFS file-system description object | ||
1005 | * | ||
1006 | * This function scans the journal, replays and cleans it up. It makes sure all | ||
1007 | * memory data structures related to uncommitted journal are built (dirty TNC | ||
1008 | * tree, tree of buds, modified lprops, etc). | ||
1009 | */ | ||
1010 | int ubifs_replay_journal(struct ubifs_info *c) | ||
1011 | { | ||
1012 | int err, i, lnum, offs, free; | ||
1013 | void *sbuf = NULL; | ||
1014 | |||
1015 | BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); | ||
1016 | |||
1017 | /* Update the status of the index head in lprops to 'taken' */ | ||
1018 | free = take_ihead(c); | ||
1019 | if (free < 0) | ||
1020 | return free; /* Error code */ | ||
1021 | |||
1022 | if (c->ihead_offs != c->leb_size - free) { | ||
1023 | ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, | ||
1024 | c->ihead_offs); | ||
1025 | return -EINVAL; | ||
1026 | } | ||
1027 | |||
1028 | sbuf = vmalloc(c->leb_size); | ||
1029 | if (!sbuf) | ||
1030 | return -ENOMEM; | ||
1031 | |||
1032 | dbg_mnt("start replaying the journal"); | ||
1033 | |||
1034 | c->replaying = 1; | ||
1035 | |||
1036 | lnum = c->ltail_lnum = c->lhead_lnum; | ||
1037 | offs = c->lhead_offs; | ||
1038 | |||
1039 | for (i = 0; i < c->log_lebs; i++, lnum++) { | ||
1040 | if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { | ||
1041 | /* | ||
1042 | * The log is logically circular, we reached the last | ||
1043 | * LEB, switch to the first one. | ||
1044 | */ | ||
1045 | lnum = UBIFS_LOG_LNUM; | ||
1046 | offs = 0; | ||
1047 | } | ||
1048 | err = replay_log_leb(c, lnum, offs, sbuf); | ||
1049 | if (err == 1) | ||
1050 | /* We hit the end of the log */ | ||
1051 | break; | ||
1052 | if (err) | ||
1053 | goto out; | ||
1054 | offs = 0; | ||
1055 | } | ||
1056 | |||
1057 | err = replay_buds(c); | ||
1058 | if (err) | ||
1059 | goto out; | ||
1060 | |||
1061 | err = apply_replay_tree(c); | ||
1062 | if (err) | ||
1063 | goto out; | ||
1064 | |||
1065 | ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); | ||
1066 | dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " | ||
1067 | "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, | ||
1068 | c->highest_inum); | ||
1069 | out: | ||
1070 | destroy_replay_tree(c); | ||
1071 | destroy_bud_list(c); | ||
1072 | vfree(sbuf); | ||
1073 | c->replaying = 0; | ||
1074 | return err; | ||
1075 | } | ||