aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jffs2/readinode.c
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@infradead.org>2007-04-24 22:23:42 -0400
committerDavid Woodhouse <dwmw2@infradead.org>2007-04-24 22:23:42 -0400
commitdf8e96f39103adf5a13332d784040a2c62667243 (patch)
tree01d7259260f628f4075c39c1cc7804b72998a601 /fs/jffs2/readinode.c
parent44b998e1eb254edc87177819ee693690fac68b7f (diff)
[JFFS2] Improve read_inode memory usage, v2.
We originally used to read every node and allocate a jffs2_tmp_dnode_info structure for each, before processing them in (reverse) version order and discarding the ones which are obsoleted by later nodes. With huge logfiles, this behaviour caused memory problems. For example, a file involved in OLPC trac #1292 has 1822391 nodes, and would cause the XO machine to run out of memory during the first stage of read_inode(). Instead of just inserting nodes into a tree in version order as we find them, we now put them into a tree in order of their offset within the file, which allows us to immediately discard nodes which are completely obsoleted. We don't use a full tree with 'fragments' pointing to the real data structure, as we do in the normal fragtree. We sort only on the start address, and add an 'overlapped' flag to the tmp_dnode_info to indicate that the node in question is (partially) overlapped by another. When the scan is complete, we start at the end of the file, adding each node to a real fragtree as before. Where the node is non-overlapped, we just add it (it doesn't matter that it's not the latest version; there is no overlap). When the node at the end of the tree _is_ overlapped, we sort it and all its overlapping nodes into version order and then add them to the fragtree in that order. This 'early discard' reduces the peak allocation of tmp_dnode_info structures from 1.8M to a mere 62872 (3.5%) in the degenerate case referenced above. This version of the patch also correctly rememembers the highest node version# seen for an inode when it's scanned. Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Diffstat (limited to 'fs/jffs2/readinode.c')
-rw-r--r--fs/jffs2/readinode.c732
1 files changed, 595 insertions, 137 deletions
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1298848336b8..49d4b0a67c55 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -22,30 +22,510 @@
22#include "nodelist.h" 22#include "nodelist.h"
23 23
24/* 24/*
25 * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in 25 * Check the data CRC of the node.
26 * order of increasing version. 26 *
27 * Returns: 0 if the data CRC is correct;
28 * 1 - if incorrect;
29 * error code if an error occured.
27 */ 30 */
28static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list) 31static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
29{ 32{
30 struct rb_node **p = &list->rb_node; 33 struct jffs2_raw_node_ref *ref = tn->fn->raw;
31 struct rb_node * parent = NULL; 34 int err = 0, pointed = 0;
32 struct jffs2_tmp_dnode_info *this; 35 struct jffs2_eraseblock *jeb;
33 36 unsigned char *buffer;
34 while (*p) { 37 uint32_t crc, ofs, len;
35 parent = *p; 38 size_t retlen;
36 this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb); 39
37 40 BUG_ON(tn->csize == 0);
38 /* There may actually be a collision here, but it doesn't 41
39 actually matter. As long as the two nodes with the same 42 if (!jffs2_is_writebuffered(c))
40 version are together, it's all fine. */ 43 goto adj_acc;
41 if (tn->version > this->version) 44
42 p = &(*p)->rb_left; 45 /* Calculate how many bytes were already checked */
46 ofs = ref_offset(ref) + sizeof(struct jffs2_raw_inode);
47 len = ofs % c->wbuf_pagesize;
48 if (likely(len))
49 len = c->wbuf_pagesize - len;
50
51 if (len >= tn->csize) {
52 dbg_readinode("no need to check node at %#08x, data length %u, data starts at %#08x - it has already been checked.\n",
53 ref_offset(ref), tn->csize, ofs);
54 goto adj_acc;
55 }
56
57 ofs += len;
58 len = tn->csize - len;
59
60 dbg_readinode("check node at %#08x, data length %u, partial CRC %#08x, correct CRC %#08x, data starts at %#08x, start checking from %#08x - %u bytes.\n",
61 ref_offset(ref), tn->csize, tn->partial_crc, tn->data_crc, ofs - len, ofs, len);
62
63#ifndef __ECOS
64 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
65 * adding and jffs2_flash_read_end() interface. */
66 if (c->mtd->point) {
67 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
68 if (!err && retlen < tn->csize) {
69 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
70 c->mtd->unpoint(c->mtd, buffer, ofs, len);
71 } else if (err)
72 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
43 else 73 else
44 p = &(*p)->rb_right; 74 pointed = 1; /* succefully pointed to device */
75 }
76#endif
77
78 if (!pointed) {
79 buffer = kmalloc(len, GFP_KERNEL);
80 if (unlikely(!buffer))
81 return -ENOMEM;
82
83 /* TODO: this is very frequent pattern, make it a separate
84 * routine */
85 err = jffs2_flash_read(c, ofs, len, &retlen, buffer);
86 if (err) {
87 JFFS2_ERROR("can not read %d bytes from 0x%08x, error code: %d.\n", len, ofs, err);
88 goto free_out;
89 }
90
91 if (retlen != len) {
92 JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n", ofs, retlen, len);
93 err = -EIO;
94 goto free_out;
95 }
96 }
97
98 /* Continue calculating CRC */
99 crc = crc32(tn->partial_crc, buffer, len);
100 if(!pointed)
101 kfree(buffer);
102#ifndef __ECOS
103 else
104 c->mtd->unpoint(c->mtd, buffer, ofs, len);
105#endif
106
107 if (crc != tn->data_crc) {
108 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
109 ofs, tn->data_crc, crc);
110 return 1;
45 } 111 }
46 112
47 rb_link_node(&tn->rb, parent, p); 113adj_acc:
48 rb_insert_color(&tn->rb, list); 114 jeb = &c->blocks[ref->flash_offset / c->sector_size];
115 len = ref_totlen(c, jeb, ref);
116 /* If it should be REF_NORMAL, it'll get marked as such when
117 we build the fragtree, shortly. No need to worry about GC
118 moving it while it's marked REF_PRISTINE -- GC won't happen
119 till we've finished checking every inode anyway. */
120 ref->flash_offset |= REF_PRISTINE;
121 /*
122 * Mark the node as having been checked and fix the
123 * accounting accordingly.
124 */
125 spin_lock(&c->erase_completion_lock);
126 jeb->used_size += len;
127 jeb->unchecked_size -= len;
128 c->used_size += len;
129 c->unchecked_size -= len;
130 jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
131 spin_unlock(&c->erase_completion_lock);
132
133 return 0;
134
135free_out:
136 if(!pointed)
137 kfree(buffer);
138#ifndef __ECOS
139 else
140 c->mtd->unpoint(c->mtd, buffer, ofs, len);
141#endif
142 return err;
143}
144
145/*
146 * Helper function for jffs2_add_older_frag_to_fragtree().
147 *
148 * Checks the node if we are in the checking stage.
149 */
150static int check_tn_node(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
151{
152 int ret;
153
154 BUG_ON(ref_obsolete(tn->fn->raw));
155
156 /* We only check the data CRC of unchecked nodes */
157 if (ref_flags(tn->fn->raw) != REF_UNCHECKED)
158 return 0;
159
160 dbg_readinode("check node %#04x-%#04x, phys offs %#08x\n",
161 tn->fn->ofs, tn->fn->ofs + tn->fn->size, ref_offset(tn->fn->raw));
162
163 ret = check_node_data(c, tn);
164 if (unlikely(ret < 0)) {
165 JFFS2_ERROR("check_node_data() returned error: %d.\n",
166 ret);
167 } else if (unlikely(ret > 0)) {
168 dbg_readinode("CRC error, mark it obsolete.\n");
169 jffs2_mark_node_obsolete(c, tn->fn->raw);
170 }
171
172 return ret;
173}
174
175static struct jffs2_tmp_dnode_info *jffs2_lookup_tn(struct rb_root *tn_root, uint32_t offset)
176{
177 struct rb_node *next;
178 struct jffs2_tmp_dnode_info *tn = NULL;
179
180 dbg_readinode("root %p, offset %d\n", tn_root, offset);
181
182 next = tn_root->rb_node;
183
184 while (next) {
185 tn = rb_entry(next, struct jffs2_tmp_dnode_info, rb);
186
187 if (tn->fn->ofs < offset)
188 next = tn->rb.rb_right;
189 else if (tn->fn->ofs >= offset)
190 next = tn->rb.rb_left;
191 else
192 break;
193 }
194
195 return tn;
196}
197
198
199static void jffs2_kill_tn(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
200{
201 jffs2_mark_node_obsolete(c, tn->fn->raw);
202 jffs2_free_full_dnode(tn->fn);
203 jffs2_free_tmp_dnode_info(tn);
204}
205/*
206 * This function is used when we read an inode. Data nodes arrive in
207 * arbitrary order -- they may be older or newer than the nodes which
208 * are already in the tree. Where overlaps occur, the older node can
209 * be discarded as long as the newer passes the CRC check. We don't
210 * bother to keep track of holes in this rbtree, and neither do we deal
211 * with frags -- we can have multiple entries starting at the same
212 * offset, and the one with the smallest length will come first in the
213 * ordering.
214 *
215 * Returns 0 if the node was inserted
216 * 1 if the node is obsolete (because we can't mark it so yet)
217 * < 0 an if error occurred
218 */
219static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
220 struct jffs2_readinode_info *rii,
221 struct jffs2_tmp_dnode_info *tn)
222{
223 uint32_t fn_end = tn->fn->ofs + tn->fn->size;
224 struct jffs2_tmp_dnode_info *insert_point = NULL, *this;
225
226 dbg_readinode("insert fragment %#04x-%#04x, ver %u\n", tn->fn->ofs, fn_end, tn->version);
227
228 /* If a node has zero dsize, we only have to keep if it if it might be the
229 node with highest version -- i.e. the one which will end up as f->metadata.
230 Note that such nodes won't be REF_UNCHECKED since there are no data to
231 check anyway. */
232 if (!tn->fn->size) {
233 if (rii->mdata_tn) {
234 /* We had a candidate mdata node already */
235 dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version);
236 jffs2_kill_tn(c, rii->mdata_tn);
237 }
238 rii->mdata_tn = tn;
239 dbg_readinode("keep new mdata with ver %d\n", tn->version);
240 return 0;
241 }
242
243 /* Find the earliest node which _may_ be relevant to this one */
244 this = jffs2_lookup_tn(&rii->tn_root, tn->fn->ofs);
245 if (!this) {
246 /* First addition to empty tree. $DEITY how I love the easy cases */
247 rb_link_node(&tn->rb, NULL, &rii->tn_root.rb_node);
248 rb_insert_color(&tn->rb, &rii->tn_root);
249 dbg_readinode("keep new frag\n");
250 return 0;
251 }
252
253 /* If we add a new node it'll be somewhere under here. */
254 insert_point = this;
255
256 /* If the node is coincident with another at a lower address,
257 back up until the other node is found. It may be relevant */
258 while (tn->overlapped)
259 tn = tn_prev(tn);
260
261 dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole");
262
263 while (this) {
264 if (this->fn->ofs > fn_end)
265 break;
266 dbg_readinode("Ponder this ver %d, 0x%x-0x%x\n",
267 this->version, this->fn->ofs, this->fn->size);
268
269 if (this->version == tn->version) {
270 /* Version number collision means REF_PRISTINE GC. Accept either of them
271 as long as the CRC is correct. Check the one we have already... */
272 if (!check_tn_node(c, this)) {
273 /* The one we already had was OK. Keep it and throw away the new one */
274 dbg_readinode("Like old node. Throw away new\n");
275 jffs2_kill_tn(c, tn);
276 return 0;
277 } else {
278 /* Who cares if the new one is good; keep it for now anyway. */
279 rb_replace_node(&this->rb, &tn->rb, &rii->tn_root);
280 /* Same overlapping from in front and behind */
281 tn->overlapped = this->overlapped;
282 jffs2_kill_tn(c, this);
283 dbg_readinode("Like new node. Throw away old\n");
284 return 0;
285 }
286 }
287 if (this->version < tn->version &&
288 this->fn->ofs >= tn->fn->ofs &&
289 this->fn->ofs + this->fn->size <= fn_end) {
290 /* New node entirely overlaps 'this' */
291 if (check_tn_node(c, tn)) {
292 dbg_readinode("new node bad CRC\n");
293 jffs2_kill_tn(c, tn);
294 return 0;
295 }
296 /* ... and is good. Kill 'this'... */
297 rb_replace_node(&this->rb, &tn->rb, &rii->tn_root);
298 tn->overlapped = this->overlapped;
299 jffs2_kill_tn(c, this);
300 /* ... and any subsequent nodes which are also overlapped */
301 this = tn_next(tn);
302 while (this && this->fn->ofs + this->fn->size < fn_end) {
303 struct jffs2_tmp_dnode_info *next = tn_next(this);
304 if (this->version < tn->version) {
305 tn_erase(this, &rii->tn_root);
306 dbg_readinode("Kill overlapped ver %d, 0x%x-0x%x\n",
307 this->version, this->fn->ofs,
308 this->fn->ofs+this->fn->size);
309 jffs2_kill_tn(c, this);
310 }
311 this = next;
312 }
313 dbg_readinode("Done inserting new\n");
314 return 0;
315 }
316 if (this->version > tn->version &&
317 this->fn->ofs <= tn->fn->ofs &&
318 this->fn->ofs+this->fn->size >= fn_end) {
319 /* New node entirely overlapped by 'this' */
320 if (!check_tn_node(c, this)) {
321 dbg_readinode("Good CRC on old node. Kill new\n");
322 jffs2_kill_tn(c, tn);
323 return 0;
324 }
325 /* ... but 'this' was bad. Replace it... */
326 rb_replace_node(&this->rb, &tn->rb, &rii->tn_root);
327 dbg_readinode("Bad CRC on old overlapping node. Kill it\n");
328 jffs2_kill_tn(c, this);
329 return 0;
330 }
331 /* We want to be inserted under the last node which is
332 either at a lower offset _or_ has a smaller range */
333 if (this->fn->ofs < tn->fn->ofs ||
334 (this->fn->ofs == tn->fn->ofs &&
335 this->fn->size <= tn->fn->size))
336 insert_point = this;
337
338 this = tn_next(this);
339 }
340 dbg_readinode("insert_point %p, ver %d, 0x%x-0x%x, ov %d\n",
341 insert_point, insert_point->version, insert_point->fn->ofs,
342 insert_point->fn->ofs+insert_point->fn->size,
343 insert_point->overlapped);
344 /* We neither completely obsoleted nor were completely
345 obsoleted by an earlier node. Insert under insert_point */
346 {
347 struct rb_node *parent = &insert_point->rb;
348 struct rb_node **link = &parent;
349
350 while (*link) {
351 parent = *link;
352 insert_point = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
353 if (tn->fn->ofs > insert_point->fn->ofs)
354 link = &insert_point->rb.rb_right;
355 else if (tn->fn->ofs < insert_point->fn->ofs ||
356 tn->fn->size < insert_point->fn->size)
357 link = &insert_point->rb.rb_left;
358 else
359 link = &insert_point->rb.rb_right;
360 }
361 rb_link_node(&tn->rb, &insert_point->rb, link);
362 rb_insert_color(&tn->rb, &rii->tn_root);
363 }
364 /* If there's anything behind that overlaps us, note it */
365 this = tn_prev(tn);
366 if (this) {
367 while (1) {
368 if (this->fn->ofs + this->fn->size > tn->fn->ofs) {
369 dbg_readinode("Node is overlapped by %p (v %d, 0x%x-0x%x)\n",
370 this, this->version, this->fn->ofs,
371 this->fn->ofs+this->fn->size);
372 tn->overlapped = 1;
373 break;
374 }
375 if (!this->overlapped)
376 break;
377 this = tn_prev(this);
378 }
379 }
380
381 /* If the new node overlaps anything ahead, note it */
382 this = tn_next(tn);
383 while (this && this->fn->ofs < fn_end) {
384 this->overlapped = 1;
385 dbg_readinode("Node ver %d, 0x%x-0x%x is overlapped\n",
386 this->version, this->fn->ofs,
387 this->fn->ofs+this->fn->size);
388 this = tn_next(this);
389 }
390 return 0;
391}
392
393/* Trivial function to remove the last node in the tree. Which by definition
394 has no right-hand -- so can be removed just by making its only child (if
395 any) take its place under its parent. */
396static void eat_last(struct rb_root *root, struct rb_node *node)
397{
398 struct rb_node *parent = rb_parent(node);
399 struct rb_node **link;
400
401 /* LAST! */
402 BUG_ON(node->rb_right);
403
404 if (!parent)
405 link = &root->rb_node;
406 else if (node == parent->rb_left)
407 link = &parent->rb_left;
408 else
409 link = &parent->rb_right;
410
411 *link = node->rb_left;
412 /* Colour doesn't matter now. Only the parent pointer. */
413 if (node->rb_left)
414 node->rb_left->rb_parent_color = node->rb_parent_color;
415}
416
417/* We put this in reverse order, so we can just use eat_last */
418static void ver_insert(struct rb_root *ver_root, struct jffs2_tmp_dnode_info *tn)
419{
420 struct rb_node **link = &ver_root->rb_node;
421 struct rb_node *parent = NULL;
422 struct jffs2_tmp_dnode_info *this_tn;
423
424 while (*link) {
425 parent = *link;
426 this_tn = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
427
428 if (tn->version > this_tn->version)
429 link = &parent->rb_left;
430 else
431 link = &parent->rb_right;
432 }
433 dbg_readinode("Link new node at %p (root is %p)\n", link, ver_root);
434 rb_link_node(&tn->rb, parent, link);
435 rb_insert_color(&tn->rb, ver_root);
436}
437
438/* Build final, normal fragtree from tn tree. It doesn't matter which order
439 we add nodes to the real fragtree, as long as they don't overlap. And
440 having thrown away the majority of overlapped nodes as we went, there
441 really shouldn't be many sets of nodes which do overlap. If we start at
442 the end, we can use the overlap markers -- we can just eat nodes which
443 aren't overlapped, and when we encounter nodes which _do_ overlap we
444 sort them all into a temporary tree in version order before replaying them. */
445static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c,
446 struct jffs2_inode_info *f,
447 struct jffs2_readinode_info *rii)
448{
449 struct jffs2_tmp_dnode_info *pen, *last, *this;
450 struct rb_root ver_root = RB_ROOT;
451 uint32_t high_ver = 0;
452
453 if (rii->mdata_tn) {
454 dbg_readinode("potential mdata is ver %d at %p\n", rii->mdata_tn->version, rii->mdata_tn);
455 high_ver = rii->mdata_tn->version;
456 rii->latest_ref = rii->mdata_tn->fn->raw;
457 }
458#ifdef JFFS2_DBG_READINODE_MESSAGES
459 this = tn_last(&rii->tn_root);
460 while (this) {
461 dbg_readinode("tn %p ver %d range 0x%x-0x%x ov %d\n", this, this->version, this->fn->ofs,
462 this->fn->ofs+this->fn->size, this->overlapped);
463 this = tn_prev(this);
464 }
465#endif
466 pen = tn_last(&rii->tn_root);
467 while ((last = pen)) {
468 pen = tn_prev(last);
469
470 eat_last(&rii->tn_root, &last->rb);
471 ver_insert(&ver_root, last);
472
473 if (unlikely(last->overlapped))
474 continue;
475
476 /* Now we have a bunch of nodes in reverse version
477 order, in the tree at ver_root. Most of the time,
478 there'll actually be only one node in the 'tree',
479 in fact. */
480 this = tn_last(&ver_root);
481
482 while (this) {
483 struct jffs2_tmp_dnode_info *vers_next;
484 int ret;
485 vers_next = tn_prev(this);
486 eat_last(&ver_root, &this->rb);
487 if (check_tn_node(c, this)) {
488 dbg_readinode("node ver %x, 0x%x-0x%x failed CRC\n",
489 this->version, this->fn->ofs,
490 this->fn->ofs+this->fn->size);
491 jffs2_kill_tn(c, this);
492 } else {
493 if (this->version > high_ver) {
494 /* Note that this is different from the other
495 highest_version, because this one is only
496 counting _valid_ nodes which could give the
497 latest inode metadata */
498 high_ver = this->version;
499 rii->latest_ref = this->fn->raw;
500 }
501 dbg_readinode("Add %p (v %x, 0x%x-0x%x, ov %d) to fragtree\n",
502 this, this->version, this->fn->ofs,
503 this->fn->ofs+this->fn->size, this->overlapped);
504
505 ret = jffs2_add_full_dnode_to_inode(c, f, this->fn);
506 if (ret) {
507 /* Free the nodes in vers_root; let the caller
508 deal with the rest */
509 JFFS2_ERROR("Add node to tree failed %d\n", ret);
510 while (1) {
511 vers_next = tn_prev(this);
512 if (check_tn_node(c, this))
513 jffs2_mark_node_obsolete(c, this->fn->raw);
514 jffs2_free_full_dnode(this->fn);
515 jffs2_free_tmp_dnode_info(this);
516 this = vers_next;
517 if (!this)
518 break;
519 eat_last(&ver_root, &vers_next->rb);
520 }
521 return ret;
522 }
523 jffs2_free_tmp_dnode_info(this);
524 }
525 this = vers_next;
526 }
527 }
528 return 0;
49} 529}
50 530
51static void jffs2_free_tmp_dnode_info_list(struct rb_root *list) 531static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
@@ -112,8 +592,8 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r
112 * negative error code on failure. 592 * negative error code on failure.
113 */ 593 */
114static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, 594static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
115 struct jffs2_raw_dirent *rd, size_t read, struct jffs2_full_dirent **fdp, 595 struct jffs2_raw_dirent *rd, size_t read,
116 uint32_t *latest_mctime, uint32_t *mctime_ver) 596 struct jffs2_readinode_info *rii)
117{ 597{
118 struct jffs2_full_dirent *fd; 598 struct jffs2_full_dirent *fd;
119 uint32_t crc; 599 uint32_t crc;
@@ -125,7 +605,8 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
125 if (unlikely(crc != je32_to_cpu(rd->node_crc))) { 605 if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
126 JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n", 606 JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n",
127 ref_offset(ref), je32_to_cpu(rd->node_crc), crc); 607 ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
128 return 1; 608 jffs2_mark_node_obsolete(c, ref);
609 return 0;
129 } 610 }
130 611
131 /* If we've never checked the CRCs on this node, check them now */ 612 /* If we've never checked the CRCs on this node, check them now */
@@ -137,7 +618,8 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
137 if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) { 618 if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
138 JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n", 619 JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
139 ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen)); 620 ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
140 return 1; 621 jffs2_mark_node_obsolete(c, ref);
622 return 0;
141 } 623 }
142 624
143 jeb = &c->blocks[ref->flash_offset / c->sector_size]; 625 jeb = &c->blocks[ref->flash_offset / c->sector_size];
@@ -161,10 +643,13 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
161 fd->ino = je32_to_cpu(rd->ino); 643 fd->ino = je32_to_cpu(rd->ino);
162 fd->type = rd->type; 644 fd->type = rd->type;
163 645
646 if (fd->version > rii->highest_version)
647 rii->highest_version = fd->version;
648
164 /* Pick out the mctime of the latest dirent */ 649 /* Pick out the mctime of the latest dirent */
165 if(fd->version > *mctime_ver && je32_to_cpu(rd->mctime)) { 650 if(fd->version > rii->mctime_ver && je32_to_cpu(rd->mctime)) {
166 *mctime_ver = fd->version; 651 rii->mctime_ver = fd->version;
167 *latest_mctime = je32_to_cpu(rd->mctime); 652 rii->latest_mctime = je32_to_cpu(rd->mctime);
168 } 653 }
169 654
170 /* 655 /*
@@ -201,7 +686,7 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
201 * Wheee. We now have a complete jffs2_full_dirent structure, with 686 * Wheee. We now have a complete jffs2_full_dirent structure, with
202 * the name in it and everything. Link it into the list 687 * the name in it and everything. Link it into the list
203 */ 688 */
204 jffs2_add_fd_to_list(c, fd, fdp); 689 jffs2_add_fd_to_list(c, fd, &rii->fds);
205 690
206 return 0; 691 return 0;
207} 692}
@@ -210,13 +695,13 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
210 * Helper function for jffs2_get_inode_nodes(). 695 * Helper function for jffs2_get_inode_nodes().
211 * It is called every time an inode node is found. 696 * It is called every time an inode node is found.
212 * 697 *
213 * Returns: 0 on succes; 698 * Returns: 0 on success;
214 * 1 if the node should be marked obsolete; 699 * 1 if the node should be marked obsolete;
215 * negative error code on failure. 700 * negative error code on failure.
216 */ 701 */
217static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, 702static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
218 struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen, 703 struct jffs2_raw_inode *rd, int rdlen,
219 uint32_t *latest_mctime, uint32_t *mctime_ver) 704 struct jffs2_readinode_info *rii)
220{ 705{
221 struct jffs2_tmp_dnode_info *tn; 706 struct jffs2_tmp_dnode_info *tn;
222 uint32_t len, csize; 707 uint32_t len, csize;
@@ -230,7 +715,8 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
230 if (unlikely(crc != je32_to_cpu(rd->node_crc))) { 715 if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
231 JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n", 716 JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n",
232 ref_offset(ref), je32_to_cpu(rd->node_crc), crc); 717 ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
233 return 1; 718 jffs2_mark_node_obsolete(c, ref);
719 return 0;
234 } 720 }
235 721
236 tn = jffs2_alloc_tmp_dnode_info(); 722 tn = jffs2_alloc_tmp_dnode_info();
@@ -342,6 +828,10 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
342 tn->data_crc = je32_to_cpu(rd->data_crc); 828 tn->data_crc = je32_to_cpu(rd->data_crc);
343 tn->csize = csize; 829 tn->csize = csize;
344 tn->fn->raw = ref; 830 tn->fn->raw = ref;
831 tn->overlapped = 0;
832
833 if (tn->version > rii->highest_version)
834 rii->highest_version = tn->version;
345 835
346 /* There was a bug where we wrote hole nodes out with 836 /* There was a bug where we wrote hole nodes out with
347 csize/dsize swapped. Deal with it */ 837 csize/dsize swapped. Deal with it */
@@ -353,13 +843,25 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
353 dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n", 843 dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
354 ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize); 844 ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
355 845
356 jffs2_add_tn_to_tree(tn, tnp); 846 ret = jffs2_add_tn_to_tree(c, rii, tn);
357 847
848 if (ret) {
849 jffs2_free_full_dnode(tn->fn);
850 free_out:
851 jffs2_free_tmp_dnode_info(tn);
852 return ret;
853 }
854#ifdef JFFS2_DBG_READINODE_MESSAGES
855 dbg_readinode("After adding ver %d:\n", tn->version);
856 tn = tn_first(&rii->tn_root);
857 while (tn) {
858 dbg_readinode("%p: v %d r 0x%x-0x%x ov %d\n",
859 tn, tn->version, tn->fn->ofs,
860 tn->fn->ofs+tn->fn->size, tn->overlapped);
861 tn = tn_next(tn);
862 }
863#endif
358 return 0; 864 return 0;
359
360free_out:
361 jffs2_free_tmp_dnode_info(tn);
362 return ret;
363} 865}
364 866
365/* 867/*
@@ -379,7 +881,8 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re
379 JFFS2_ERROR("Node is {%04x,%04x,%08x,%08x}. Please report this error.\n", 881 JFFS2_ERROR("Node is {%04x,%04x,%08x,%08x}. Please report this error.\n",
380 je16_to_cpu(un->magic), je16_to_cpu(un->nodetype), 882 je16_to_cpu(un->magic), je16_to_cpu(un->nodetype),
381 je32_to_cpu(un->totlen), je32_to_cpu(un->hdr_crc)); 883 je32_to_cpu(un->totlen), je32_to_cpu(un->hdr_crc));
382 return 1; 884 jffs2_mark_node_obsolete(c, ref);
885 return 0;
383 } 886 }
384 887
385 un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype)); 888 un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
@@ -407,7 +910,8 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re
407 case JFFS2_FEATURE_RWCOMPAT_DELETE: 910 case JFFS2_FEATURE_RWCOMPAT_DELETE:
408 JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n", 911 JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
409 je16_to_cpu(un->nodetype), ref_offset(ref)); 912 je16_to_cpu(un->nodetype), ref_offset(ref));
410 return 1; 913 jffs2_mark_node_obsolete(c, ref);
914 return 0;
411 } 915 }
412 916
413 return 0; 917 return 0;
@@ -457,21 +961,20 @@ static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
457} 961}
458 962
459/* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated 963/* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
460 with this ino, returning the former in order of version */ 964 with this ino. Perform a preliminary ordering on data nodes, throwing away
965 those which are completely obsoleted by newer ones. The naïve approach we
966 use to take of just returning them _all_ in version order will cause us to
967 run out of memory in certain degenerate cases. */
461static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f, 968static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
462 struct rb_root *tnp, struct jffs2_full_dirent **fdp, 969 struct jffs2_readinode_info *rii)
463 uint32_t *highest_version, uint32_t *latest_mctime,
464 uint32_t *mctime_ver)
465{ 970{
466 struct jffs2_raw_node_ref *ref, *valid_ref; 971 struct jffs2_raw_node_ref *ref, *valid_ref;
467 struct rb_root ret_tn = RB_ROOT;
468 struct jffs2_full_dirent *ret_fd = NULL;
469 unsigned char *buf = NULL; 972 unsigned char *buf = NULL;
470 union jffs2_node_union *node; 973 union jffs2_node_union *node;
471 size_t retlen; 974 size_t retlen;
472 int len, err; 975 int len, err;
473 976
474 *mctime_ver = 0; 977 rii->mctime_ver = 0;
475 978
476 dbg_readinode("ino #%u\n", f->inocache->ino); 979 dbg_readinode("ino #%u\n", f->inocache->ino);
477 980
@@ -569,16 +1072,10 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
569 goto free_out; 1072 goto free_out;
570 } 1073 }
571 1074
572 err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver); 1075 err = read_direntry(c, ref, &node->d, retlen, rii);
573 if (err == 1) { 1076 if (unlikely(err))
574 jffs2_mark_node_obsolete(c, ref);
575 break;
576 } else if (unlikely(err))
577 goto free_out; 1077 goto free_out;
578 1078
579 if (je32_to_cpu(node->d.version) > *highest_version)
580 *highest_version = je32_to_cpu(node->d.version);
581
582 break; 1079 break;
583 1080
584 case JFFS2_NODETYPE_INODE: 1081 case JFFS2_NODETYPE_INODE:
@@ -589,16 +1086,10 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
589 goto free_out; 1086 goto free_out;
590 } 1087 }
591 1088
592 err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver); 1089 err = read_dnode(c, ref, &node->i, len, rii);
593 if (err == 1) { 1090 if (unlikely(err))
594 jffs2_mark_node_obsolete(c, ref);
595 break;
596 } else if (unlikely(err))
597 goto free_out; 1091 goto free_out;
598 1092
599 if (je32_to_cpu(node->i.version) > *highest_version)
600 *highest_version = je32_to_cpu(node->i.version);
601
602 break; 1093 break;
603 1094
604 default: 1095 default:
@@ -621,17 +1112,19 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
621 } 1112 }
622 1113
623 spin_unlock(&c->erase_completion_lock); 1114 spin_unlock(&c->erase_completion_lock);
624 *tnp = ret_tn;
625 *fdp = ret_fd;
626 kfree(buf); 1115 kfree(buf);
627 1116
1117 f->highest_version = rii->highest_version;
1118
628 dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n", 1119 dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
629 f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver); 1120 f->inocache->ino, rii->highest_version, rii->latest_mctime,
1121 rii->mctime_ver);
630 return 0; 1122 return 0;
631 1123
632 free_out: 1124 free_out:
633 jffs2_free_tmp_dnode_info_list(&ret_tn); 1125 jffs2_free_tmp_dnode_info_list(&rii->tn_root);
634 jffs2_free_full_dirent_list(ret_fd); 1126 jffs2_free_full_dirent_list(rii->fds);
1127 rii->fds = NULL;
635 kfree(buf); 1128 kfree(buf);
636 return err; 1129 return err;
637} 1130}
@@ -640,20 +1133,17 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
640 struct jffs2_inode_info *f, 1133 struct jffs2_inode_info *f,
641 struct jffs2_raw_inode *latest_node) 1134 struct jffs2_raw_inode *latest_node)
642{ 1135{
643 struct jffs2_tmp_dnode_info *tn; 1136 struct jffs2_readinode_info rii;
644 struct rb_root tn_list;
645 struct rb_node *rb, *repl_rb;
646 struct jffs2_full_dirent *fd_list;
647 struct jffs2_full_dnode *fn, *first_fn = NULL;
648 uint32_t crc; 1137 uint32_t crc;
649 uint32_t latest_mctime, mctime_ver;
650 size_t retlen; 1138 size_t retlen;
651 int ret; 1139 int ret;
652 1140
653 dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink); 1141 dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
654 1142
1143 memset(&rii, 0, sizeof(rii));
1144
655 /* Grab all nodes relevant to this ino */ 1145 /* Grab all nodes relevant to this ino */
656 ret = jffs2_get_inode_nodes(c, f, &tn_list, &fd_list, &f->highest_version, &latest_mctime, &mctime_ver); 1146 ret = jffs2_get_inode_nodes(c, f, &rii);
657 1147
658 if (ret) { 1148 if (ret) {
659 JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret); 1149 JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret);
@@ -661,74 +1151,42 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
661 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT); 1151 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
662 return ret; 1152 return ret;
663 } 1153 }
664 f->dents = fd_list;
665
666 rb = rb_first(&tn_list);
667 1154
668 while (rb) { 1155 ret = jffs2_build_inode_fragtree(c, f, &rii);
669 cond_resched(); 1156 if (ret) {
670 tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb); 1157 JFFS2_ERROR("Failed to build final fragtree for inode #%u: error %d\n",
671 fn = tn->fn; 1158 f->inocache->ino, ret);
672 ret = 1; 1159 if (f->inocache->state == INO_STATE_READING)
673 dbg_readinode("consider node ver %u, phys offset " 1160 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
674 "%#08x(%d), range %u-%u.\n", tn->version, 1161 jffs2_free_tmp_dnode_info_list(&rii.tn_root);
675 ref_offset(fn->raw), ref_flags(fn->raw), 1162 /* FIXME: We could at least crc-check them all */
676 fn->ofs, fn->ofs + fn->size); 1163 if (rii.mdata_tn) {
677 1164 jffs2_free_full_dnode(rii.mdata_tn->fn);
678 if (fn->size) { 1165 jffs2_free_tmp_dnode_info(rii.mdata_tn);
679 ret = jffs2_add_older_frag_to_fragtree(c, f, tn); 1166 rii.mdata_tn = NULL;
680 /* TODO: the error code isn't checked, check it */ 1167 }
681 jffs2_dbg_fragtree_paranoia_check_nolock(f); 1168 return ret;
682 BUG_ON(ret < 0); 1169 }
683 if (!first_fn && ret == 0)
684 first_fn = fn;
685 } else if (!first_fn) {
686 first_fn = fn;
687 f->metadata = fn;
688 ret = 0; /* Prevent freeing the metadata update node */
689 } else
690 jffs2_mark_node_obsolete(c, fn->raw);
691
692 BUG_ON(rb->rb_left);
693 if (rb_parent(rb) && rb_parent(rb)->rb_left == rb) {
694 /* We were then left-hand child of our parent. We need
695 * to move our own right-hand child into our place. */
696 repl_rb = rb->rb_right;
697 if (repl_rb)
698 rb_set_parent(repl_rb, rb_parent(rb));
699 } else
700 repl_rb = NULL;
701
702 rb = rb_next(rb);
703
704 /* Remove the spent tn from the tree; don't bother rebalancing
705 * but put our right-hand child in our own place. */
706 if (rb_parent(&tn->rb)) {
707 if (rb_parent(&tn->rb)->rb_left == &tn->rb)
708 rb_parent(&tn->rb)->rb_left = repl_rb;
709 else if (rb_parent(&tn->rb)->rb_right == &tn->rb)
710 rb_parent(&tn->rb)->rb_right = repl_rb;
711 else BUG();
712 } else if (tn->rb.rb_right)
713 rb_set_parent(tn->rb.rb_right, NULL);
714 1170
715 jffs2_free_tmp_dnode_info(tn); 1171 if (rii.mdata_tn) {
716 if (ret) { 1172 if (rii.mdata_tn->fn->raw == rii.latest_ref) {
717 dbg_readinode("delete dnode %u-%u.\n", 1173 f->metadata = rii.mdata_tn->fn;
718 fn->ofs, fn->ofs + fn->size); 1174 jffs2_free_tmp_dnode_info(rii.mdata_tn);
719 jffs2_free_full_dnode(fn); 1175 } else {
1176 jffs2_kill_tn(c, rii.mdata_tn);
720 } 1177 }
1178 rii.mdata_tn = NULL;
721 } 1179 }
722 jffs2_dbg_fragtree_paranoia_check_nolock(f);
723 1180
724 BUG_ON(first_fn && ref_obsolete(first_fn->raw)); 1181 f->dents = rii.fds;
1182
1183 jffs2_dbg_fragtree_paranoia_check_nolock(f);
725 1184
726 fn = first_fn; 1185 if (unlikely(!rii.latest_ref)) {
727 if (unlikely(!first_fn)) {
728 /* No data nodes for this inode. */ 1186 /* No data nodes for this inode. */
729 if (f->inocache->ino != 1) { 1187 if (f->inocache->ino != 1) {
730 JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino); 1188 JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);
731 if (!fd_list) { 1189 if (!rii.fds) {
732 if (f->inocache->state == INO_STATE_READING) 1190 if (f->inocache->state == INO_STATE_READING)
733 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT); 1191 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
734 return -EIO; 1192 return -EIO;
@@ -746,7 +1204,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
746 return 0; 1204 return 0;
747 } 1205 }
748 1206
749 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(*latest_node), &retlen, (void *)latest_node); 1207 ret = jffs2_flash_read(c, ref_offset(rii.latest_ref), sizeof(*latest_node), &retlen, (void *)latest_node);
750 if (ret || retlen != sizeof(*latest_node)) { 1208 if (ret || retlen != sizeof(*latest_node)) {
751 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n", 1209 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
752 ret, retlen, sizeof(*latest_node)); 1210 ret, retlen, sizeof(*latest_node));
@@ -759,7 +1217,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
759 crc = crc32(0, latest_node, sizeof(*latest_node)-8); 1217 crc = crc32(0, latest_node, sizeof(*latest_node)-8);
760 if (crc != je32_to_cpu(latest_node->node_crc)) { 1218 if (crc != je32_to_cpu(latest_node->node_crc)) {
761 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n", 1219 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
762 f->inocache->ino, ref_offset(fn->raw)); 1220 f->inocache->ino, ref_offset(rii.latest_ref));
763 up(&f->sem); 1221 up(&f->sem);
764 jffs2_do_clear_inode(c, f); 1222 jffs2_do_clear_inode(c, f);
765 return -EIO; 1223 return -EIO;
@@ -767,10 +1225,10 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
767 1225
768 switch(jemode_to_cpu(latest_node->mode) & S_IFMT) { 1226 switch(jemode_to_cpu(latest_node->mode) & S_IFMT) {
769 case S_IFDIR: 1227 case S_IFDIR:
770 if (mctime_ver > je32_to_cpu(latest_node->version)) { 1228 if (rii.mctime_ver > je32_to_cpu(latest_node->version)) {
771 /* The times in the latest_node are actually older than 1229 /* The times in the latest_node are actually older than
772 mctime in the latest dirent. Cheat. */ 1230 mctime in the latest dirent. Cheat. */
773 latest_node->ctime = latest_node->mtime = cpu_to_je32(latest_mctime); 1231 latest_node->ctime = latest_node->mtime = cpu_to_je32(rii.latest_mctime);
774 } 1232 }
775 break; 1233 break;
776 1234
@@ -800,7 +1258,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
800 return -ENOMEM; 1258 return -ENOMEM;
801 } 1259 }
802 1260
803 ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node), 1261 ret = jffs2_flash_read(c, ref_offset(rii.latest_ref) + sizeof(*latest_node),
804 je32_to_cpu(latest_node->csize), &retlen, (char *)f->target); 1262 je32_to_cpu(latest_node->csize), &retlen, (char *)f->target);
805 1263
806 if (ret || retlen != je32_to_cpu(latest_node->csize)) { 1264 if (ret || retlen != je32_to_cpu(latest_node->csize)) {