aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@infradead.org>2007-04-24 22:23:42 -0400
committerDavid Woodhouse <dwmw2@infradead.org>2007-04-24 22:23:42 -0400
commitdf8e96f39103adf5a13332d784040a2c62667243 (patch)
tree01d7259260f628f4075c39c1cc7804b72998a601 /fs
parent44b998e1eb254edc87177819ee693690fac68b7f (diff)
[JFFS2] Improve read_inode memory usage, v2.
We originally used to read every node and allocate a jffs2_tmp_dnode_info structure for each, before processing them in (reverse) version order and discarding the ones which are obsoleted by later nodes. With huge logfiles, this behaviour caused memory problems. For example, a file involved in OLPC trac #1292 has 1822391 nodes, and would cause the XO machine to run out of memory during the first stage of read_inode(). Instead of just inserting nodes into a tree in version order as we find them, we now put them into a tree in order of their offset within the file, which allows us to immediately discard nodes which are completely obsoleted. We don't use a full tree with 'fragments' pointing to the real data structure, as we do in the normal fragtree. We sort only on the start address, and add an 'overlapped' flag to the tmp_dnode_info to indicate that the node in question is (partially) overlapped by another. When the scan is complete, we start at the end of the file, adding each node to a real fragtree as before. Where the node is non-overlapped, we just add it (it doesn't matter that it's not the latest version; there is no overlap). When the node at the end of the tree _is_ overlapped, we sort it and all its overlapping nodes into version order and then add them to the fragtree in that order. This 'early discard' reduces the peak allocation of tmp_dnode_info structures from 1.8M to a mere 62872 (3.5%) in the degenerate case referenced above. This version of the patch also correctly rememembers the highest node version# seen for an inode when it's scanned. Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Diffstat (limited to 'fs')
-rw-r--r--fs/jffs2/nodelist.c460
-rw-r--r--fs/jffs2/nodelist.h25
-rw-r--r--fs/jffs2/readinode.c732
3 files changed, 618 insertions, 599 deletions
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 5a6b4d64206c..fecffbc63552 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -397,466 +397,6 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
397 return 0; 397 return 0;
398} 398}
399 399
400/*
401 * Check the data CRC of the node.
402 *
403 * Returns: 0 if the data CRC is correct;
404 * 1 - if incorrect;
405 * error code if an error occured.
406 */
407static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
408{
409 struct jffs2_raw_node_ref *ref = tn->fn->raw;
410 int err = 0, pointed = 0;
411 struct jffs2_eraseblock *jeb;
412 unsigned char *buffer;
413 uint32_t crc, ofs, len;
414 size_t retlen;
415
416 BUG_ON(tn->csize == 0);
417
418 if (!jffs2_is_writebuffered(c))
419 goto adj_acc;
420
421 /* Calculate how many bytes were already checked */
422 ofs = ref_offset(ref) + sizeof(struct jffs2_raw_inode);
423 len = ofs % c->wbuf_pagesize;
424 if (likely(len))
425 len = c->wbuf_pagesize - len;
426
427 if (len >= tn->csize) {
428 dbg_readinode("no need to check node at %#08x, data length %u, data starts at %#08x - it has already been checked.\n",
429 ref_offset(ref), tn->csize, ofs);
430 goto adj_acc;
431 }
432
433 ofs += len;
434 len = tn->csize - len;
435
436 dbg_readinode("check node at %#08x, data length %u, partial CRC %#08x, correct CRC %#08x, data starts at %#08x, start checking from %#08x - %u bytes.\n",
437 ref_offset(ref), tn->csize, tn->partial_crc, tn->data_crc, ofs - len, ofs, len);
438
439#ifndef __ECOS
440 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
441 * adding and jffs2_flash_read_end() interface. */
442 if (c->mtd->point) {
443 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
444 if (!err && retlen < tn->csize) {
445 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
446 c->mtd->unpoint(c->mtd, buffer, ofs, len);
447 } else if (err)
448 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
449 else
450 pointed = 1; /* succefully pointed to device */
451 }
452#endif
453
454 if (!pointed) {
455 buffer = kmalloc(len, GFP_KERNEL);
456 if (unlikely(!buffer))
457 return -ENOMEM;
458
459 /* TODO: this is very frequent pattern, make it a separate
460 * routine */
461 err = jffs2_flash_read(c, ofs, len, &retlen, buffer);
462 if (err) {
463 JFFS2_ERROR("can not read %d bytes from 0x%08x, error code: %d.\n", len, ofs, err);
464 goto free_out;
465 }
466
467 if (retlen != len) {
468 JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n", ofs, retlen, len);
469 err = -EIO;
470 goto free_out;
471 }
472 }
473
474 /* Continue calculating CRC */
475 crc = crc32(tn->partial_crc, buffer, len);
476 if(!pointed)
477 kfree(buffer);
478#ifndef __ECOS
479 else
480 c->mtd->unpoint(c->mtd, buffer, ofs, len);
481#endif
482
483 if (crc != tn->data_crc) {
484 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
485 ofs, tn->data_crc, crc);
486 return 1;
487 }
488
489adj_acc:
490 jeb = &c->blocks[ref->flash_offset / c->sector_size];
491 len = ref_totlen(c, jeb, ref);
492
493 /*
494 * Mark the node as having been checked and fix the
495 * accounting accordingly.
496 */
497 spin_lock(&c->erase_completion_lock);
498 jeb->used_size += len;
499 jeb->unchecked_size -= len;
500 c->used_size += len;
501 c->unchecked_size -= len;
502 spin_unlock(&c->erase_completion_lock);
503
504 return 0;
505
506free_out:
507 if(!pointed)
508 kfree(buffer);
509#ifndef __ECOS
510 else
511 c->mtd->unpoint(c->mtd, buffer, ofs, len);
512#endif
513 return err;
514}
515
516/*
517 * Helper function for jffs2_add_older_frag_to_fragtree().
518 *
519 * Checks the node if we are in the checking stage.
520 */
521static int check_node(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn)
522{
523 int ret;
524
525 BUG_ON(ref_obsolete(tn->fn->raw));
526
527 /* We only check the data CRC of unchecked nodes */
528 if (ref_flags(tn->fn->raw) != REF_UNCHECKED)
529 return 0;
530
531 dbg_fragtree2("check node %#04x-%#04x, phys offs %#08x.\n",
532 tn->fn->ofs, tn->fn->ofs + tn->fn->size, ref_offset(tn->fn->raw));
533
534 ret = check_node_data(c, tn);
535 if (unlikely(ret < 0)) {
536 JFFS2_ERROR("check_node_data() returned error: %d.\n",
537 ret);
538 } else if (unlikely(ret > 0)) {
539 dbg_fragtree2("CRC error, mark it obsolete.\n");
540 jffs2_mark_node_obsolete(c, tn->fn->raw);
541 }
542
543 return ret;
544}
545
546/*
547 * Helper function for jffs2_add_older_frag_to_fragtree().
548 *
549 * Called when the new fragment that is being inserted
550 * splits a hole fragment.
551 */
552static int split_hole(struct jffs2_sb_info *c, struct rb_root *root,
553 struct jffs2_node_frag *newfrag, struct jffs2_node_frag *hole)
554{
555 dbg_fragtree2("fragment %#04x-%#04x splits the hole %#04x-%#04x\n",
556 newfrag->ofs, newfrag->ofs + newfrag->size, hole->ofs, hole->ofs + hole->size);
557
558 if (hole->ofs == newfrag->ofs) {
559 /*
560 * Well, the new fragment actually starts at the same offset as
561 * the hole.
562 */
563 if (hole->ofs + hole->size > newfrag->ofs + newfrag->size) {
564 /*
565 * We replace the overlapped left part of the hole by
566 * the new node.
567 */
568
569 dbg_fragtree2("insert fragment %#04x-%#04x and cut the left part of the hole\n",
570 newfrag->ofs, newfrag->ofs + newfrag->size);
571 rb_replace_node(&hole->rb, &newfrag->rb, root);
572
573 hole->ofs += newfrag->size;
574 hole->size -= newfrag->size;
575
576 /*
577 * We know that 'hole' should be the right hand
578 * fragment.
579 */
580 jffs2_fragtree_insert(hole, newfrag);
581 rb_insert_color(&hole->rb, root);
582 } else {
583 /*
584 * Ah, the new fragment is of the same size as the hole.
585 * Relace the hole by it.
586 */
587 dbg_fragtree2("insert fragment %#04x-%#04x and overwrite hole\n",
588 newfrag->ofs, newfrag->ofs + newfrag->size);
589 rb_replace_node(&hole->rb, &newfrag->rb, root);
590 jffs2_free_node_frag(hole);
591 }
592 } else {
593 /* The new fragment lefts some hole space at the left */
594
595 struct jffs2_node_frag * newfrag2 = NULL;
596
597 if (hole->ofs + hole->size > newfrag->ofs + newfrag->size) {
598 /* The new frag also lefts some space at the right */
599 newfrag2 = new_fragment(NULL, newfrag->ofs +
600 newfrag->size, hole->ofs + hole->size
601 - newfrag->ofs - newfrag->size);
602 if (unlikely(!newfrag2)) {
603 jffs2_free_node_frag(newfrag);
604 return -ENOMEM;
605 }
606 }
607
608 hole->size = newfrag->ofs - hole->ofs;
609 dbg_fragtree2("left the hole %#04x-%#04x at the left and inserd fragment %#04x-%#04x\n",
610 hole->ofs, hole->ofs + hole->size, newfrag->ofs, newfrag->ofs + newfrag->size);
611
612 jffs2_fragtree_insert(newfrag, hole);
613 rb_insert_color(&newfrag->rb, root);
614
615 if (newfrag2) {
616 dbg_fragtree2("left the hole %#04x-%#04x at the right\n",
617 newfrag2->ofs, newfrag2->ofs + newfrag2->size);
618 jffs2_fragtree_insert(newfrag2, newfrag);
619 rb_insert_color(&newfrag2->rb, root);
620 }
621 }
622
623 return 0;
624}
625
626/*
627 * This function is used when we build inode. It expects the nodes are passed
628 * in the decreasing version order. The whole point of this is to improve the
629 * inodes checking on NAND: we check the nodes' data CRC only when they are not
630 * obsoleted. Previously, add_frag_to_fragtree() function was used and
631 * nodes were passed to it in the increasing version ordes and CRCs of all
632 * nodes were checked.
633 *
634 * Note: tn->fn->size shouldn't be zero.
635 *
636 * Returns 0 if the node was inserted
637 * 1 if it wasn't inserted (since it is obsolete)
638 * < 0 an if error occured
639 */
640int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
641 struct jffs2_tmp_dnode_info *tn)
642{
643 struct jffs2_node_frag *this, *newfrag;
644 uint32_t lastend;
645 struct jffs2_full_dnode *fn = tn->fn;
646 struct rb_root *root = &f->fragtree;
647 uint32_t fn_size = fn->size, fn_ofs = fn->ofs;
648 int err, checked = 0;
649 int ref_flag;
650
651 dbg_fragtree("insert fragment %#04x-%#04x, ver %u\n", fn_ofs, fn_ofs + fn_size, tn->version);
652
653 /* Skip all the nodes which are completed before this one starts */
654 this = jffs2_lookup_node_frag(root, fn_ofs);
655 if (this)
656 dbg_fragtree2("'this' found %#04x-%#04x (%s)\n", this->ofs, this->ofs + this->size, this->node ? "data" : "hole");
657
658 if (this)
659 lastend = this->ofs + this->size;
660 else
661 lastend = 0;
662
663 /* Detect the preliminary type of node */
664 if (fn->size >= PAGE_CACHE_SIZE)
665 ref_flag = REF_PRISTINE;
666 else
667 ref_flag = REF_NORMAL;
668
669 /* See if we ran off the end of the root */
670 if (lastend <= fn_ofs) {
671 /* We did */
672
673 /*
674 * We are going to insert the new node into the
675 * fragment tree, so check it.
676 */
677 err = check_node(c, f, tn);
678 if (err != 0)
679 return err;
680
681 fn->frags = 1;
682
683 newfrag = new_fragment(fn, fn_ofs, fn_size);
684 if (unlikely(!newfrag))
685 return -ENOMEM;
686
687 err = no_overlapping_node(c, root, newfrag, this, lastend);
688 if (unlikely(err != 0)) {
689 jffs2_free_node_frag(newfrag);
690 return err;
691 }
692
693 goto out_ok;
694 }
695
696 fn->frags = 0;
697
698 while (1) {
699 /*
700 * Here we have:
701 * fn_ofs < this->ofs + this->size && fn_ofs >= this->ofs.
702 *
703 * Remember, 'this' has higher version, any non-hole node
704 * which is already in the fragtree is newer then the newly
705 * inserted.
706 */
707 if (!this->node) {
708 /*
709 * 'this' is the hole fragment, so at least the
710 * beginning of the new fragment is valid.
711 */
712
713 /*
714 * We are going to insert the new node into the
715 * fragment tree, so check it.
716 */
717 if (!checked) {
718 err = check_node(c, f, tn);
719 if (unlikely(err != 0))
720 return err;
721 checked = 1;
722 }
723
724 if (this->ofs + this->size >= fn_ofs + fn_size) {
725 /* We split the hole on two parts */
726
727 fn->frags += 1;
728 newfrag = new_fragment(fn, fn_ofs, fn_size);
729 if (unlikely(!newfrag))
730 return -ENOMEM;
731
732 err = split_hole(c, root, newfrag, this);
733 if (unlikely(err))
734 return err;
735 goto out_ok;
736 }
737
738 /*
739 * The beginning of the new fragment is valid since it
740 * overlaps the hole node.
741 */
742
743 ref_flag = REF_NORMAL;
744
745 fn->frags += 1;
746 newfrag = new_fragment(fn, fn_ofs,
747 this->ofs + this->size - fn_ofs);
748 if (unlikely(!newfrag))
749 return -ENOMEM;
750
751 if (fn_ofs == this->ofs) {
752 /*
753 * The new node starts at the same offset as
754 * the hole and supersieds the hole.
755 */
756 dbg_fragtree2("add the new fragment instead of hole %#04x-%#04x, refcnt %d\n",
757 fn_ofs, fn_ofs + this->ofs + this->size - fn_ofs, fn->frags);
758
759 rb_replace_node(&this->rb, &newfrag->rb, root);
760 jffs2_free_node_frag(this);
761 } else {
762 /*
763 * The hole becomes shorter as its right part
764 * is supersieded by the new fragment.
765 */
766 dbg_fragtree2("reduce size of hole %#04x-%#04x to %#04x-%#04x\n",
767 this->ofs, this->ofs + this->size, this->ofs, this->ofs + this->size - newfrag->size);
768
769 dbg_fragtree2("add new fragment %#04x-%#04x, refcnt %d\n", fn_ofs,
770 fn_ofs + this->ofs + this->size - fn_ofs, fn->frags);
771
772 this->size -= newfrag->size;
773 jffs2_fragtree_insert(newfrag, this);
774 rb_insert_color(&newfrag->rb, root);
775 }
776
777 fn_ofs += newfrag->size;
778 fn_size -= newfrag->size;
779 this = rb_entry(rb_next(&newfrag->rb),
780 struct jffs2_node_frag, rb);
781
782 dbg_fragtree2("switch to the next 'this' fragment: %#04x-%#04x %s\n",
783 this->ofs, this->ofs + this->size, this->node ? "(data)" : "(hole)");
784 }
785
786 /*
787 * 'This' node is not the hole so it obsoletes the new fragment
788 * either fully or partially.
789 */
790 if (this->ofs + this->size >= fn_ofs + fn_size) {
791 /* The new node is obsolete, drop it */
792 if (fn->frags == 0) {
793 dbg_fragtree2("%#04x-%#04x is obsolete, mark it obsolete\n", fn_ofs, fn_ofs + fn_size);
794 ref_flag = REF_OBSOLETE;
795 }
796 goto out_ok;
797 } else {
798 struct jffs2_node_frag *new_this;
799
800 /* 'This' node obsoletes the beginning of the new node */
801 dbg_fragtree2("the beginning %#04x-%#04x is obsolete\n", fn_ofs, this->ofs + this->size);
802
803 ref_flag = REF_NORMAL;
804
805 fn_size -= this->ofs + this->size - fn_ofs;
806 fn_ofs = this->ofs + this->size;
807 dbg_fragtree2("now considering %#04x-%#04x\n", fn_ofs, fn_ofs + fn_size);
808
809 new_this = rb_entry(rb_next(&this->rb), struct jffs2_node_frag, rb);
810 if (!new_this) {
811 /*
812 * There is no next fragment. Add the rest of
813 * the new node as the right-hand child.
814 */
815 if (!checked) {
816 err = check_node(c, f, tn);
817 if (unlikely(err != 0))
818 return err;
819 checked = 1;
820 }
821
822 fn->frags += 1;
823 newfrag = new_fragment(fn, fn_ofs, fn_size);
824 if (unlikely(!newfrag))
825 return -ENOMEM;
826
827 dbg_fragtree2("there are no more fragments, insert %#04x-%#04x\n",
828 newfrag->ofs, newfrag->ofs + newfrag->size);
829 rb_link_node(&newfrag->rb, &this->rb, &this->rb.rb_right);
830 rb_insert_color(&newfrag->rb, root);
831 goto out_ok;
832 } else {
833 this = new_this;
834 dbg_fragtree2("switch to the next 'this' fragment: %#04x-%#04x %s\n",
835 this->ofs, this->ofs + this->size, this->node ? "(data)" : "(hole)");
836 }
837 }
838 }
839
840out_ok:
841 BUG_ON(fn->size < PAGE_CACHE_SIZE && ref_flag == REF_PRISTINE);
842
843 if (ref_flag == REF_OBSOLETE) {
844 dbg_fragtree2("the node is obsolete now\n");
845 /* jffs2_mark_node_obsolete() will adjust space accounting */
846 jffs2_mark_node_obsolete(c, fn->raw);
847 return 1;
848 }
849
850 dbg_fragtree2("the node is \"%s\" now\n", ref_flag == REF_NORMAL ? "REF_NORMAL" : "REF_PRISTINE");
851
852 /* Space accounting was adjusted at check_node_data() */
853 spin_lock(&c->erase_completion_lock);
854 fn->raw->flash_offset = ref_offset(fn->raw) | ref_flag;
855 spin_unlock(&c->erase_completion_lock);
856
857 return 0;
858}
859
860void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state) 400void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state)
861{ 401{
862 spin_lock(&c->inocache_lock); 402 spin_lock(&c->inocache_lock);
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 382662cc61e7..e5c8f2be8e22 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -225,7 +225,20 @@ struct jffs2_tmp_dnode_info
225 uint32_t version; 225 uint32_t version;
226 uint32_t data_crc; 226 uint32_t data_crc;
227 uint32_t partial_crc; 227 uint32_t partial_crc;
228 uint32_t csize; 228 uint16_t csize;
229 uint16_t overlapped;
230};
231
232/* Temporary data structure used during readinode. */
233struct jffs2_readinode_info
234{
235 struct rb_root tn_root;
236 struct jffs2_tmp_dnode_info *mdata_tn;
237 uint32_t highest_version;
238 uint32_t latest_mctime;
239 uint32_t mctime_ver;
240 struct jffs2_full_dirent *fds;
241 struct jffs2_raw_node_ref *latest_ref;
229}; 242};
230 243
231struct jffs2_full_dirent 244struct jffs2_full_dirent
@@ -328,6 +341,15 @@ static inline struct jffs2_node_frag *frag_last(struct rb_root *root)
328#define frag_right(frag) rb_entry((frag)->rb.rb_right, struct jffs2_node_frag, rb) 341#define frag_right(frag) rb_entry((frag)->rb.rb_right, struct jffs2_node_frag, rb)
329#define frag_erase(frag, list) rb_erase(&frag->rb, list); 342#define frag_erase(frag, list) rb_erase(&frag->rb, list);
330 343
344#define tn_next(tn) rb_entry(rb_next(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
345#define tn_prev(tn) rb_entry(rb_prev(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
346#define tn_parent(tn) rb_entry(rb_parent(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
347#define tn_left(tn) rb_entry((tn)->rb.rb_left, struct jffs2_tmp_dnode_info, rb)
348#define tn_right(tn) rb_entry((tn)->rb.rb_right, struct jffs2_tmp_dnode_info, rb)
349#define tn_erase(tn, list) rb_erase(&tn->rb, list);
350#define tn_last(list) rb_entry(rb_last(list), struct jffs2_tmp_dnode_info, rb)
351#define tn_first(list) rb_entry(rb_first(list), struct jffs2_tmp_dnode_info, rb)
352
331/* nodelist.c */ 353/* nodelist.c */
332void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list); 354void jffs2_add_fd_to_list(struct jffs2_sb_info *c, struct jffs2_full_dirent *new, struct jffs2_full_dirent **list);
333void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state); 355void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state);
@@ -343,7 +365,6 @@ struct rb_node *rb_prev(struct rb_node *);
343void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); 365void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root);
344int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn); 366int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_full_dnode *fn);
345void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size); 367void jffs2_truncate_fragtree (struct jffs2_sb_info *c, struct rb_root *list, uint32_t size);
346int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn);
347struct jffs2_raw_node_ref *jffs2_link_node_ref(struct jffs2_sb_info *c, 368struct jffs2_raw_node_ref *jffs2_link_node_ref(struct jffs2_sb_info *c,
348 struct jffs2_eraseblock *jeb, 369 struct jffs2_eraseblock *jeb,
349 uint32_t ofs, uint32_t len, 370 uint32_t ofs, uint32_t len,
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 1298848336b8..49d4b0a67c55 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -22,30 +22,510 @@
22#include "nodelist.h" 22#include "nodelist.h"
23 23
24/* 24/*
25 * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in 25 * Check the data CRC of the node.
26 * order of increasing version. 26 *
27 * Returns: 0 if the data CRC is correct;
28 * 1 - if incorrect;
29 * error code if an error occured.
27 */ 30 */
28static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list) 31static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
29{ 32{
30 struct rb_node **p = &list->rb_node; 33 struct jffs2_raw_node_ref *ref = tn->fn->raw;
31 struct rb_node * parent = NULL; 34 int err = 0, pointed = 0;
32 struct jffs2_tmp_dnode_info *this; 35 struct jffs2_eraseblock *jeb;
33 36 unsigned char *buffer;
34 while (*p) { 37 uint32_t crc, ofs, len;
35 parent = *p; 38 size_t retlen;
36 this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb); 39
37 40 BUG_ON(tn->csize == 0);
38 /* There may actually be a collision here, but it doesn't 41
39 actually matter. As long as the two nodes with the same 42 if (!jffs2_is_writebuffered(c))
40 version are together, it's all fine. */ 43 goto adj_acc;
41 if (tn->version > this->version) 44
42 p = &(*p)->rb_left; 45 /* Calculate how many bytes were already checked */
46 ofs = ref_offset(ref) + sizeof(struct jffs2_raw_inode);
47 len = ofs % c->wbuf_pagesize;
48 if (likely(len))
49 len = c->wbuf_pagesize - len;
50
51 if (len >= tn->csize) {
52 dbg_readinode("no need to check node at %#08x, data length %u, data starts at %#08x - it has already been checked.\n",
53 ref_offset(ref), tn->csize, ofs);
54 goto adj_acc;
55 }
56
57 ofs += len;
58 len = tn->csize - len;
59
60 dbg_readinode("check node at %#08x, data length %u, partial CRC %#08x, correct CRC %#08x, data starts at %#08x, start checking from %#08x - %u bytes.\n",
61 ref_offset(ref), tn->csize, tn->partial_crc, tn->data_crc, ofs - len, ofs, len);
62
63#ifndef __ECOS
64 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
65 * adding and jffs2_flash_read_end() interface. */
66 if (c->mtd->point) {
67 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
68 if (!err && retlen < tn->csize) {
69 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
70 c->mtd->unpoint(c->mtd, buffer, ofs, len);
71 } else if (err)
72 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
43 else 73 else
44 p = &(*p)->rb_right; 74 pointed = 1; /* succefully pointed to device */
75 }
76#endif
77
78 if (!pointed) {
79 buffer = kmalloc(len, GFP_KERNEL);
80 if (unlikely(!buffer))
81 return -ENOMEM;
82
83 /* TODO: this is very frequent pattern, make it a separate
84 * routine */
85 err = jffs2_flash_read(c, ofs, len, &retlen, buffer);
86 if (err) {
87 JFFS2_ERROR("can not read %d bytes from 0x%08x, error code: %d.\n", len, ofs, err);
88 goto free_out;
89 }
90
91 if (retlen != len) {
92 JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n", ofs, retlen, len);
93 err = -EIO;
94 goto free_out;
95 }
96 }
97
98 /* Continue calculating CRC */
99 crc = crc32(tn->partial_crc, buffer, len);
100 if(!pointed)
101 kfree(buffer);
102#ifndef __ECOS
103 else
104 c->mtd->unpoint(c->mtd, buffer, ofs, len);
105#endif
106
107 if (crc != tn->data_crc) {
108 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
109 ofs, tn->data_crc, crc);
110 return 1;
45 } 111 }
46 112
47 rb_link_node(&tn->rb, parent, p); 113adj_acc:
48 rb_insert_color(&tn->rb, list); 114 jeb = &c->blocks[ref->flash_offset / c->sector_size];
115 len = ref_totlen(c, jeb, ref);
116 /* If it should be REF_NORMAL, it'll get marked as such when
117 we build the fragtree, shortly. No need to worry about GC
118 moving it while it's marked REF_PRISTINE -- GC won't happen
119 till we've finished checking every inode anyway. */
120 ref->flash_offset |= REF_PRISTINE;
121 /*
122 * Mark the node as having been checked and fix the
123 * accounting accordingly.
124 */
125 spin_lock(&c->erase_completion_lock);
126 jeb->used_size += len;
127 jeb->unchecked_size -= len;
128 c->used_size += len;
129 c->unchecked_size -= len;
130 jffs2_dbg_acct_paranoia_check_nolock(c, jeb);
131 spin_unlock(&c->erase_completion_lock);
132
133 return 0;
134
135free_out:
136 if(!pointed)
137 kfree(buffer);
138#ifndef __ECOS
139 else
140 c->mtd->unpoint(c->mtd, buffer, ofs, len);
141#endif
142 return err;
143}
144
145/*
146 * Helper function for jffs2_add_older_frag_to_fragtree().
147 *
148 * Checks the node if we are in the checking stage.
149 */
150static int check_tn_node(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
151{
152 int ret;
153
154 BUG_ON(ref_obsolete(tn->fn->raw));
155
156 /* We only check the data CRC of unchecked nodes */
157 if (ref_flags(tn->fn->raw) != REF_UNCHECKED)
158 return 0;
159
160 dbg_readinode("check node %#04x-%#04x, phys offs %#08x\n",
161 tn->fn->ofs, tn->fn->ofs + tn->fn->size, ref_offset(tn->fn->raw));
162
163 ret = check_node_data(c, tn);
164 if (unlikely(ret < 0)) {
165 JFFS2_ERROR("check_node_data() returned error: %d.\n",
166 ret);
167 } else if (unlikely(ret > 0)) {
168 dbg_readinode("CRC error, mark it obsolete.\n");
169 jffs2_mark_node_obsolete(c, tn->fn->raw);
170 }
171
172 return ret;
173}
174
175static struct jffs2_tmp_dnode_info *jffs2_lookup_tn(struct rb_root *tn_root, uint32_t offset)
176{
177 struct rb_node *next;
178 struct jffs2_tmp_dnode_info *tn = NULL;
179
180 dbg_readinode("root %p, offset %d\n", tn_root, offset);
181
182 next = tn_root->rb_node;
183
184 while (next) {
185 tn = rb_entry(next, struct jffs2_tmp_dnode_info, rb);
186
187 if (tn->fn->ofs < offset)
188 next = tn->rb.rb_right;
189 else if (tn->fn->ofs >= offset)
190 next = tn->rb.rb_left;
191 else
192 break;
193 }
194
195 return tn;
196}
197
198
199static void jffs2_kill_tn(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
200{
201 jffs2_mark_node_obsolete(c, tn->fn->raw);
202 jffs2_free_full_dnode(tn->fn);
203 jffs2_free_tmp_dnode_info(tn);
204}
205/*
206 * This function is used when we read an inode. Data nodes arrive in
207 * arbitrary order -- they may be older or newer than the nodes which
208 * are already in the tree. Where overlaps occur, the older node can
209 * be discarded as long as the newer passes the CRC check. We don't
210 * bother to keep track of holes in this rbtree, and neither do we deal
211 * with frags -- we can have multiple entries starting at the same
212 * offset, and the one with the smallest length will come first in the
213 * ordering.
214 *
215 * Returns 0 if the node was inserted
216 * 1 if the node is obsolete (because we can't mark it so yet)
217 * < 0 an if error occurred
218 */
219static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
220 struct jffs2_readinode_info *rii,
221 struct jffs2_tmp_dnode_info *tn)
222{
223 uint32_t fn_end = tn->fn->ofs + tn->fn->size;
224 struct jffs2_tmp_dnode_info *insert_point = NULL, *this;
225
226 dbg_readinode("insert fragment %#04x-%#04x, ver %u\n", tn->fn->ofs, fn_end, tn->version);
227
228 /* If a node has zero dsize, we only have to keep if it if it might be the
229 node with highest version -- i.e. the one which will end up as f->metadata.
230 Note that such nodes won't be REF_UNCHECKED since there are no data to
231 check anyway. */
232 if (!tn->fn->size) {
233 if (rii->mdata_tn) {
234 /* We had a candidate mdata node already */
235 dbg_readinode("kill old mdata with ver %d\n", rii->mdata_tn->version);
236 jffs2_kill_tn(c, rii->mdata_tn);
237 }
238 rii->mdata_tn = tn;
239 dbg_readinode("keep new mdata with ver %d\n", tn->version);
240 return 0;
241 }
242
243 /* Find the earliest node which _may_ be relevant to this one */
244 this = jffs2_lookup_tn(&rii->tn_root, tn->fn->ofs);
245 if (!this) {
246 /* First addition to empty tree. $DEITY how I love the easy cases */
247 rb_link_node(&tn->rb, NULL, &rii->tn_root.rb_node);
248 rb_insert_color(&tn->rb, &rii->tn_root);
249 dbg_readinode("keep new frag\n");
250 return 0;
251 }
252
253 /* If we add a new node it'll be somewhere under here. */
254 insert_point = this;
255
256 /* If the node is coincident with another at a lower address,
257 back up until the other node is found. It may be relevant */
258 while (tn->overlapped)
259 tn = tn_prev(tn);
260
261 dbg_readinode("'this' found %#04x-%#04x (%s)\n", this->fn->ofs, this->fn->ofs + this->fn->size, this->fn ? "data" : "hole");
262
263 while (this) {
264 if (this->fn->ofs > fn_end)
265 break;
266 dbg_readinode("Ponder this ver %d, 0x%x-0x%x\n",
267 this->version, this->fn->ofs, this->fn->size);
268
269 if (this->version == tn->version) {
270 /* Version number collision means REF_PRISTINE GC. Accept either of them
271 as long as the CRC is correct. Check the one we have already... */
272 if (!check_tn_node(c, this)) {
273 /* The one we already had was OK. Keep it and throw away the new one */
274 dbg_readinode("Like old node. Throw away new\n");
275 jffs2_kill_tn(c, tn);
276 return 0;
277 } else {
278 /* Who cares if the new one is good; keep it for now anyway. */
279 rb_replace_node(&this->rb, &tn->rb, &rii->tn_root);
280 /* Same overlapping from in front and behind */
281 tn->overlapped = this->overlapped;
282 jffs2_kill_tn(c, this);
283 dbg_readinode("Like new node. Throw away old\n");
284 return 0;
285 }
286 }
287 if (this->version < tn->version &&
288 this->fn->ofs >= tn->fn->ofs &&
289 this->fn->ofs + this->fn->size <= fn_end) {
290 /* New node entirely overlaps 'this' */
291 if (check_tn_node(c, tn)) {
292 dbg_readinode("new node bad CRC\n");
293 jffs2_kill_tn(c, tn);
294 return 0;
295 }
296 /* ... and is good. Kill 'this'... */
297 rb_replace_node(&this->rb, &tn->rb, &rii->tn_root);
298 tn->overlapped = this->overlapped;
299 jffs2_kill_tn(c, this);
300 /* ... and any subsequent nodes which are also overlapped */
301 this = tn_next(tn);
302 while (this && this->fn->ofs + this->fn->size < fn_end) {
303 struct jffs2_tmp_dnode_info *next = tn_next(this);
304 if (this->version < tn->version) {
305 tn_erase(this, &rii->tn_root);
306 dbg_readinode("Kill overlapped ver %d, 0x%x-0x%x\n",
307 this->version, this->fn->ofs,
308 this->fn->ofs+this->fn->size);
309 jffs2_kill_tn(c, this);
310 }
311 this = next;
312 }
313 dbg_readinode("Done inserting new\n");
314 return 0;
315 }
316 if (this->version > tn->version &&
317 this->fn->ofs <= tn->fn->ofs &&
318 this->fn->ofs+this->fn->size >= fn_end) {
319 /* New node entirely overlapped by 'this' */
320 if (!check_tn_node(c, this)) {
321 dbg_readinode("Good CRC on old node. Kill new\n");
322 jffs2_kill_tn(c, tn);
323 return 0;
324 }
325 /* ... but 'this' was bad. Replace it... */
326 rb_replace_node(&this->rb, &tn->rb, &rii->tn_root);
327 dbg_readinode("Bad CRC on old overlapping node. Kill it\n");
328 jffs2_kill_tn(c, this);
329 return 0;
330 }
331 /* We want to be inserted under the last node which is
332 either at a lower offset _or_ has a smaller range */
333 if (this->fn->ofs < tn->fn->ofs ||
334 (this->fn->ofs == tn->fn->ofs &&
335 this->fn->size <= tn->fn->size))
336 insert_point = this;
337
338 this = tn_next(this);
339 }
340 dbg_readinode("insert_point %p, ver %d, 0x%x-0x%x, ov %d\n",
341 insert_point, insert_point->version, insert_point->fn->ofs,
342 insert_point->fn->ofs+insert_point->fn->size,
343 insert_point->overlapped);
344 /* We neither completely obsoleted nor were completely
345 obsoleted by an earlier node. Insert under insert_point */
346 {
347 struct rb_node *parent = &insert_point->rb;
348 struct rb_node **link = &parent;
349
350 while (*link) {
351 parent = *link;
352 insert_point = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
353 if (tn->fn->ofs > insert_point->fn->ofs)
354 link = &insert_point->rb.rb_right;
355 else if (tn->fn->ofs < insert_point->fn->ofs ||
356 tn->fn->size < insert_point->fn->size)
357 link = &insert_point->rb.rb_left;
358 else
359 link = &insert_point->rb.rb_right;
360 }
361 rb_link_node(&tn->rb, &insert_point->rb, link);
362 rb_insert_color(&tn->rb, &rii->tn_root);
363 }
364 /* If there's anything behind that overlaps us, note it */
365 this = tn_prev(tn);
366 if (this) {
367 while (1) {
368 if (this->fn->ofs + this->fn->size > tn->fn->ofs) {
369 dbg_readinode("Node is overlapped by %p (v %d, 0x%x-0x%x)\n",
370 this, this->version, this->fn->ofs,
371 this->fn->ofs+this->fn->size);
372 tn->overlapped = 1;
373 break;
374 }
375 if (!this->overlapped)
376 break;
377 this = tn_prev(this);
378 }
379 }
380
381 /* If the new node overlaps anything ahead, note it */
382 this = tn_next(tn);
383 while (this && this->fn->ofs < fn_end) {
384 this->overlapped = 1;
385 dbg_readinode("Node ver %d, 0x%x-0x%x is overlapped\n",
386 this->version, this->fn->ofs,
387 this->fn->ofs+this->fn->size);
388 this = tn_next(this);
389 }
390 return 0;
391}
392
393/* Trivial function to remove the last node in the tree. Which by definition
394 has no right-hand -- so can be removed just by making its only child (if
395 any) take its place under its parent. */
396static void eat_last(struct rb_root *root, struct rb_node *node)
397{
398 struct rb_node *parent = rb_parent(node);
399 struct rb_node **link;
400
401 /* LAST! */
402 BUG_ON(node->rb_right);
403
404 if (!parent)
405 link = &root->rb_node;
406 else if (node == parent->rb_left)
407 link = &parent->rb_left;
408 else
409 link = &parent->rb_right;
410
411 *link = node->rb_left;
412 /* Colour doesn't matter now. Only the parent pointer. */
413 if (node->rb_left)
414 node->rb_left->rb_parent_color = node->rb_parent_color;
415}
416
417/* We put this in reverse order, so we can just use eat_last */
418static void ver_insert(struct rb_root *ver_root, struct jffs2_tmp_dnode_info *tn)
419{
420 struct rb_node **link = &ver_root->rb_node;
421 struct rb_node *parent = NULL;
422 struct jffs2_tmp_dnode_info *this_tn;
423
424 while (*link) {
425 parent = *link;
426 this_tn = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
427
428 if (tn->version > this_tn->version)
429 link = &parent->rb_left;
430 else
431 link = &parent->rb_right;
432 }
433 dbg_readinode("Link new node at %p (root is %p)\n", link, ver_root);
434 rb_link_node(&tn->rb, parent, link);
435 rb_insert_color(&tn->rb, ver_root);
436}
437
438/* Build final, normal fragtree from tn tree. It doesn't matter which order
439 we add nodes to the real fragtree, as long as they don't overlap. And
440 having thrown away the majority of overlapped nodes as we went, there
441 really shouldn't be many sets of nodes which do overlap. If we start at
442 the end, we can use the overlap markers -- we can just eat nodes which
443 aren't overlapped, and when we encounter nodes which _do_ overlap we
444 sort them all into a temporary tree in version order before replaying them. */
445static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c,
446 struct jffs2_inode_info *f,
447 struct jffs2_readinode_info *rii)
448{
449 struct jffs2_tmp_dnode_info *pen, *last, *this;
450 struct rb_root ver_root = RB_ROOT;
451 uint32_t high_ver = 0;
452
453 if (rii->mdata_tn) {
454 dbg_readinode("potential mdata is ver %d at %p\n", rii->mdata_tn->version, rii->mdata_tn);
455 high_ver = rii->mdata_tn->version;
456 rii->latest_ref = rii->mdata_tn->fn->raw;
457 }
458#ifdef JFFS2_DBG_READINODE_MESSAGES
459 this = tn_last(&rii->tn_root);
460 while (this) {
461 dbg_readinode("tn %p ver %d range 0x%x-0x%x ov %d\n", this, this->version, this->fn->ofs,
462 this->fn->ofs+this->fn->size, this->overlapped);
463 this = tn_prev(this);
464 }
465#endif
466 pen = tn_last(&rii->tn_root);
467 while ((last = pen)) {
468 pen = tn_prev(last);
469
470 eat_last(&rii->tn_root, &last->rb);
471 ver_insert(&ver_root, last);
472
473 if (unlikely(last->overlapped))
474 continue;
475
476 /* Now we have a bunch of nodes in reverse version
477 order, in the tree at ver_root. Most of the time,
478 there'll actually be only one node in the 'tree',
479 in fact. */
480 this = tn_last(&ver_root);
481
482 while (this) {
483 struct jffs2_tmp_dnode_info *vers_next;
484 int ret;
485 vers_next = tn_prev(this);
486 eat_last(&ver_root, &this->rb);
487 if (check_tn_node(c, this)) {
488 dbg_readinode("node ver %x, 0x%x-0x%x failed CRC\n",
489 this->version, this->fn->ofs,
490 this->fn->ofs+this->fn->size);
491 jffs2_kill_tn(c, this);
492 } else {
493 if (this->version > high_ver) {
494 /* Note that this is different from the other
495 highest_version, because this one is only
496 counting _valid_ nodes which could give the
497 latest inode metadata */
498 high_ver = this->version;
499 rii->latest_ref = this->fn->raw;
500 }
501 dbg_readinode("Add %p (v %x, 0x%x-0x%x, ov %d) to fragtree\n",
502 this, this->version, this->fn->ofs,
503 this->fn->ofs+this->fn->size, this->overlapped);
504
505 ret = jffs2_add_full_dnode_to_inode(c, f, this->fn);
506 if (ret) {
507 /* Free the nodes in vers_root; let the caller
508 deal with the rest */
509 JFFS2_ERROR("Add node to tree failed %d\n", ret);
510 while (1) {
511 vers_next = tn_prev(this);
512 if (check_tn_node(c, this))
513 jffs2_mark_node_obsolete(c, this->fn->raw);
514 jffs2_free_full_dnode(this->fn);
515 jffs2_free_tmp_dnode_info(this);
516 this = vers_next;
517 if (!this)
518 break;
519 eat_last(&ver_root, &vers_next->rb);
520 }
521 return ret;
522 }
523 jffs2_free_tmp_dnode_info(this);
524 }
525 this = vers_next;
526 }
527 }
528 return 0;
49} 529}
50 530
51static void jffs2_free_tmp_dnode_info_list(struct rb_root *list) 531static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
@@ -112,8 +592,8 @@ static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_r
112 * negative error code on failure. 592 * negative error code on failure.
113 */ 593 */
114static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, 594static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
115 struct jffs2_raw_dirent *rd, size_t read, struct jffs2_full_dirent **fdp, 595 struct jffs2_raw_dirent *rd, size_t read,
116 uint32_t *latest_mctime, uint32_t *mctime_ver) 596 struct jffs2_readinode_info *rii)
117{ 597{
118 struct jffs2_full_dirent *fd; 598 struct jffs2_full_dirent *fd;
119 uint32_t crc; 599 uint32_t crc;
@@ -125,7 +605,8 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
125 if (unlikely(crc != je32_to_cpu(rd->node_crc))) { 605 if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
126 JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n", 606 JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n",
127 ref_offset(ref), je32_to_cpu(rd->node_crc), crc); 607 ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
128 return 1; 608 jffs2_mark_node_obsolete(c, ref);
609 return 0;
129 } 610 }
130 611
131 /* If we've never checked the CRCs on this node, check them now */ 612 /* If we've never checked the CRCs on this node, check them now */
@@ -137,7 +618,8 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
137 if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) { 618 if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
138 JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n", 619 JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
139 ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen)); 620 ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
140 return 1; 621 jffs2_mark_node_obsolete(c, ref);
622 return 0;
141 } 623 }
142 624
143 jeb = &c->blocks[ref->flash_offset / c->sector_size]; 625 jeb = &c->blocks[ref->flash_offset / c->sector_size];
@@ -161,10 +643,13 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
161 fd->ino = je32_to_cpu(rd->ino); 643 fd->ino = je32_to_cpu(rd->ino);
162 fd->type = rd->type; 644 fd->type = rd->type;
163 645
646 if (fd->version > rii->highest_version)
647 rii->highest_version = fd->version;
648
164 /* Pick out the mctime of the latest dirent */ 649 /* Pick out the mctime of the latest dirent */
165 if(fd->version > *mctime_ver && je32_to_cpu(rd->mctime)) { 650 if(fd->version > rii->mctime_ver && je32_to_cpu(rd->mctime)) {
166 *mctime_ver = fd->version; 651 rii->mctime_ver = fd->version;
167 *latest_mctime = je32_to_cpu(rd->mctime); 652 rii->latest_mctime = je32_to_cpu(rd->mctime);
168 } 653 }
169 654
170 /* 655 /*
@@ -201,7 +686,7 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
201 * Wheee. We now have a complete jffs2_full_dirent structure, with 686 * Wheee. We now have a complete jffs2_full_dirent structure, with
202 * the name in it and everything. Link it into the list 687 * the name in it and everything. Link it into the list
203 */ 688 */
204 jffs2_add_fd_to_list(c, fd, fdp); 689 jffs2_add_fd_to_list(c, fd, &rii->fds);
205 690
206 return 0; 691 return 0;
207} 692}
@@ -210,13 +695,13 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
210 * Helper function for jffs2_get_inode_nodes(). 695 * Helper function for jffs2_get_inode_nodes().
211 * It is called every time an inode node is found. 696 * It is called every time an inode node is found.
212 * 697 *
213 * Returns: 0 on succes; 698 * Returns: 0 on success;
214 * 1 if the node should be marked obsolete; 699 * 1 if the node should be marked obsolete;
215 * negative error code on failure. 700 * negative error code on failure.
216 */ 701 */
217static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, 702static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
218 struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen, 703 struct jffs2_raw_inode *rd, int rdlen,
219 uint32_t *latest_mctime, uint32_t *mctime_ver) 704 struct jffs2_readinode_info *rii)
220{ 705{
221 struct jffs2_tmp_dnode_info *tn; 706 struct jffs2_tmp_dnode_info *tn;
222 uint32_t len, csize; 707 uint32_t len, csize;
@@ -230,7 +715,8 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
230 if (unlikely(crc != je32_to_cpu(rd->node_crc))) { 715 if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
231 JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n", 716 JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n",
232 ref_offset(ref), je32_to_cpu(rd->node_crc), crc); 717 ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
233 return 1; 718 jffs2_mark_node_obsolete(c, ref);
719 return 0;
234 } 720 }
235 721
236 tn = jffs2_alloc_tmp_dnode_info(); 722 tn = jffs2_alloc_tmp_dnode_info();
@@ -342,6 +828,10 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
342 tn->data_crc = je32_to_cpu(rd->data_crc); 828 tn->data_crc = je32_to_cpu(rd->data_crc);
343 tn->csize = csize; 829 tn->csize = csize;
344 tn->fn->raw = ref; 830 tn->fn->raw = ref;
831 tn->overlapped = 0;
832
833 if (tn->version > rii->highest_version)
834 rii->highest_version = tn->version;
345 835
346 /* There was a bug where we wrote hole nodes out with 836 /* There was a bug where we wrote hole nodes out with
347 csize/dsize swapped. Deal with it */ 837 csize/dsize swapped. Deal with it */
@@ -353,13 +843,25 @@ static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
353 dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n", 843 dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
354 ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize); 844 ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
355 845
356 jffs2_add_tn_to_tree(tn, tnp); 846 ret = jffs2_add_tn_to_tree(c, rii, tn);
357 847
848 if (ret) {
849 jffs2_free_full_dnode(tn->fn);
850 free_out:
851 jffs2_free_tmp_dnode_info(tn);
852 return ret;
853 }
854#ifdef JFFS2_DBG_READINODE_MESSAGES
855 dbg_readinode("After adding ver %d:\n", tn->version);
856 tn = tn_first(&rii->tn_root);
857 while (tn) {
858 dbg_readinode("%p: v %d r 0x%x-0x%x ov %d\n",
859 tn, tn->version, tn->fn->ofs,
860 tn->fn->ofs+tn->fn->size, tn->overlapped);
861 tn = tn_next(tn);
862 }
863#endif
358 return 0; 864 return 0;
359
360free_out:
361 jffs2_free_tmp_dnode_info(tn);
362 return ret;
363} 865}
364 866
365/* 867/*
@@ -379,7 +881,8 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re
379 JFFS2_ERROR("Node is {%04x,%04x,%08x,%08x}. Please report this error.\n", 881 JFFS2_ERROR("Node is {%04x,%04x,%08x,%08x}. Please report this error.\n",
380 je16_to_cpu(un->magic), je16_to_cpu(un->nodetype), 882 je16_to_cpu(un->magic), je16_to_cpu(un->nodetype),
381 je32_to_cpu(un->totlen), je32_to_cpu(un->hdr_crc)); 883 je32_to_cpu(un->totlen), je32_to_cpu(un->hdr_crc));
382 return 1; 884 jffs2_mark_node_obsolete(c, ref);
885 return 0;
383 } 886 }
384 887
385 un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype)); 888 un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
@@ -407,7 +910,8 @@ static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_re
407 case JFFS2_FEATURE_RWCOMPAT_DELETE: 910 case JFFS2_FEATURE_RWCOMPAT_DELETE:
408 JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n", 911 JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
409 je16_to_cpu(un->nodetype), ref_offset(ref)); 912 je16_to_cpu(un->nodetype), ref_offset(ref));
410 return 1; 913 jffs2_mark_node_obsolete(c, ref);
914 return 0;
411 } 915 }
412 916
413 return 0; 917 return 0;
@@ -457,21 +961,20 @@ static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
457} 961}
458 962
459/* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated 963/* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
460 with this ino, returning the former in order of version */ 964 with this ino. Perform a preliminary ordering on data nodes, throwing away
965 those which are completely obsoleted by newer ones. The naïve approach we
966 use to take of just returning them _all_ in version order will cause us to
967 run out of memory in certain degenerate cases. */
461static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f, 968static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
462 struct rb_root *tnp, struct jffs2_full_dirent **fdp, 969 struct jffs2_readinode_info *rii)
463 uint32_t *highest_version, uint32_t *latest_mctime,
464 uint32_t *mctime_ver)
465{ 970{
466 struct jffs2_raw_node_ref *ref, *valid_ref; 971 struct jffs2_raw_node_ref *ref, *valid_ref;
467 struct rb_root ret_tn = RB_ROOT;
468 struct jffs2_full_dirent *ret_fd = NULL;
469 unsigned char *buf = NULL; 972 unsigned char *buf = NULL;
470 union jffs2_node_union *node; 973 union jffs2_node_union *node;
471 size_t retlen; 974 size_t retlen;
472 int len, err; 975 int len, err;
473 976
474 *mctime_ver = 0; 977 rii->mctime_ver = 0;
475 978
476 dbg_readinode("ino #%u\n", f->inocache->ino); 979 dbg_readinode("ino #%u\n", f->inocache->ino);
477 980
@@ -569,16 +1072,10 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
569 goto free_out; 1072 goto free_out;
570 } 1073 }
571 1074
572 err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver); 1075 err = read_direntry(c, ref, &node->d, retlen, rii);
573 if (err == 1) { 1076 if (unlikely(err))
574 jffs2_mark_node_obsolete(c, ref);
575 break;
576 } else if (unlikely(err))
577 goto free_out; 1077 goto free_out;
578 1078
579 if (je32_to_cpu(node->d.version) > *highest_version)
580 *highest_version = je32_to_cpu(node->d.version);
581
582 break; 1079 break;
583 1080
584 case JFFS2_NODETYPE_INODE: 1081 case JFFS2_NODETYPE_INODE:
@@ -589,16 +1086,10 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
589 goto free_out; 1086 goto free_out;
590 } 1087 }
591 1088
592 err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver); 1089 err = read_dnode(c, ref, &node->i, len, rii);
593 if (err == 1) { 1090 if (unlikely(err))
594 jffs2_mark_node_obsolete(c, ref);
595 break;
596 } else if (unlikely(err))
597 goto free_out; 1091 goto free_out;
598 1092
599 if (je32_to_cpu(node->i.version) > *highest_version)
600 *highest_version = je32_to_cpu(node->i.version);
601
602 break; 1093 break;
603 1094
604 default: 1095 default:
@@ -621,17 +1112,19 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
621 } 1112 }
622 1113
623 spin_unlock(&c->erase_completion_lock); 1114 spin_unlock(&c->erase_completion_lock);
624 *tnp = ret_tn;
625 *fdp = ret_fd;
626 kfree(buf); 1115 kfree(buf);
627 1116
1117 f->highest_version = rii->highest_version;
1118
628 dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n", 1119 dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
629 f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver); 1120 f->inocache->ino, rii->highest_version, rii->latest_mctime,
1121 rii->mctime_ver);
630 return 0; 1122 return 0;
631 1123
632 free_out: 1124 free_out:
633 jffs2_free_tmp_dnode_info_list(&ret_tn); 1125 jffs2_free_tmp_dnode_info_list(&rii->tn_root);
634 jffs2_free_full_dirent_list(ret_fd); 1126 jffs2_free_full_dirent_list(rii->fds);
1127 rii->fds = NULL;
635 kfree(buf); 1128 kfree(buf);
636 return err; 1129 return err;
637} 1130}
@@ -640,20 +1133,17 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
640 struct jffs2_inode_info *f, 1133 struct jffs2_inode_info *f,
641 struct jffs2_raw_inode *latest_node) 1134 struct jffs2_raw_inode *latest_node)
642{ 1135{
643 struct jffs2_tmp_dnode_info *tn; 1136 struct jffs2_readinode_info rii;
644 struct rb_root tn_list;
645 struct rb_node *rb, *repl_rb;
646 struct jffs2_full_dirent *fd_list;
647 struct jffs2_full_dnode *fn, *first_fn = NULL;
648 uint32_t crc; 1137 uint32_t crc;
649 uint32_t latest_mctime, mctime_ver;
650 size_t retlen; 1138 size_t retlen;
651 int ret; 1139 int ret;
652 1140
653 dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink); 1141 dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
654 1142
1143 memset(&rii, 0, sizeof(rii));
1144
655 /* Grab all nodes relevant to this ino */ 1145 /* Grab all nodes relevant to this ino */
656 ret = jffs2_get_inode_nodes(c, f, &tn_list, &fd_list, &f->highest_version, &latest_mctime, &mctime_ver); 1146 ret = jffs2_get_inode_nodes(c, f, &rii);
657 1147
658 if (ret) { 1148 if (ret) {
659 JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret); 1149 JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret);
@@ -661,74 +1151,42 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
661 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT); 1151 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
662 return ret; 1152 return ret;
663 } 1153 }
664 f->dents = fd_list;
665
666 rb = rb_first(&tn_list);
667 1154
668 while (rb) { 1155 ret = jffs2_build_inode_fragtree(c, f, &rii);
669 cond_resched(); 1156 if (ret) {
670 tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb); 1157 JFFS2_ERROR("Failed to build final fragtree for inode #%u: error %d\n",
671 fn = tn->fn; 1158 f->inocache->ino, ret);
672 ret = 1; 1159 if (f->inocache->state == INO_STATE_READING)
673 dbg_readinode("consider node ver %u, phys offset " 1160 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
674 "%#08x(%d), range %u-%u.\n", tn->version, 1161 jffs2_free_tmp_dnode_info_list(&rii.tn_root);
675 ref_offset(fn->raw), ref_flags(fn->raw), 1162 /* FIXME: We could at least crc-check them all */
676 fn->ofs, fn->ofs + fn->size); 1163 if (rii.mdata_tn) {
677 1164 jffs2_free_full_dnode(rii.mdata_tn->fn);
678 if (fn->size) { 1165 jffs2_free_tmp_dnode_info(rii.mdata_tn);
679 ret = jffs2_add_older_frag_to_fragtree(c, f, tn); 1166 rii.mdata_tn = NULL;
680 /* TODO: the error code isn't checked, check it */ 1167 }
681 jffs2_dbg_fragtree_paranoia_check_nolock(f); 1168 return ret;
682 BUG_ON(ret < 0); 1169 }
683 if (!first_fn && ret == 0)
684 first_fn = fn;
685 } else if (!first_fn) {
686 first_fn = fn;
687 f->metadata = fn;
688 ret = 0; /* Prevent freeing the metadata update node */
689 } else
690 jffs2_mark_node_obsolete(c, fn->raw);
691
692 BUG_ON(rb->rb_left);
693 if (rb_parent(rb) && rb_parent(rb)->rb_left == rb) {
694 /* We were then left-hand child of our parent. We need
695 * to move our own right-hand child into our place. */
696 repl_rb = rb->rb_right;
697 if (repl_rb)
698 rb_set_parent(repl_rb, rb_parent(rb));
699 } else
700 repl_rb = NULL;
701
702 rb = rb_next(rb);
703
704 /* Remove the spent tn from the tree; don't bother rebalancing
705 * but put our right-hand child in our own place. */
706 if (rb_parent(&tn->rb)) {
707 if (rb_parent(&tn->rb)->rb_left == &tn->rb)
708 rb_parent(&tn->rb)->rb_left = repl_rb;
709 else if (rb_parent(&tn->rb)->rb_right == &tn->rb)
710 rb_parent(&tn->rb)->rb_right = repl_rb;
711 else BUG();
712 } else if (tn->rb.rb_right)
713 rb_set_parent(tn->rb.rb_right, NULL);
714 1170
715 jffs2_free_tmp_dnode_info(tn); 1171 if (rii.mdata_tn) {
716 if (ret) { 1172 if (rii.mdata_tn->fn->raw == rii.latest_ref) {
717 dbg_readinode("delete dnode %u-%u.\n", 1173 f->metadata = rii.mdata_tn->fn;
718 fn->ofs, fn->ofs + fn->size); 1174 jffs2_free_tmp_dnode_info(rii.mdata_tn);
719 jffs2_free_full_dnode(fn); 1175 } else {
1176 jffs2_kill_tn(c, rii.mdata_tn);
720 } 1177 }
1178 rii.mdata_tn = NULL;
721 } 1179 }
722 jffs2_dbg_fragtree_paranoia_check_nolock(f);
723 1180
724 BUG_ON(first_fn && ref_obsolete(first_fn->raw)); 1181 f->dents = rii.fds;
1182
1183 jffs2_dbg_fragtree_paranoia_check_nolock(f);
725 1184
726 fn = first_fn; 1185 if (unlikely(!rii.latest_ref)) {
727 if (unlikely(!first_fn)) {
728 /* No data nodes for this inode. */ 1186 /* No data nodes for this inode. */
729 if (f->inocache->ino != 1) { 1187 if (f->inocache->ino != 1) {
730 JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino); 1188 JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);
731 if (!fd_list) { 1189 if (!rii.fds) {
732 if (f->inocache->state == INO_STATE_READING) 1190 if (f->inocache->state == INO_STATE_READING)
733 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT); 1191 jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
734 return -EIO; 1192 return -EIO;
@@ -746,7 +1204,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
746 return 0; 1204 return 0;
747 } 1205 }
748 1206
749 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(*latest_node), &retlen, (void *)latest_node); 1207 ret = jffs2_flash_read(c, ref_offset(rii.latest_ref), sizeof(*latest_node), &retlen, (void *)latest_node);
750 if (ret || retlen != sizeof(*latest_node)) { 1208 if (ret || retlen != sizeof(*latest_node)) {
751 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n", 1209 JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
752 ret, retlen, sizeof(*latest_node)); 1210 ret, retlen, sizeof(*latest_node));
@@ -759,7 +1217,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
759 crc = crc32(0, latest_node, sizeof(*latest_node)-8); 1217 crc = crc32(0, latest_node, sizeof(*latest_node)-8);
760 if (crc != je32_to_cpu(latest_node->node_crc)) { 1218 if (crc != je32_to_cpu(latest_node->node_crc)) {
761 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n", 1219 JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
762 f->inocache->ino, ref_offset(fn->raw)); 1220 f->inocache->ino, ref_offset(rii.latest_ref));
763 up(&f->sem); 1221 up(&f->sem);
764 jffs2_do_clear_inode(c, f); 1222 jffs2_do_clear_inode(c, f);
765 return -EIO; 1223 return -EIO;
@@ -767,10 +1225,10 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
767 1225
768 switch(jemode_to_cpu(latest_node->mode) & S_IFMT) { 1226 switch(jemode_to_cpu(latest_node->mode) & S_IFMT) {
769 case S_IFDIR: 1227 case S_IFDIR:
770 if (mctime_ver > je32_to_cpu(latest_node->version)) { 1228 if (rii.mctime_ver > je32_to_cpu(latest_node->version)) {
771 /* The times in the latest_node are actually older than 1229 /* The times in the latest_node are actually older than
772 mctime in the latest dirent. Cheat. */ 1230 mctime in the latest dirent. Cheat. */
773 latest_node->ctime = latest_node->mtime = cpu_to_je32(latest_mctime); 1231 latest_node->ctime = latest_node->mtime = cpu_to_je32(rii.latest_mctime);
774 } 1232 }
775 break; 1233 break;
776 1234
@@ -800,7 +1258,7 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
800 return -ENOMEM; 1258 return -ENOMEM;
801 } 1259 }
802 1260
803 ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node), 1261 ret = jffs2_flash_read(c, ref_offset(rii.latest_ref) + sizeof(*latest_node),
804 je32_to_cpu(latest_node->csize), &retlen, (char *)f->target); 1262 je32_to_cpu(latest_node->csize), &retlen, (char *)f->target);
805 1263
806 if (ret || retlen != je32_to_cpu(latest_node->csize)) { 1264 if (ret || retlen != je32_to_cpu(latest_node->csize)) {