aboutsummaryrefslogtreecommitdiffstats
path: root/fs/jffs2/nodelist.c
diff options
context:
space:
mode:
authorDavid Woodhouse <dwmw2@infradead.org>2007-04-24 22:23:42 -0400
committerDavid Woodhouse <dwmw2@infradead.org>2007-04-24 22:23:42 -0400
commitdf8e96f39103adf5a13332d784040a2c62667243 (patch)
tree01d7259260f628f4075c39c1cc7804b72998a601 /fs/jffs2/nodelist.c
parent44b998e1eb254edc87177819ee693690fac68b7f (diff)
[JFFS2] Improve read_inode memory usage, v2.
We originally used to read every node and allocate a jffs2_tmp_dnode_info structure for each, before processing them in (reverse) version order and discarding the ones which are obsoleted by later nodes. With huge logfiles, this behaviour caused memory problems. For example, a file involved in OLPC trac #1292 has 1822391 nodes, and would cause the XO machine to run out of memory during the first stage of read_inode(). Instead of just inserting nodes into a tree in version order as we find them, we now put them into a tree in order of their offset within the file, which allows us to immediately discard nodes which are completely obsoleted. We don't use a full tree with 'fragments' pointing to the real data structure, as we do in the normal fragtree. We sort only on the start address, and add an 'overlapped' flag to the tmp_dnode_info to indicate that the node in question is (partially) overlapped by another. When the scan is complete, we start at the end of the file, adding each node to a real fragtree as before. Where the node is non-overlapped, we just add it (it doesn't matter that it's not the latest version; there is no overlap). When the node at the end of the tree _is_ overlapped, we sort it and all its overlapping nodes into version order and then add them to the fragtree in that order. This 'early discard' reduces the peak allocation of tmp_dnode_info structures from 1.8M to a mere 62872 (3.5%) in the degenerate case referenced above. This version of the patch also correctly rememembers the highest node version# seen for an inode when it's scanned. Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Diffstat (limited to 'fs/jffs2/nodelist.c')
-rw-r--r--fs/jffs2/nodelist.c460
1 files changed, 0 insertions, 460 deletions
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 5a6b4d64206c..fecffbc63552 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -397,466 +397,6 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
397 return 0; 397 return 0;
398} 398}
399 399
400/*
401 * Check the data CRC of the node.
402 *
403 * Returns: 0 if the data CRC is correct;
404 * 1 - if incorrect;
405 * error code if an error occured.
406 */
407static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info *tn)
408{
409 struct jffs2_raw_node_ref *ref = tn->fn->raw;
410 int err = 0, pointed = 0;
411 struct jffs2_eraseblock *jeb;
412 unsigned char *buffer;
413 uint32_t crc, ofs, len;
414 size_t retlen;
415
416 BUG_ON(tn->csize == 0);
417
418 if (!jffs2_is_writebuffered(c))
419 goto adj_acc;
420
421 /* Calculate how many bytes were already checked */
422 ofs = ref_offset(ref) + sizeof(struct jffs2_raw_inode);
423 len = ofs % c->wbuf_pagesize;
424 if (likely(len))
425 len = c->wbuf_pagesize - len;
426
427 if (len >= tn->csize) {
428 dbg_readinode("no need to check node at %#08x, data length %u, data starts at %#08x - it has already been checked.\n",
429 ref_offset(ref), tn->csize, ofs);
430 goto adj_acc;
431 }
432
433 ofs += len;
434 len = tn->csize - len;
435
436 dbg_readinode("check node at %#08x, data length %u, partial CRC %#08x, correct CRC %#08x, data starts at %#08x, start checking from %#08x - %u bytes.\n",
437 ref_offset(ref), tn->csize, tn->partial_crc, tn->data_crc, ofs - len, ofs, len);
438
439#ifndef __ECOS
440 /* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
441 * adding and jffs2_flash_read_end() interface. */
442 if (c->mtd->point) {
443 err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
444 if (!err && retlen < tn->csize) {
445 JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
446 c->mtd->unpoint(c->mtd, buffer, ofs, len);
447 } else if (err)
448 JFFS2_WARNING("MTD point failed: error code %d.\n", err);
449 else
450 pointed = 1; /* succefully pointed to device */
451 }
452#endif
453
454 if (!pointed) {
455 buffer = kmalloc(len, GFP_KERNEL);
456 if (unlikely(!buffer))
457 return -ENOMEM;
458
459 /* TODO: this is very frequent pattern, make it a separate
460 * routine */
461 err = jffs2_flash_read(c, ofs, len, &retlen, buffer);
462 if (err) {
463 JFFS2_ERROR("can not read %d bytes from 0x%08x, error code: %d.\n", len, ofs, err);
464 goto free_out;
465 }
466
467 if (retlen != len) {
468 JFFS2_ERROR("short read at %#08x: %zd instead of %d.\n", ofs, retlen, len);
469 err = -EIO;
470 goto free_out;
471 }
472 }
473
474 /* Continue calculating CRC */
475 crc = crc32(tn->partial_crc, buffer, len);
476 if(!pointed)
477 kfree(buffer);
478#ifndef __ECOS
479 else
480 c->mtd->unpoint(c->mtd, buffer, ofs, len);
481#endif
482
483 if (crc != tn->data_crc) {
484 JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
485 ofs, tn->data_crc, crc);
486 return 1;
487 }
488
489adj_acc:
490 jeb = &c->blocks[ref->flash_offset / c->sector_size];
491 len = ref_totlen(c, jeb, ref);
492
493 /*
494 * Mark the node as having been checked and fix the
495 * accounting accordingly.
496 */
497 spin_lock(&c->erase_completion_lock);
498 jeb->used_size += len;
499 jeb->unchecked_size -= len;
500 c->used_size += len;
501 c->unchecked_size -= len;
502 spin_unlock(&c->erase_completion_lock);
503
504 return 0;
505
506free_out:
507 if(!pointed)
508 kfree(buffer);
509#ifndef __ECOS
510 else
511 c->mtd->unpoint(c->mtd, buffer, ofs, len);
512#endif
513 return err;
514}
515
516/*
517 * Helper function for jffs2_add_older_frag_to_fragtree().
518 *
519 * Checks the node if we are in the checking stage.
520 */
521static int check_node(struct jffs2_sb_info *c, struct jffs2_inode_info *f, struct jffs2_tmp_dnode_info *tn)
522{
523 int ret;
524
525 BUG_ON(ref_obsolete(tn->fn->raw));
526
527 /* We only check the data CRC of unchecked nodes */
528 if (ref_flags(tn->fn->raw) != REF_UNCHECKED)
529 return 0;
530
531 dbg_fragtree2("check node %#04x-%#04x, phys offs %#08x.\n",
532 tn->fn->ofs, tn->fn->ofs + tn->fn->size, ref_offset(tn->fn->raw));
533
534 ret = check_node_data(c, tn);
535 if (unlikely(ret < 0)) {
536 JFFS2_ERROR("check_node_data() returned error: %d.\n",
537 ret);
538 } else if (unlikely(ret > 0)) {
539 dbg_fragtree2("CRC error, mark it obsolete.\n");
540 jffs2_mark_node_obsolete(c, tn->fn->raw);
541 }
542
543 return ret;
544}
545
546/*
547 * Helper function for jffs2_add_older_frag_to_fragtree().
548 *
549 * Called when the new fragment that is being inserted
550 * splits a hole fragment.
551 */
552static int split_hole(struct jffs2_sb_info *c, struct rb_root *root,
553 struct jffs2_node_frag *newfrag, struct jffs2_node_frag *hole)
554{
555 dbg_fragtree2("fragment %#04x-%#04x splits the hole %#04x-%#04x\n",
556 newfrag->ofs, newfrag->ofs + newfrag->size, hole->ofs, hole->ofs + hole->size);
557
558 if (hole->ofs == newfrag->ofs) {
559 /*
560 * Well, the new fragment actually starts at the same offset as
561 * the hole.
562 */
563 if (hole->ofs + hole->size > newfrag->ofs + newfrag->size) {
564 /*
565 * We replace the overlapped left part of the hole by
566 * the new node.
567 */
568
569 dbg_fragtree2("insert fragment %#04x-%#04x and cut the left part of the hole\n",
570 newfrag->ofs, newfrag->ofs + newfrag->size);
571 rb_replace_node(&hole->rb, &newfrag->rb, root);
572
573 hole->ofs += newfrag->size;
574 hole->size -= newfrag->size;
575
576 /*
577 * We know that 'hole' should be the right hand
578 * fragment.
579 */
580 jffs2_fragtree_insert(hole, newfrag);
581 rb_insert_color(&hole->rb, root);
582 } else {
583 /*
584 * Ah, the new fragment is of the same size as the hole.
585 * Relace the hole by it.
586 */
587 dbg_fragtree2("insert fragment %#04x-%#04x and overwrite hole\n",
588 newfrag->ofs, newfrag->ofs + newfrag->size);
589 rb_replace_node(&hole->rb, &newfrag->rb, root);
590 jffs2_free_node_frag(hole);
591 }
592 } else {
593 /* The new fragment lefts some hole space at the left */
594
595 struct jffs2_node_frag * newfrag2 = NULL;
596
597 if (hole->ofs + hole->size > newfrag->ofs + newfrag->size) {
598 /* The new frag also lefts some space at the right */
599 newfrag2 = new_fragment(NULL, newfrag->ofs +
600 newfrag->size, hole->ofs + hole->size
601 - newfrag->ofs - newfrag->size);
602 if (unlikely(!newfrag2)) {
603 jffs2_free_node_frag(newfrag);
604 return -ENOMEM;
605 }
606 }
607
608 hole->size = newfrag->ofs - hole->ofs;
609 dbg_fragtree2("left the hole %#04x-%#04x at the left and inserd fragment %#04x-%#04x\n",
610 hole->ofs, hole->ofs + hole->size, newfrag->ofs, newfrag->ofs + newfrag->size);
611
612 jffs2_fragtree_insert(newfrag, hole);
613 rb_insert_color(&newfrag->rb, root);
614
615 if (newfrag2) {
616 dbg_fragtree2("left the hole %#04x-%#04x at the right\n",
617 newfrag2->ofs, newfrag2->ofs + newfrag2->size);
618 jffs2_fragtree_insert(newfrag2, newfrag);
619 rb_insert_color(&newfrag2->rb, root);
620 }
621 }
622
623 return 0;
624}
625
626/*
627 * This function is used when we build inode. It expects the nodes are passed
628 * in the decreasing version order. The whole point of this is to improve the
629 * inodes checking on NAND: we check the nodes' data CRC only when they are not
630 * obsoleted. Previously, add_frag_to_fragtree() function was used and
631 * nodes were passed to it in the increasing version ordes and CRCs of all
632 * nodes were checked.
633 *
634 * Note: tn->fn->size shouldn't be zero.
635 *
636 * Returns 0 if the node was inserted
637 * 1 if it wasn't inserted (since it is obsolete)
638 * < 0 an if error occured
639 */
640int jffs2_add_older_frag_to_fragtree(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
641 struct jffs2_tmp_dnode_info *tn)
642{
643 struct jffs2_node_frag *this, *newfrag;
644 uint32_t lastend;
645 struct jffs2_full_dnode *fn = tn->fn;
646 struct rb_root *root = &f->fragtree;
647 uint32_t fn_size = fn->size, fn_ofs = fn->ofs;
648 int err, checked = 0;
649 int ref_flag;
650
651 dbg_fragtree("insert fragment %#04x-%#04x, ver %u\n", fn_ofs, fn_ofs + fn_size, tn->version);
652
653 /* Skip all the nodes which are completed before this one starts */
654 this = jffs2_lookup_node_frag(root, fn_ofs);
655 if (this)
656 dbg_fragtree2("'this' found %#04x-%#04x (%s)\n", this->ofs, this->ofs + this->size, this->node ? "data" : "hole");
657
658 if (this)
659 lastend = this->ofs + this->size;
660 else
661 lastend = 0;
662
663 /* Detect the preliminary type of node */
664 if (fn->size >= PAGE_CACHE_SIZE)
665 ref_flag = REF_PRISTINE;
666 else
667 ref_flag = REF_NORMAL;
668
669 /* See if we ran off the end of the root */
670 if (lastend <= fn_ofs) {
671 /* We did */
672
673 /*
674 * We are going to insert the new node into the
675 * fragment tree, so check it.
676 */
677 err = check_node(c, f, tn);
678 if (err != 0)
679 return err;
680
681 fn->frags = 1;
682
683 newfrag = new_fragment(fn, fn_ofs, fn_size);
684 if (unlikely(!newfrag))
685 return -ENOMEM;
686
687 err = no_overlapping_node(c, root, newfrag, this, lastend);
688 if (unlikely(err != 0)) {
689 jffs2_free_node_frag(newfrag);
690 return err;
691 }
692
693 goto out_ok;
694 }
695
696 fn->frags = 0;
697
698 while (1) {
699 /*
700 * Here we have:
701 * fn_ofs < this->ofs + this->size && fn_ofs >= this->ofs.
702 *
703 * Remember, 'this' has higher version, any non-hole node
704 * which is already in the fragtree is newer then the newly
705 * inserted.
706 */
707 if (!this->node) {
708 /*
709 * 'this' is the hole fragment, so at least the
710 * beginning of the new fragment is valid.
711 */
712
713 /*
714 * We are going to insert the new node into the
715 * fragment tree, so check it.
716 */
717 if (!checked) {
718 err = check_node(c, f, tn);
719 if (unlikely(err != 0))
720 return err;
721 checked = 1;
722 }
723
724 if (this->ofs + this->size >= fn_ofs + fn_size) {
725 /* We split the hole on two parts */
726
727 fn->frags += 1;
728 newfrag = new_fragment(fn, fn_ofs, fn_size);
729 if (unlikely(!newfrag))
730 return -ENOMEM;
731
732 err = split_hole(c, root, newfrag, this);
733 if (unlikely(err))
734 return err;
735 goto out_ok;
736 }
737
738 /*
739 * The beginning of the new fragment is valid since it
740 * overlaps the hole node.
741 */
742
743 ref_flag = REF_NORMAL;
744
745 fn->frags += 1;
746 newfrag = new_fragment(fn, fn_ofs,
747 this->ofs + this->size - fn_ofs);
748 if (unlikely(!newfrag))
749 return -ENOMEM;
750
751 if (fn_ofs == this->ofs) {
752 /*
753 * The new node starts at the same offset as
754 * the hole and supersieds the hole.
755 */
756 dbg_fragtree2("add the new fragment instead of hole %#04x-%#04x, refcnt %d\n",
757 fn_ofs, fn_ofs + this->ofs + this->size - fn_ofs, fn->frags);
758
759 rb_replace_node(&this->rb, &newfrag->rb, root);
760 jffs2_free_node_frag(this);
761 } else {
762 /*
763 * The hole becomes shorter as its right part
764 * is supersieded by the new fragment.
765 */
766 dbg_fragtree2("reduce size of hole %#04x-%#04x to %#04x-%#04x\n",
767 this->ofs, this->ofs + this->size, this->ofs, this->ofs + this->size - newfrag->size);
768
769 dbg_fragtree2("add new fragment %#04x-%#04x, refcnt %d\n", fn_ofs,
770 fn_ofs + this->ofs + this->size - fn_ofs, fn->frags);
771
772 this->size -= newfrag->size;
773 jffs2_fragtree_insert(newfrag, this);
774 rb_insert_color(&newfrag->rb, root);
775 }
776
777 fn_ofs += newfrag->size;
778 fn_size -= newfrag->size;
779 this = rb_entry(rb_next(&newfrag->rb),
780 struct jffs2_node_frag, rb);
781
782 dbg_fragtree2("switch to the next 'this' fragment: %#04x-%#04x %s\n",
783 this->ofs, this->ofs + this->size, this->node ? "(data)" : "(hole)");
784 }
785
786 /*
787 * 'This' node is not the hole so it obsoletes the new fragment
788 * either fully or partially.
789 */
790 if (this->ofs + this->size >= fn_ofs + fn_size) {
791 /* The new node is obsolete, drop it */
792 if (fn->frags == 0) {
793 dbg_fragtree2("%#04x-%#04x is obsolete, mark it obsolete\n", fn_ofs, fn_ofs + fn_size);
794 ref_flag = REF_OBSOLETE;
795 }
796 goto out_ok;
797 } else {
798 struct jffs2_node_frag *new_this;
799
800 /* 'This' node obsoletes the beginning of the new node */
801 dbg_fragtree2("the beginning %#04x-%#04x is obsolete\n", fn_ofs, this->ofs + this->size);
802
803 ref_flag = REF_NORMAL;
804
805 fn_size -= this->ofs + this->size - fn_ofs;
806 fn_ofs = this->ofs + this->size;
807 dbg_fragtree2("now considering %#04x-%#04x\n", fn_ofs, fn_ofs + fn_size);
808
809 new_this = rb_entry(rb_next(&this->rb), struct jffs2_node_frag, rb);
810 if (!new_this) {
811 /*
812 * There is no next fragment. Add the rest of
813 * the new node as the right-hand child.
814 */
815 if (!checked) {
816 err = check_node(c, f, tn);
817 if (unlikely(err != 0))
818 return err;
819 checked = 1;
820 }
821
822 fn->frags += 1;
823 newfrag = new_fragment(fn, fn_ofs, fn_size);
824 if (unlikely(!newfrag))
825 return -ENOMEM;
826
827 dbg_fragtree2("there are no more fragments, insert %#04x-%#04x\n",
828 newfrag->ofs, newfrag->ofs + newfrag->size);
829 rb_link_node(&newfrag->rb, &this->rb, &this->rb.rb_right);
830 rb_insert_color(&newfrag->rb, root);
831 goto out_ok;
832 } else {
833 this = new_this;
834 dbg_fragtree2("switch to the next 'this' fragment: %#04x-%#04x %s\n",
835 this->ofs, this->ofs + this->size, this->node ? "(data)" : "(hole)");
836 }
837 }
838 }
839
840out_ok:
841 BUG_ON(fn->size < PAGE_CACHE_SIZE && ref_flag == REF_PRISTINE);
842
843 if (ref_flag == REF_OBSOLETE) {
844 dbg_fragtree2("the node is obsolete now\n");
845 /* jffs2_mark_node_obsolete() will adjust space accounting */
846 jffs2_mark_node_obsolete(c, fn->raw);
847 return 1;
848 }
849
850 dbg_fragtree2("the node is \"%s\" now\n", ref_flag == REF_NORMAL ? "REF_NORMAL" : "REF_PRISTINE");
851
852 /* Space accounting was adjusted at check_node_data() */
853 spin_lock(&c->erase_completion_lock);
854 fn->raw->flash_offset = ref_offset(fn->raw) | ref_flag;
855 spin_unlock(&c->erase_completion_lock);
856
857 return 0;
858}
859
860void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state) 400void jffs2_set_inocache_state(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic, int state)
861{ 401{
862 spin_lock(&c->inocache_lock); 402 spin_lock(&c->inocache_lock);