diff options
-rw-r--r-- | fs/ubifs/recovery.c | 82 |
1 files changed, 72 insertions, 10 deletions
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 74281f135b04..731d9e2e7b50 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
@@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
564 | } | 564 | } |
565 | 565 | ||
566 | /** | 566 | /** |
567 | * drop_incomplete_group - drop nodes from an incomplete group. | 567 | * drop_last_node - drop the last node or group of nodes. |
568 | * @sleb: scanned LEB information | 568 | * @sleb: scanned LEB information |
569 | * @offs: offset of dropped nodes is returned here | 569 | * @offs: offset of dropped nodes is returned here |
570 | * @grouped: non-zero if whole group of nodes have to be dropped | ||
570 | * | 571 | * |
571 | * This function returns %1 if nodes are dropped and %0 otherwise. | 572 | * This is a helper function for 'ubifs_recover_leb()' which drops the last |
573 | * node of the scanned LEB or the last group of nodes if @grouped is not zero. | ||
574 | * This function returns %1 if a node was dropped and %0 otherwise. | ||
572 | */ | 575 | */ |
573 | static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | 576 | static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) |
574 | { | 577 | { |
575 | int dropped = 0; | 578 | int dropped = 0; |
576 | 579 | ||
@@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | |||
589 | kfree(snod); | 592 | kfree(snod); |
590 | sleb->nodes_cnt -= 1; | 593 | sleb->nodes_cnt -= 1; |
591 | dropped = 1; | 594 | dropped = 1; |
595 | if (!grouped) | ||
596 | break; | ||
592 | } | 597 | } |
593 | return dropped; | 598 | return dropped; |
594 | } | 599 | } |
@@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | |||
609 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | 614 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, |
610 | int offs, void *sbuf, int grouped) | 615 | int offs, void *sbuf, int grouped) |
611 | { | 616 | { |
612 | int ret = 0, err, len = c->leb_size - offs; | 617 | int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; |
613 | int start = offs; | ||
614 | struct ubifs_scan_leb *sleb; | 618 | struct ubifs_scan_leb *sleb; |
615 | void *buf = sbuf + offs; | 619 | void *buf = sbuf + offs; |
616 | 620 | ||
@@ -620,6 +624,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
620 | if (IS_ERR(sleb)) | 624 | if (IS_ERR(sleb)) |
621 | return sleb; | 625 | return sleb; |
622 | 626 | ||
627 | ubifs_assert(len >= 8); | ||
623 | while (len >= 8) { | 628 | while (len >= 8) { |
624 | dbg_scan("look at LEB %d:%d (%d bytes left)", | 629 | dbg_scan("look at LEB %d:%d (%d bytes left)", |
625 | lnum, offs, len); | 630 | lnum, offs, len); |
@@ -684,11 +689,68 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
684 | } | 689 | } |
685 | } | 690 | } |
686 | 691 | ||
687 | /* Drop nodes from incomplete group */ | 692 | min_io_unit = round_down(offs, c->min_io_size); |
688 | if (grouped && drop_incomplete_group(sleb, &offs)) { | 693 | if (grouped) |
689 | buf = sbuf + offs; | 694 | /* |
690 | len = c->leb_size - offs; | 695 | * If nodes are grouped, always drop the incomplete group at |
691 | } | 696 | * the end. |
697 | */ | ||
698 | drop_last_node(sleb, &offs, 1); | ||
699 | |||
700 | /* | ||
701 | * While we are in the middle of the same min. I/O unit keep dropping | ||
702 | * nodes. So basically, what we want is to make sure that the last min. | ||
703 | * I/O unit where we saw the corruption is dropped completely with all | ||
704 | * the uncorrupted node which may possibly sit there. | ||
705 | * | ||
706 | * In other words, let's name the min. I/O unit where the corruption | ||
707 | * starts B, and the previous min. I/O unit A. The below code tries to | ||
708 | * deal with a situation when half of B contains valid nodes or the end | ||
709 | * of a valid node, and the second half of B contains corrupted data or | ||
710 | * garbage. This means that UBIFS had been writing to B just before the | ||
711 | * power cut happened. I do not know how realistic is this scenario | ||
712 | * that half of the min. I/O unit had been written successfully and the | ||
713 | * other half not, but this is possible in our 'failure mode emulation' | ||
714 | * infrastructure at least. | ||
715 | * | ||
716 | * So what is the problem, why we need to drop those nodes? Whey can't | ||
717 | * we just clean-up the second half of B by putting a padding node | ||
718 | * there? We can, and this works fine with one exception which was | ||
719 | * reproduced with power cut emulation testing and happens extremely | ||
720 | * rarely. The description follows, but it is worth noting that that is | ||
721 | * only about the GC head, so we could do this trick only if the bud | ||
722 | * belongs to the GC head, but it does not seem to be worth an | ||
723 | * additional "if" statement. | ||
724 | * | ||
725 | * So, imagine the file-system is full, we run GC which is moving valid | ||
726 | * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head | ||
727 | * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X | ||
728 | * and will try to continue. Imagine that LEB X is currently the | ||
729 | * dirtiest LEB, and the amount of used space in LEB Y is exactly the | ||
730 | * same as amount of free space in LEB X. | ||
731 | * | ||
732 | * And a power cut happens when nodes are moved from LEB X to LEB Y. We | ||
733 | * are here trying to recover LEB Y which is the GC head LEB. We find | ||
734 | * the min. I/O unit B as described above. Then we clean-up LEB Y by | ||
735 | * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function | ||
736 | * fails, because it cannot find a dirty LEB which could be GC'd into | ||
737 | * LEB Y! Even LEB X does not match because the amount of valid nodes | ||
738 | * there does not fit the free space in LEB Y any more! And this is | ||
739 | * because of the padding node which we added to LEB Y. The | ||
740 | * user-visible effect of this which I once observed and analysed is | ||
741 | * that we cannot mount the file-system with -ENOSPC error. | ||
742 | * | ||
743 | * So obviously, to make sure that situation does not happen we should | ||
744 | * free min. I/O unit B in LEB Y completely and the last used min. I/O | ||
745 | * unit in LEB Y should be A. This is basically what the below code | ||
746 | * tries to do. | ||
747 | */ | ||
748 | while (min_io_unit == round_down(offs, c->min_io_size) && | ||
749 | min_io_unit != offs && | ||
750 | drop_last_node(sleb, &offs, grouped)); | ||
751 | |||
752 | buf = sbuf + offs; | ||
753 | len = c->leb_size - offs; | ||
692 | 754 | ||
693 | clean_buf(c, &buf, lnum, &offs, &len); | 755 | clean_buf(c, &buf, lnum, &offs, &len); |
694 | ubifs_end_scan(c, sleb, lnum, offs); | 756 | ubifs_end_scan(c, sleb, lnum, offs); |