aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ubifs/recovery.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ubifs/recovery.c')
-rw-r--r--fs/ubifs/recovery.c354
1 files changed, 178 insertions, 176 deletions
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 3dbad6fbd1eb..731d9e2e7b50 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
564} 564}
565 565
566/** 566/**
567 * drop_incomplete_group - drop nodes from an incomplete group. 567 * drop_last_node - drop the last node or group of nodes.
568 * @sleb: scanned LEB information 568 * @sleb: scanned LEB information
569 * @offs: offset of dropped nodes is returned here 569 * @offs: offset of dropped nodes is returned here
570 * @grouped: non-zero if whole group of nodes have to be dropped
570 * 571 *
571 * This function returns %1 if nodes are dropped and %0 otherwise. 572 * This is a helper function for 'ubifs_recover_leb()' which drops the last
573 * node of the scanned LEB or the last group of nodes if @grouped is not zero.
574 * This function returns %1 if a node was dropped and %0 otherwise.
572 */ 575 */
573static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) 576static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped)
574{ 577{
575 int dropped = 0; 578 int dropped = 0;
576 579
@@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
589 kfree(snod); 592 kfree(snod);
590 sleb->nodes_cnt -= 1; 593 sleb->nodes_cnt -= 1;
591 dropped = 1; 594 dropped = 1;
595 if (!grouped)
596 break;
592 } 597 }
593 return dropped; 598 return dropped;
594} 599}
@@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
609struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 614struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
610 int offs, void *sbuf, int grouped) 615 int offs, void *sbuf, int grouped)
611{ 616{
612 int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; 617 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
613 int empty_chkd = 0, start = offs;
614 struct ubifs_scan_leb *sleb; 618 struct ubifs_scan_leb *sleb;
615 void *buf = sbuf + offs; 619 void *buf = sbuf + offs;
616 620
@@ -620,12 +624,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
620 if (IS_ERR(sleb)) 624 if (IS_ERR(sleb))
621 return sleb; 625 return sleb;
622 626
623 if (sleb->ecc) 627 ubifs_assert(len >= 8);
624 need_clean = 1;
625
626 while (len >= 8) { 628 while (len >= 8) {
627 int ret;
628
629 dbg_scan("look at LEB %d:%d (%d bytes left)", 629 dbg_scan("look at LEB %d:%d (%d bytes left)",
630 lnum, offs, len); 630 lnum, offs, len);
631 631
@@ -635,8 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
635 * Scan quietly until there is an error from which we cannot 635 * Scan quietly until there is an error from which we cannot
636 * recover 636 * recover
637 */ 637 */
638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); 638 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
639
640 if (ret == SCANNED_A_NODE) { 639 if (ret == SCANNED_A_NODE) {
641 /* A valid node, and not a padding node */ 640 /* A valid node, and not a padding node */
642 struct ubifs_ch *ch = buf; 641 struct ubifs_ch *ch = buf;
@@ -649,70 +648,32 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
649 offs += node_len; 648 offs += node_len;
650 buf += node_len; 649 buf += node_len;
651 len -= node_len; 650 len -= node_len;
652 continue; 651 } else if (ret > 0) {
653 }
654
655 if (ret > 0) {
656 /* Padding bytes or a valid padding node */ 652 /* Padding bytes or a valid padding node */
657 offs += ret; 653 offs += ret;
658 buf += ret; 654 buf += ret;
659 len -= ret; 655 len -= ret;
660 continue; 656 } else if (ret == SCANNED_EMPTY_SPACE ||
661 } 657 ret == SCANNED_GARBAGE ||
662 658 ret == SCANNED_A_BAD_PAD_NODE ||
663 if (ret == SCANNED_EMPTY_SPACE) { 659 ret == SCANNED_A_CORRUPT_NODE) {
664 if (!is_empty(buf, len)) { 660 dbg_rcvry("found corruption - %d", ret);
665 if (!is_last_write(c, buf, offs))
666 break;
667 clean_buf(c, &buf, lnum, &offs, &len);
668 need_clean = 1;
669 }
670 empty_chkd = 1;
671 break; 661 break;
672 } 662 } else {
673 663 dbg_err("unexpected return value %d", ret);
674 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
675 if (is_last_write(c, buf, offs)) {
676 clean_buf(c, &buf, lnum, &offs, &len);
677 need_clean = 1;
678 empty_chkd = 1;
679 break;
680 }
681
682 if (ret == SCANNED_A_CORRUPT_NODE)
683 if (no_more_nodes(c, buf, len, lnum, offs)) {
684 clean_buf(c, &buf, lnum, &offs, &len);
685 need_clean = 1;
686 empty_chkd = 1;
687 break;
688 }
689
690 if (quiet) {
691 /* Redo the last scan but noisily */
692 quiet = 0;
693 continue;
694 }
695
696 switch (ret) {
697 case SCANNED_GARBAGE:
698 dbg_err("garbage");
699 goto corrupted;
700 case SCANNED_A_CORRUPT_NODE:
701 case SCANNED_A_BAD_PAD_NODE:
702 dbg_err("bad node");
703 goto corrupted;
704 default:
705 dbg_err("unknown");
706 err = -EINVAL; 664 err = -EINVAL;
707 goto error; 665 goto error;
708 } 666 }
709 } 667 }
710 668
711 if (!empty_chkd && !is_empty(buf, len)) { 669 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
712 if (is_last_write(c, buf, offs)) { 670 if (!is_last_write(c, buf, offs))
713 clean_buf(c, &buf, lnum, &offs, &len); 671 goto corrupted_rescan;
714 need_clean = 1; 672 } else if (ret == SCANNED_A_CORRUPT_NODE) {
715 } else { 673 if (!no_more_nodes(c, buf, len, lnum, offs))
674 goto corrupted_rescan;
675 } else if (!is_empty(buf, len)) {
676 if (!is_last_write(c, buf, offs)) {
716 int corruption = first_non_ff(buf, len); 677 int corruption = first_non_ff(buf, len);
717 678
718 /* 679 /*
@@ -728,29 +689,82 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
728 } 689 }
729 } 690 }
730 691
731 /* Drop nodes from incomplete group */ 692 min_io_unit = round_down(offs, c->min_io_size);
732 if (grouped && drop_incomplete_group(sleb, &offs)) { 693 if (grouped)
733 buf = sbuf + offs; 694 /*
734 len = c->leb_size - offs; 695 * If nodes are grouped, always drop the incomplete group at
735 clean_buf(c, &buf, lnum, &offs, &len); 696 * the end.
736 need_clean = 1; 697 */
737 } 698 drop_last_node(sleb, &offs, 1);
738 699
739 if (offs % c->min_io_size) { 700 /*
740 clean_buf(c, &buf, lnum, &offs, &len); 701 * While we are in the middle of the same min. I/O unit keep dropping
741 need_clean = 1; 702 * nodes. So basically, what we want is to make sure that the last min.
742 } 703 * I/O unit where we saw the corruption is dropped completely with all
704 * the uncorrupted nodes which may possibly sit there.
705 *
706 * In other words, let's name the min. I/O unit where the corruption
707 * starts B, and the previous min. I/O unit A. The below code tries to
708 * deal with a situation when half of B contains valid nodes or the end
709 * of a valid node, and the second half of B contains corrupted data or
710 * garbage. This means that UBIFS had been writing to B just before the
711 * power cut happened. I do not know how realistic is this scenario
712 * that half of the min. I/O unit had been written successfully and the
713 * other half not, but this is possible in our 'failure mode emulation'
714 * infrastructure at least.
715 *
716 * So what is the problem, why do we need to drop those nodes? Why can't
717 * we just clean-up the second half of B by putting a padding node
718 * there? We can, and this works fine with one exception which was
719 * reproduced with power cut emulation testing and happens extremely
720 * rarely. The description follows, but it is worth noting that that is
721 * only about the GC head, so we could do this trick only if the bud
722 * belongs to the GC head, but it does not seem to be worth an
723 * additional "if" statement.
724 *
725 * So, imagine the file-system is full, we run GC which is moving valid
726 * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head
727 * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X
728 * and will try to continue. Imagine that LEB X is currently the
729 * dirtiest LEB, and the amount of used space in LEB Y is exactly the
730 * same as amount of free space in LEB X.
731 *
732 * And a power cut happens when nodes are moved from LEB X to LEB Y. We
733 * are here trying to recover LEB Y which is the GC head LEB. We find
734 * the min. I/O unit B as described above. Then we clean-up LEB Y by
735 * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function
736 * fails, because it cannot find a dirty LEB which could be GC'd into
737 * LEB Y! Even LEB X does not match because the amount of valid nodes
738 * there does not fit the free space in LEB Y any more! And this is
739 * because of the padding node which we added to LEB Y. The
740 * user-visible effect of this which I once observed and analysed is
741 * that we cannot mount the file-system with -ENOSPC error.
742 *
743 * So obviously, to make sure that situation does not happen we should
744 * free min. I/O unit B in LEB Y completely and the last used min. I/O
745 * unit in LEB Y should be A. This is basically what the below code
746 * tries to do.
747 */
748 while (min_io_unit == round_down(offs, c->min_io_size) &&
749 min_io_unit != offs &&
750 drop_last_node(sleb, &offs, grouped));
751
752 buf = sbuf + offs;
753 len = c->leb_size - offs;
743 754
755 clean_buf(c, &buf, lnum, &offs, &len);
744 ubifs_end_scan(c, sleb, lnum, offs); 756 ubifs_end_scan(c, sleb, lnum, offs);
745 757
746 if (need_clean) { 758 err = fix_unclean_leb(c, sleb, start);
747 err = fix_unclean_leb(c, sleb, start); 759 if (err)
748 if (err) 760 goto error;
749 goto error;
750 }
751 761
752 return sleb; 762 return sleb;
753 763
764corrupted_rescan:
765 /* Re-scan the corrupted data with verbose messages */
766 dbg_err("corruption %d", ret);
767 ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
754corrupted: 768corrupted:
755 ubifs_scanned_corruption(c, lnum, offs, buf); 769 ubifs_scanned_corruption(c, lnum, offs, buf);
756 err = -EUCLEAN; 770 err = -EUCLEAN;
@@ -1070,6 +1084,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1070} 1084}
1071 1085
1072/** 1086/**
1087 * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
1088 * @c: UBIFS file-system description object
1089 *
1090 * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
1091 * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
1092 * zero in case of success and a negative error code in case of failure.
1093 */
1094static int grab_empty_leb(struct ubifs_info *c)
1095{
1096 int lnum, err;
1097
1098 /*
1099 * Note, it is very important to first search for an empty LEB and then
1100 * run the commit, not vice-versa. The reason is that there might be
1101 * only one empty LEB at the moment, the one which has been the
1102 * @c->gc_lnum just before the power cut happened. During the regular
1103 * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
1104 * one but GC can grab it. But at this moment this single empty LEB is
1105 * not marked as taken, so if we run commit - what happens? Right, the
1106 * commit will grab it and write the index there. Remember that the
1107 * index always expands as long as there is free space, and it only
1108 * starts consolidating when we run out of space.
1109 *
1110 * IOW, if we run commit now, we might not be able to find a free LEB
1111 * after this.
1112 */
1113 lnum = ubifs_find_free_leb_for_idx(c);
1114 if (lnum < 0) {
1115 dbg_err("could not find an empty LEB");
1116 dbg_dump_lprops(c);
1117 dbg_dump_budg(c, &c->bi);
1118 return lnum;
1119 }
1120
1121 /* Reset the index flag */
1122 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1123 LPROPS_INDEX, 0);
1124 if (err)
1125 return err;
1126
1127 c->gc_lnum = lnum;
1128 dbg_rcvry("found empty LEB %d, run commit", lnum);
1129
1130 return ubifs_run_commit(c);
1131}
1132
1133/**
1073 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. 1134 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1074 * @c: UBIFS file-system description object 1135 * @c: UBIFS file-system description object
1075 * 1136 *
@@ -1091,71 +1152,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1091{ 1152{
1092 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 1153 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1093 struct ubifs_lprops lp; 1154 struct ubifs_lprops lp;
1094 int lnum, err; 1155 int err;
1156
1157 dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
1095 1158
1096 c->gc_lnum = -1; 1159 c->gc_lnum = -1;
1097 if (wbuf->lnum == -1) { 1160 if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
1098 dbg_rcvry("no GC head LEB"); 1161 return grab_empty_leb(c);
1099 goto find_free; 1162
1100 }
1101 /*
1102 * See whether the used space in the dirtiest LEB fits in the GC head
1103 * LEB.
1104 */
1105 if (wbuf->offs == c->leb_size) {
1106 dbg_rcvry("no room in GC head LEB");
1107 goto find_free;
1108 }
1109 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); 1163 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1110 if (err) { 1164 if (err) {
1111 /* 1165 if (err != -ENOSPC)
1112 * There are no dirty or empty LEBs subject to here being
1113 * enough for the index. Try to use
1114 * 'ubifs_find_free_leb_for_idx()', which will return any empty
1115 * LEBs (ignoring index requirements). If the index then
1116 * doesn't have enough LEBs the recovery commit will fail -
1117 * which is the same result anyway i.e. recovery fails. So
1118 * there is no problem ignoring index requirements and just
1119 * grabbing a free LEB since we have already established there
1120 * is not a dirty LEB we could have used instead.
1121 */
1122 if (err == -ENOSPC) {
1123 dbg_rcvry("could not find a dirty LEB");
1124 goto find_free;
1125 }
1126 return err;
1127 }
1128 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1129 lnum = lp.lnum;
1130 if (lp.free + lp.dirty == c->leb_size) {
1131 /* An empty LEB was returned */
1132 if (lp.free != c->leb_size) {
1133 err = ubifs_change_one_lp(c, lnum, c->leb_size,
1134 0, 0, 0, 0);
1135 if (err)
1136 return err;
1137 }
1138 err = ubifs_leb_unmap(c, lnum);
1139 if (err)
1140 return err; 1166 return err;
1141 c->gc_lnum = lnum; 1167
1142 dbg_rcvry("allocated LEB %d for GC", lnum); 1168 dbg_rcvry("could not find a dirty LEB");
1143 /* Run the commit */ 1169 return grab_empty_leb(c);
1144 dbg_rcvry("committing");
1145 return ubifs_run_commit(c);
1146 }
1147 /*
1148 * There was no empty LEB so the used space in the dirtiest LEB must fit
1149 * in the GC head LEB.
1150 */
1151 if (lp.free + lp.dirty < wbuf->offs) {
1152 dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1153 lnum, wbuf->lnum, wbuf->offs);
1154 err = ubifs_return_leb(c, lnum);
1155 if (err)
1156 return err;
1157 goto find_free;
1158 } 1170 }
1171
1172 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1173 ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
1174
1159 /* 1175 /*
1160 * We run the commit before garbage collection otherwise subsequent 1176 * We run the commit before garbage collection otherwise subsequent
1161 * mounts will see the GC and orphan deletion in a different order. 1177 * mounts will see the GC and orphan deletion in a different order.
@@ -1164,11 +1180,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1164 err = ubifs_run_commit(c); 1180 err = ubifs_run_commit(c);
1165 if (err) 1181 if (err)
1166 return err; 1182 return err;
1167 /* 1183
1168 * The data in the dirtiest LEB fits in the GC head LEB, so do the GC 1184 dbg_rcvry("GC'ing LEB %d", lp.lnum);
1169 * - use locking to keep 'ubifs_assert()' happy.
1170 */
1171 dbg_rcvry("GC'ing LEB %d", lnum);
1172 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 1185 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1173 err = ubifs_garbage_collect_leb(c, &lp); 1186 err = ubifs_garbage_collect_leb(c, &lp);
1174 if (err >= 0) { 1187 if (err >= 0) {
@@ -1184,37 +1197,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1184 err = -EINVAL; 1197 err = -EINVAL;
1185 return err; 1198 return err;
1186 } 1199 }
1187 if (err != LEB_RETAINED) { 1200
1188 dbg_err("GC returned %d", err); 1201 ubifs_assert(err == LEB_RETAINED);
1202 if (err != LEB_RETAINED)
1189 return -EINVAL; 1203 return -EINVAL;
1190 } 1204
1191 err = ubifs_leb_unmap(c, c->gc_lnum); 1205 err = ubifs_leb_unmap(c, c->gc_lnum);
1192 if (err) 1206 if (err)
1193 return err; 1207 return err;
1194 dbg_rcvry("allocated LEB %d for GC", lnum);
1195 return 0;
1196 1208
1197find_free: 1209 dbg_rcvry("allocated LEB %d for GC", lp.lnum);
1198 /* 1210 return 0;
1199 * There is no GC head LEB or the free space in the GC head LEB is too
1200 * small, or there are not dirty LEBs. Allocate gc_lnum by calling
1201 * 'ubifs_find_free_leb_for_idx()' so GC is not run.
1202 */
1203 lnum = ubifs_find_free_leb_for_idx(c);
1204 if (lnum < 0) {
1205 dbg_err("could not find an empty LEB");
1206 return lnum;
1207 }
1208 /* And reset the index flag */
1209 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1210 LPROPS_INDEX, 0);
1211 if (err)
1212 return err;
1213 c->gc_lnum = lnum;
1214 dbg_rcvry("allocated LEB %d for GC", lnum);
1215 /* Run the commit */
1216 dbg_rcvry("committing");
1217 return ubifs_run_commit(c);
1218} 1211}
1219 1212
1220/** 1213/**
@@ -1456,7 +1449,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1456 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 1449 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1457 if (err) 1450 if (err)
1458 goto out; 1451 goto out;
1459 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", 1452 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
1460 (unsigned long)e->inum, lnum, offs, i_size, e->d_size); 1453 (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1461 return 0; 1454 return 0;
1462 1455
@@ -1505,20 +1498,27 @@ int ubifs_recover_size(struct ubifs_info *c)
1505 e->i_size = le64_to_cpu(ino->size); 1498 e->i_size = le64_to_cpu(ino->size);
1506 } 1499 }
1507 } 1500 }
1501
1508 if (e->exists && e->i_size < e->d_size) { 1502 if (e->exists && e->i_size < e->d_size) {
1509 if (!e->inode && c->ro_mount) { 1503 if (c->ro_mount) {
1510 /* Fix the inode size and pin it in memory */ 1504 /* Fix the inode size and pin it in memory */
1511 struct inode *inode; 1505 struct inode *inode;
1506 struct ubifs_inode *ui;
1507
1508 ubifs_assert(!e->inode);
1512 1509
1513 inode = ubifs_iget(c->vfs_sb, e->inum); 1510 inode = ubifs_iget(c->vfs_sb, e->inum);
1514 if (IS_ERR(inode)) 1511 if (IS_ERR(inode))
1515 return PTR_ERR(inode); 1512 return PTR_ERR(inode);
1513
1514 ui = ubifs_inode(inode);
1516 if (inode->i_size < e->d_size) { 1515 if (inode->i_size < e->d_size) {
1517 dbg_rcvry("ino %lu size %lld -> %lld", 1516 dbg_rcvry("ino %lu size %lld -> %lld",
1518 (unsigned long)e->inum, 1517 (unsigned long)e->inum,
1519 e->d_size, inode->i_size); 1518 inode->i_size, e->d_size);
1520 inode->i_size = e->d_size; 1519 inode->i_size = e->d_size;
1521 ubifs_inode(inode)->ui_size = e->d_size; 1520 ui->ui_size = e->d_size;
1521 ui->synced_i_size = e->d_size;
1522 e->inode = inode; 1522 e->inode = inode;
1523 this = rb_next(this); 1523 this = rb_next(this);
1524 continue; 1524 continue;
@@ -1533,9 +1533,11 @@ int ubifs_recover_size(struct ubifs_info *c)
1533 iput(e->inode); 1533 iput(e->inode);
1534 } 1534 }
1535 } 1535 }
1536
1536 this = rb_next(this); 1537 this = rb_next(this);
1537 rb_erase(&e->rb, &c->size_tree); 1538 rb_erase(&e->rb, &c->size_tree);
1538 kfree(e); 1539 kfree(e);
1539 } 1540 }
1541
1540 return 0; 1542 return 0;
1541} 1543}