aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/mtd/ubi/scan.c
diff options
context:
space:
mode:
authorArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2010-09-03 10:11:37 -0400
committerArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2010-10-19 10:19:57 -0400
commit0525dac9fd31e5a12fb934238abd09e2752a5967 (patch)
tree9f5f04c0d073d9c4e628afdaa5ad00a1504061f0 /drivers/mtd/ubi/scan.c
parent3fb34124da9d5e37576d9f87d7a5005ba1d82dd7 (diff)
UBI: do not put eraseblocks to the corrupted list unnecessarily
Currently UBI maintains 2 lists of PEBs during scanning: 1. 'erase' list - PEBs which have no corruptions but should be erased; 2. 'corr' list - PEBs which have some corruptions and should be erased. But we do not really need 2 lists for PEBs which should be erased after scanning is done - this is redundant. So this patch makes sure all PEBs which are corrupted are moved to the head of the 'erase' list. We add them to the head to make sure they are erased first and we get rid of corruption ASAP. However, we do not remove the 'corr' list and related functions, because the plan is to use this list for other purposes. Namely, we plan to put there eraseblocks with corruption which does not look like it was caused by an unclean power cut. Then we'll preserve these PEBs in order to avoid killing potentially valuable user data. This patch also amends PEB accounting, because it was closely tied to the 'erase'/'corr' lists separation. Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Diffstat (limited to 'drivers/mtd/ubi/scan.c')
-rw-r--r--drivers/mtd/ubi/scan.c145
1 files changed, 83 insertions, 62 deletions
diff --git a/drivers/mtd/ubi/scan.c b/drivers/mtd/ubi/scan.c
index fba3dc6a97e9..19dc5e04fd61 100644
--- a/drivers/mtd/ubi/scan.c
+++ b/drivers/mtd/ubi/scan.c
@@ -29,7 +29,7 @@
29 * objects which are kept in volume RB-tree with root at the @volumes field. 29 * objects which are kept in volume RB-tree with root at the @volumes field.
30 * The RB-tree is indexed by the volume ID. 30 * The RB-tree is indexed by the volume ID.
31 * 31 *
32 * Found logical eraseblocks are represented by &struct ubi_scan_leb objects. 32 * Scanned logical eraseblocks are represented by &struct ubi_scan_leb objects.
33 * These objects are kept in per-volume RB-trees with the root at the 33 * These objects are kept in per-volume RB-trees with the root at the
34 * corresponding &struct ubi_scan_volume object. To put it differently, we keep 34 * corresponding &struct ubi_scan_volume object. To put it differently, we keep
35 * an RB-tree of per-volume objects and each of these objects is the root of 35 * an RB-tree of per-volume objects and each of these objects is the root of
@@ -38,6 +38,21 @@
38 * Corrupted physical eraseblocks are put to the @corr list, free physical 38 * Corrupted physical eraseblocks are put to the @corr list, free physical
39 * eraseblocks are put to the @free list and the physical eraseblock to be 39 * eraseblocks are put to the @free list and the physical eraseblock to be
40 * erased are put to the @erase list. 40 * erased are put to the @erase list.
41 *
42 * UBI tries to distinguish between 2 types of corruptions.
43 * 1. Corruptions caused by power cuts. These are harmless and expected
44 * corruptions and UBI tries to handle them gracefully, without printing too
45 * many warnings and error messages. The idea is that we do not lose
46 * important data in these cases - we may lose only the data which was being
47 * written to the media just before the power cut happened, and the upper
48 * layers are supposed to handle these situations. UBI puts these PEBs to
49 * the head of the @erase list and they are scheduled for erasure.
50 *
51 * 2. Unexpected corruptions which are not caused by power cuts. During
52 * scanning, such PEBs are put to the @corr list and UBI preserves them.
53 * Obviously, this lessens the amount of available PEBs, and if at some
54 * point UBI runs out of free PEBs, it switches to R/O mode. UBI also loudly
55 * informs about such PEBs every time the MTD device is attached.
41 */ 56 */
42 57
43#include <linux/err.h> 58#include <linux/err.h>
@@ -62,23 +77,26 @@ static struct ubi_vid_hdr *vidh;
62 * @si: scanning information 77 * @si: scanning information
63 * @pnum: physical eraseblock number to add 78 * @pnum: physical eraseblock number to add
64 * @ec: erase counter of the physical eraseblock 79 * @ec: erase counter of the physical eraseblock
80 * @to_head: if not zero, add to the head of the list
65 * @list: the list to add to 81 * @list: the list to add to
66 * 82 *
67 * This function adds physical eraseblock @pnum to free, erase, or alien lists. 83 * This function adds physical eraseblock @pnum to free, erase, or alien lists.
68 * Returns zero in case of success and a negative error code in case of 84 * If @to_head is not zero, PEB will be added to the head of the list, which
85 * basically means it will be processed first later. E.g., we add corrupted
86 * PEBs (corrupted due to power cuts) to the head of the erase list to make
87 * sure we erase them first and get rid of corruptions ASAP. This function
88 * returns zero in case of success and a negative error code in case of
69 * failure. 89 * failure.
70 */ 90 */
71static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, 91static int add_to_list(struct ubi_scan_info *si, int pnum, int ec, int to_head,
72 struct list_head *list) 92 struct list_head *list)
73{ 93{
74 struct ubi_scan_leb *seb; 94 struct ubi_scan_leb *seb;
75 95
76 if (list == &si->free) { 96 if (list == &si->free) {
77 dbg_bld("add to free: PEB %d, EC %d", pnum, ec); 97 dbg_bld("add to free: PEB %d, EC %d", pnum, ec);
78 si->free_peb_count += 1;
79 } else if (list == &si->erase) { 98 } else if (list == &si->erase) {
80 dbg_bld("add to erase: PEB %d, EC %d", pnum, ec); 99 dbg_bld("add to erase: PEB %d, EC %d", pnum, ec);
81 si->erase_peb_count += 1;
82 } else if (list == &si->alien) { 100 } else if (list == &si->alien) {
83 dbg_bld("add to alien: PEB %d, EC %d", pnum, ec); 101 dbg_bld("add to alien: PEB %d, EC %d", pnum, ec);
84 si->alien_peb_count += 1; 102 si->alien_peb_count += 1;
@@ -91,7 +109,10 @@ static int add_to_list(struct ubi_scan_info *si, int pnum, int ec,
91 109
92 seb->pnum = pnum; 110 seb->pnum = pnum;
93 seb->ec = ec; 111 seb->ec = ec;
94 list_add_tail(&seb->u.list, list); 112 if (to_head)
113 list_add(&seb->u.list, list);
114 else
115 list_add_tail(&seb->u.list, list);
95 return 0; 116 return 0;
96} 117}
97 118
@@ -282,8 +303,8 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
282 * created before sequence numbers support has been added. At 303 * created before sequence numbers support has been added. At
283 * that time we used 32-bit LEB versions stored in logical 304 * that time we used 32-bit LEB versions stored in logical
284 * eraseblocks. That was before UBI got into mainline. We do not 305 * eraseblocks. That was before UBI got into mainline. We do not
285 * support these images anymore. Well, those images will work 306 * support these images anymore. Well, those images still work,
286 * still work, but only if no unclean reboots happened. 307 * but only if no unclean reboots happened.
287 */ 308 */
288 ubi_err("unsupported on-flash UBI format\n"); 309 ubi_err("unsupported on-flash UBI format\n");
289 return -EINVAL; 310 return -EINVAL;
@@ -321,7 +342,7 @@ static int compare_lebs(struct ubi_device *ubi, const struct ubi_scan_leb *seb,
321 bitflips = 1; 342 bitflips = 1;
322 else { 343 else {
323 dbg_err("VID of PEB %d header is bad, but it " 344 dbg_err("VID of PEB %d header is bad, but it "
324 "was OK earlier", pnum); 345 "was OK earlier, err %d", pnum, err);
325 if (err > 0) 346 if (err > 0)
326 err = -EIO; 347 err = -EIO;
327 348
@@ -487,11 +508,8 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
487 if (err) 508 if (err)
488 return err; 509 return err;
489 510
490 if (cmp_res & 4) 511 err = add_to_list(si, seb->pnum, seb->ec, cmp_res & 4,
491 err = add_corrupted(si, seb->pnum, seb->ec); 512 &si->erase);
492 else
493 err = add_to_list(si, seb->pnum, seb->ec,
494 &si->erase);
495 if (err) 513 if (err)
496 return err; 514 return err;
497 515
@@ -510,10 +528,8 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
510 * This logical eraseblock is older than the one found 528 * This logical eraseblock is older than the one found
511 * previously. 529 * previously.
512 */ 530 */
513 if (cmp_res & 4) 531 return add_to_list(si, pnum, ec, cmp_res & 4,
514 return add_corrupted(si, pnum, ec); 532 &si->erase);
515 else
516 return add_to_list(si, pnum, ec, &si->erase);
517 } 533 }
518 } 534 }
519 535
@@ -544,7 +560,6 @@ int ubi_scan_add_used(struct ubi_device *ubi, struct ubi_scan_info *si,
544 sv->leb_count += 1; 560 sv->leb_count += 1;
545 rb_link_node(&seb->u.rb, parent, p); 561 rb_link_node(&seb->u.rb, parent, p);
546 rb_insert_color(&seb->u.rb, &sv->root); 562 rb_insert_color(&seb->u.rb, &sv->root);
547 si->used_peb_count += 1;
548 return 0; 563 return 0;
549} 564}
550 565
@@ -776,10 +791,14 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
776 bitflips = 1; 791 bitflips = 1;
777 break; 792 break;
778 case UBI_IO_FF: 793 case UBI_IO_FF:
794 si->empty_peb_count += 1;
795 return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 0,
796 &si->erase);
779 case UBI_IO_FF_BITFLIPS: 797 case UBI_IO_FF_BITFLIPS:
780 return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, &si->erase); 798 si->empty_peb_count += 1;
799 return add_to_list(si, pnum, UBI_SCAN_UNKNOWN_EC, 1,
800 &si->erase);
781 case UBI_IO_BAD_HDR_EBADMSG: 801 case UBI_IO_BAD_HDR_EBADMSG:
782 si->read_err_count += 1;
783 case UBI_IO_BAD_HDR: 802 case UBI_IO_BAD_HDR:
784 /* 803 /*
785 * We have to also look at the VID header, possibly it is not 804 * We have to also look at the VID header, possibly it is not
@@ -855,18 +874,25 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
855 bitflips = 1; 874 bitflips = 1;
856 break; 875 break;
857 case UBI_IO_BAD_HDR_EBADMSG: 876 case UBI_IO_BAD_HDR_EBADMSG:
858 si->read_err_count += 1; 877 if (ec_err == UBI_IO_BAD_HDR_EBADMSG)
878 /*
879 * Both EC and VID headers are corrupted and were read
880 * with data integrity error, probably this is a bad
881 * PEB, but it is not marked as bad yet. This may also
882 * be a result of power cut during erasure.
883 */
884 si->maybe_bad_peb_count += 1;
859 case UBI_IO_BAD_HDR: 885 case UBI_IO_BAD_HDR:
860 case UBI_IO_FF_BITFLIPS: 886 case UBI_IO_FF_BITFLIPS:
861 err = add_corrupted(si, pnum, ec); 887 err = add_to_list(si, pnum, ec, 1, &si->erase);
862 if (err) 888 if (err)
863 return err; 889 return err;
864 goto adjust_mean_ec; 890 goto adjust_mean_ec;
865 case UBI_IO_FF: 891 case UBI_IO_FF:
866 if (ec_err) 892 if (ec_err)
867 err = add_corrupted(si, pnum, ec); 893 err = add_to_list(si, pnum, ec, 1, &si->erase);
868 else 894 else
869 err = add_to_list(si, pnum, ec, &si->free); 895 err = add_to_list(si, pnum, ec, 0, &si->free);
870 if (err) 896 if (err)
871 return err; 897 return err;
872 goto adjust_mean_ec; 898 goto adjust_mean_ec;
@@ -885,7 +911,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
885 case UBI_COMPAT_DELETE: 911 case UBI_COMPAT_DELETE:
886 ubi_msg("\"delete\" compatible internal volume %d:%d" 912 ubi_msg("\"delete\" compatible internal volume %d:%d"
887 " found, will remove it", vol_id, lnum); 913 " found, will remove it", vol_id, lnum);
888 err = add_to_list(si, pnum, ec, &si->erase); 914 err = add_to_list(si, pnum, ec, 1, &si->erase);
889 if (err) 915 if (err)
890 return err; 916 return err;
891 return 0; 917 return 0;
@@ -900,7 +926,7 @@ static int process_eb(struct ubi_device *ubi, struct ubi_scan_info *si,
900 case UBI_COMPAT_PRESERVE: 926 case UBI_COMPAT_PRESERVE:
901 ubi_msg("\"preserve\" compatible internal volume %d:%d" 927 ubi_msg("\"preserve\" compatible internal volume %d:%d"
902 " found", vol_id, lnum); 928 " found", vol_id, lnum);
903 err = add_to_list(si, pnum, ec, &si->alien); 929 err = add_to_list(si, pnum, ec, 0, &si->alien);
904 if (err) 930 if (err)
905 return err; 931 return err;
906 return 0; 932 return 0;
@@ -946,19 +972,20 @@ adjust_mean_ec:
946static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si) 972static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si)
947{ 973{
948 struct ubi_scan_leb *seb; 974 struct ubi_scan_leb *seb;
949 int max_corr; 975 int max_corr, peb_count;
950 976
951 max_corr = ubi->peb_count - si->bad_peb_count - si->alien_peb_count; 977 peb_count = ubi->peb_count - si->bad_peb_count - si->alien_peb_count;
952 max_corr = max_corr / 20 ?: 8; 978 max_corr = peb_count / 20 ?: 8;
953 979
954 /* 980 /*
955 * Few corrupted PEBs are not a problem and may be just a result of 981 * Few corrupted PEBs is not a problem and may be just a result of
956 * unclean reboots. However, many of them may indicate some problems 982 * unclean reboots. However, many of them may indicate some problems
957 * with the flash HW or driver. 983 * with the flash HW or driver.
958 */ 984 */
959 if (si->corr_peb_count >= 8) { 985 if (si->corr_peb_count) {
960 ubi_warn("%d PEBs are corrupted", si->corr_peb_count); 986 ubi_err("%d PEBs are corrupted and preserved",
961 printk(KERN_WARNING "corrupted PEBs are:"); 987 si->corr_peb_count);
988 printk(KERN_ERR "Corrupted PEBs are:");
962 list_for_each_entry(seb, &si->corr, u.list) 989 list_for_each_entry(seb, &si->corr, u.list)
963 printk(KERN_CONT " %d", seb->pnum); 990 printk(KERN_CONT " %d", seb->pnum);
964 printk(KERN_CONT "\n"); 991 printk(KERN_CONT "\n");
@@ -973,41 +1000,35 @@ static int check_what_we_have(struct ubi_device *ubi, struct ubi_scan_info *si)
973 } 1000 }
974 } 1001 }
975 1002
976 if (si->free_peb_count + si->used_peb_count + 1003 if (si->empty_peb_count + si->maybe_bad_peb_count == peb_count) {
977 si->alien_peb_count == 0) { 1004 /*
978 /* No UBI-formatted eraseblocks were found */ 1005 * All PEBs are empty, or almost all - a couple PEBs look like
979 if (si->corr_peb_count == si->read_err_count && 1006 * they may be bad PEBs which were not marked as bad yet.
980 si->corr_peb_count < 8) { 1007 *
981 /* No or just few corrupted PEBs, and all of them had a 1008 * This piece of code basically tries to distinguish between
982 * read error. We assume that those are bad PEBs, which 1009 * the following situations:
983 * were just not marked as bad so far. 1010 *
984 * 1011 * 1. Flash is empty, but there are few bad PEBs, which are not
985 * This piece of code basically tries to distinguish 1012 * marked as bad so far, and which were read with error. We
986 * between the following 2 situations: 1013 * want to go ahead and format this flash. While formatting,
987 * 1014 * the faulty PEBs will probably be marked as bad.
988 * 1. Flash is empty, but there are few bad PEBs, which 1015 *
989 * are not marked as bad so far, and which were read 1016 * 2. Flash contains non-UBI data and we do not want to format
990 * with error. We want to go ahead and format this 1017 * it and destroy possibly important information.
991 * flash. While formating, the faulty PEBs will 1018 */
992 * probably be marked as bad. 1019 if (si->maybe_bad_peb_count <= 2) {
993 *
994 * 2. Flash probably contains non-UBI data and we do
995 * not want to format it and destroy possibly needed
996 * data (e.g., consider the case when the bootloader
997 * MTD partition was accidentally fed to UBI).
998 */
999 si->is_empty = 1; 1020 si->is_empty = 1;
1000 ubi_msg("empty MTD device detected"); 1021 ubi_msg("empty MTD device detected");
1001 get_random_bytes(&ubi->image_seq, sizeof(ubi->image_seq)); 1022 get_random_bytes(&ubi->image_seq,
1023 sizeof(ubi->image_seq));
1002 } else { 1024 } else {
1003 ubi_err("MTD device possibly contains non-UBI data, " 1025 ubi_err("MTD device is not UBI-formatted and possibly "
1004 "refusing it"); 1026 "contains non-UBI data - refusing it");
1005 return -EINVAL; 1027 return -EINVAL;
1006 } 1028 }
1029
1007 } 1030 }
1008 1031
1009 if (si->corr_peb_count > 0)
1010 ubi_msg("corrupted PEBs will be formatted");
1011 return 0; 1032 return 0;
1012} 1033}
1013 1034