aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Weinberger <richard@nod.at>2016-06-14 04:12:17 -0400
committerRichard Weinberger <richard@nod.at>2016-07-29 17:32:54 -0400
commit74f2c6e9a47cf4e508198c8594626cc82906a13d (patch)
tree353246f510fcff72011013eacaa228b92b4a45db
parent5283ec72b0cca44ccea63070f6954fd8675b9ac4 (diff)
ubi: Be more paranoid while searching for the most recent Fastmap
Since PEB erasure is asynchronous it can happen that there is more than one Fastmap on the MTD. This is fine because the attach logic will pick the Fastmap data structure with the highest sequence number. On a not so well configured MTD stack spurious ECC errors are common. Causes can be different: bad hardware, wrong operating modes, etc. If the most current Fastmap renders bad due to ECC errors UBI might pick an older Fastmap to attach from. While this can only happen on an anyway broken setup, it will show completely different symptoms and make finding the root cause much more difficult. So, be debug friendly and fall back to scanning mode if we're facing an ECC error while scanning for Fastmap. Cc: <stable@vger.kernel.org> Signed-off-by: Richard Weinberger <richard@nod.at>
-rw-r--r--drivers/mtd/ubi/attach.c28
-rw-r--r--drivers/mtd/ubi/ubi.h3
2 files changed, 27 insertions, 4 deletions
diff --git a/drivers/mtd/ubi/attach.c b/drivers/mtd/ubi/attach.c
index bd6fc528b10a..903becd31410 100644
--- a/drivers/mtd/ubi/attach.c
+++ b/drivers/mtd/ubi/attach.c
@@ -856,13 +856,15 @@ static bool vol_ignored(int vol_id)
856 * @ubi: UBI device description object 856 * @ubi: UBI device description object
857 * @ai: attaching information 857 * @ai: attaching information
858 * @pnum: the physical eraseblock number 858 * @pnum: the physical eraseblock number
859 * @fast: true if we're scanning for a Fastmap
859 * 860 *
860 * This function reads UBI headers of PEB @pnum, checks them, and adds 861 * This function reads UBI headers of PEB @pnum, checks them, and adds
861 * information about this PEB to the corresponding list or RB-tree in the 862 * information about this PEB to the corresponding list or RB-tree in the
862 * "attaching info" structure. Returns zero if the physical eraseblock was 863 * "attaching info" structure. Returns zero if the physical eraseblock was
863 * successfully handled and a negative error code in case of failure. 864 * successfully handled and a negative error code in case of failure.
864 */ 865 */
865static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum) 866static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai,
867 int pnum, bool fast)
866{ 868{
867 long long ec; 869 long long ec;
868 int err, bitflips = 0, vol_id = -1, ec_err = 0; 870 int err, bitflips = 0, vol_id = -1, ec_err = 0;
@@ -980,6 +982,20 @@ static int scan_peb(struct ubi_device *ubi, struct ubi_attach_info *ai, int pnum
980 */ 982 */
981 ai->maybe_bad_peb_count += 1; 983 ai->maybe_bad_peb_count += 1;
982 case UBI_IO_BAD_HDR: 984 case UBI_IO_BAD_HDR:
985 /*
986 * If we're facing a bad VID header we have to drop *all*
987 * Fastmap data structures we find. The most recent Fastmap
988 * could be bad and therefore there is a chance that we attach
989 * from an old one. On a fine MTD stack a PEB must not render
990 * bad all of a sudden, but the reality is different.
991 * So, let's be paranoid and help finding the root cause by
992 * falling back to scanning mode instead of attaching with a
993 * bad EBA table and cause data corruption which is hard to
994 * analyze.
995 */
996 if (fast)
997 ai->force_full_scan = 1;
998
983 if (ec_err) 999 if (ec_err)
984 /* 1000 /*
985 * Both headers are corrupted. There is a possibility 1001 * Both headers are corrupted. There is a possibility
@@ -1293,7 +1309,7 @@ static int scan_all(struct ubi_device *ubi, struct ubi_attach_info *ai,
1293 cond_resched(); 1309 cond_resched();
1294 1310
1295 dbg_gen("process PEB %d", pnum); 1311 dbg_gen("process PEB %d", pnum);
1296 err = scan_peb(ubi, ai, pnum); 1312 err = scan_peb(ubi, ai, pnum, false);
1297 if (err < 0) 1313 if (err < 0)
1298 goto out_vidh; 1314 goto out_vidh;
1299 } 1315 }
@@ -1407,7 +1423,7 @@ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai)
1407 cond_resched(); 1423 cond_resched();
1408 1424
1409 dbg_gen("process PEB %d", pnum); 1425 dbg_gen("process PEB %d", pnum);
1410 err = scan_peb(ubi, scan_ai, pnum); 1426 err = scan_peb(ubi, scan_ai, pnum, true);
1411 if (err < 0) 1427 if (err < 0)
1412 goto out_vidh; 1428 goto out_vidh;
1413 } 1429 }
@@ -1415,7 +1431,11 @@ static int scan_fast(struct ubi_device *ubi, struct ubi_attach_info **ai)
1415 ubi_free_vid_hdr(ubi, vidh); 1431 ubi_free_vid_hdr(ubi, vidh);
1416 kfree(ech); 1432 kfree(ech);
1417 1433
1418 err = ubi_scan_fastmap(ubi, *ai, scan_ai); 1434 if (scan_ai->force_full_scan)
1435 err = UBI_NO_FASTMAP;
1436 else
1437 err = ubi_scan_fastmap(ubi, *ai, scan_ai);
1438
1419 if (err) { 1439 if (err) {
1420 /* 1440 /*
1421 * Didn't attach via fastmap, do a full scan but reuse what 1441 * Didn't attach via fastmap, do a full scan but reuse what
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index c8b90a866d27..b616a115c9d3 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -715,6 +715,8 @@ struct ubi_ainf_volume {
715 * @vols_found: number of volumes found 715 * @vols_found: number of volumes found
716 * @highest_vol_id: highest volume ID 716 * @highest_vol_id: highest volume ID
717 * @is_empty: flag indicating whether the MTD device is empty or not 717 * @is_empty: flag indicating whether the MTD device is empty or not
718 * @force_full_scan: flag indicating whether we need to do a full scan and drop
719 all existing Fastmap data structures
718 * @min_ec: lowest erase counter value 720 * @min_ec: lowest erase counter value
719 * @max_ec: highest erase counter value 721 * @max_ec: highest erase counter value
720 * @max_sqnum: highest sequence number value 722 * @max_sqnum: highest sequence number value
@@ -742,6 +744,7 @@ struct ubi_attach_info {
742 int vols_found; 744 int vols_found;
743 int highest_vol_id; 745 int highest_vol_id;
744 int is_empty; 746 int is_empty;
747 int force_full_scan;
745 int min_ec; 748 int min_ec;
746 int max_ec; 749 int max_ec;
747 unsigned long long max_sqnum; 750 unsigned long long max_sqnum;