UBIFS: expect corruption only in last journal head LEBs

This patch improves UBIFS recovery and teaches it to expect corruption only in the last buds. Indeed, currently we just recover all buds, which is incorrect because only the last buds can have corruptions in case of a power cut. So it is inconsistent with the rest of the recovery strategy which tries hard to distinguish between corruptions caused by power cuts and other types of corruptions. This patch also adds one quirk - a slightly older UBIFS could have corruption in the next to last bud because of the way it switched buds: when bud A was full, it first searched for the next bud B, then wrote a reference node to the log about B, and then synchronized the write-buffer of A. So we could end up with buds A and B, where B is the last, but A had corruption. The UBIFS behavior was fixed, though, so currently it always first synchronizes A's write-buffer and only after this adds B to the log. However, to make sure that we handle unclean (after a power cut) UBIFS images belonging to older UBIFS - we need to add a quirk and keep it for some time: we need to check for the situation described above. Thankfully, it is easy to check for that situation. When UBIFS adds B to the log, it always first unmaps B, then maps it, and then syncs A's write-buffer. Thus, in that situation we can check that B is empty, in which case it is OK to have corruption in A. To check that B is empty it is enough to just read the first few bytes of the bud and compare them with 0xFFs. This quirk may be removed in a couple of years. Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
author: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> 2011-05-15 06:11:00 -0400
committer: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> 2011-05-16 07:11:25 -0400
commit: 91c66083fca36cdf496e927ef8bea19e6b1bbdce (patch)
tree: 8298bc056e929e1c946b1b2d6acbcc21dd54e235 /fs/ubifs
parent: cb14a18465686ea6add51b1008865b8174c28bd7 (diff)
1 files changed, 71 insertions, 4 deletions
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 0f50fbfe630f..6617280d1679 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -473,6 +473,65 @@ int ubifs_validate_entry(struct ubifs_info *c,
 }
 /**
+ * is_last_bud - check if the bud is the last in the journal head.
+ * @c: UBIFS file-system description object
+ * @bud: bud description object
+ *
+ * This function checks if bud @bud is the last bud in its journal head. This
+ * information is then used by 'replay_bud()' to decide whether the bud can
+ * have corruptions or not. Indeed, only last buds can be corrupted by power
+ * cuts. Returns %1 if this is the last bud, and %0 if not.
+ */
+static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
+{
+        struct ubifs_jhead *jh = &c->jheads[bud->jhead];
+        struct ubifs_bud *next;
+        uint32_t data;
+        int err;
+        if (list_is_last(&bud->list, &jh->buds_list))
+                return 1;
+        /*
+         * The following is a quirk to make sure we work correctly with UBIFS
+         * images used with older UBIFS.
+         *
+         * Normally, the last bud will be the last in the journal head's list
+         * of buds. However, there is one exception if the UBIFS image belongs
+         * to older UBIFS. This is fairly unlikely: one would need to use old
+         * UBIFS, then have a power cut exactly at the right point, and then
+         * try to mount this image with new UBIFS.
+         *
+         * The exception is: it is possible to have 2 buds A and B, A goes
+         * before B, and B is the last, bud B contains no data, and bud A is
+         * corrupted at the end. The reason is that in older versions when the
+         * journal code switched the next bud (from A to B), it first added a
+         * log reference node for the new bud (B), and only after this it
+         * synchronized the write-buffer of current bud (A). But later this was
+         * changed and UBIFS started to always synchronize the write-buffer of
+         * the bud (A) before writing the log reference for the new bud (B).
+         *
+         * But because older UBIFS always synchronized A's write-buffer before
+         * writing to B, we can recognize this exceptional situation by
+         * checking the contents of bud B - if it is empty, then A can be
+         * treated as the last and we can recover it.
+         *
+         * TODO: remove this piece of code in a couple of years (today it is
+         * 16.05.2011).
+         */
+        next = list_entry(bud->list.next, struct ubifs_bud, list);
+        if (!list_is_last(&next->list, &jh->buds_list))
+                return 0;
+        err = ubi_read(c->ubi, next->lnum, (char *)&data,
+                       next->start, 4);
+        if (err)
+                return 0;
+        return data == 0xFFFFFFFF;
+}
+/**
 * replay_bud - replay a bud logical eraseblock.
 * @c: UBIFS file-system description object
 * @b: bud entry which describes the bud
@@ -483,15 +542,23 @@ int ubifs_validate_entry(struct ubifs_info *c,
 */
 static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
 {
+        int is_last = is_last_bud(c, b->bud);
        int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
-        int jhead = b->bud->jhead;
        struct ubifs_scan_leb *sleb;
        struct ubifs_scan_node *snod;
-        dbg_mnt("replay bud LEB %d, head %d, offs %d", lnum, jhead, offs);
+        dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
+                lnum, b->bud->jhead, offs, is_last);
-        if (c->need_recovery)
+        if (c->need_recovery && is_last)
-                sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
+                /*
+                 * Recover only last LEBs in the journal heads, because power
+                 * cuts may cause corruptions only in these LEBs, because only
+                 * these LEBs could possibly be written to at the power cut
+                 * time.
+                 */
+                sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf,
+                                         b->bud->jhead != GCHD);
        else
                sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
        if (IS_ERR(sleb))
author	Artem Bityutskiy <Artem.Bityutskiy@nokia.com>	2011-05-15 06:11:00 -0400
committer	Artem Bityutskiy <Artem.Bityutskiy@nokia.com>	2011-05-16 07:11:25 -0400
commit	91c66083fca36cdf496e927ef8bea19e6b1bbdce (patch)
tree	8298bc056e929e1c946b1b2d6acbcc21dd54e235 /fs/ubifs
parent	cb14a18465686ea6add51b1008865b8174c28bd7 (diff)

diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 0f50fbfe630f..6617280d1679 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c
@@ -473,6 +473,65 @@ int ubifs_validate_entry(struct ubifs_info *c,
473	}	473	}
474		474
475	/**	475	/**
		476	* is_last_bud - check if the bud is the last in the journal head.
		477	* @c: UBIFS file-system description object
		478	* @bud: bud description object
		479	*
		480	* This function checks if bud @bud is the last bud in its journal head. This
		481	* information is then used by 'replay_bud()' to decide whether the bud can
		482	* have corruptions or not. Indeed, only last buds can be corrupted by power
		483	* cuts. Returns %1 if this is the last bud, and %0 if not.
		484	*/
		485	static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
		486	{
		487	struct ubifs_jhead *jh = &c->jheads[bud->jhead];
		488	struct ubifs_bud *next;
		489	uint32_t data;
		490	int err;
		491
		492	if (list_is_last(&bud->list, &jh->buds_list))
		493	return 1;
		494
		495	/*
		496	* The following is a quirk to make sure we work correctly with UBIFS
		497	* images used with older UBIFS.
		498	*
		499	* Normally, the last bud will be the last in the journal head's list
		500	* of buds. However, there is one exception if the UBIFS image belongs
		501	* to older UBIFS. This is fairly unlikely: one would need to use old
		502	* UBIFS, then have a power cut exactly at the right point, and then
		503	* try to mount this image with new UBIFS.
		504	*
		505	* The exception is: it is possible to have 2 buds A and B, A goes
		506	* before B, and B is the last, bud B contains no data, and bud A is
		507	* corrupted at the end. The reason is that in older versions when the
		508	* journal code switched the next bud (from A to B), it first added a
		509	* log reference node for the new bud (B), and only after this it
		510	* synchronized the write-buffer of current bud (A). But later this was
		511	* changed and UBIFS started to always synchronize the write-buffer of
		512	* the bud (A) before writing the log reference for the new bud (B).
		513	*
		514	* But because older UBIFS always synchronized A's write-buffer before
		515	* writing to B, we can recognize this exceptional situation by
		516	* checking the contents of bud B - if it is empty, then A can be
		517	* treated as the last and we can recover it.
		518	*
		519	* TODO: remove this piece of code in a couple of years (today it is
		520	* 16.05.2011).
		521	*/
		522	next = list_entry(bud->list.next, struct ubifs_bud, list);
		523	if (!list_is_last(&next->list, &jh->buds_list))
		524	return 0;
		525
		526	err = ubi_read(c->ubi, next->lnum, (char *)&data,
		527	next->start, 4);
		528	if (err)
		529	return 0;
		530
		531	return data == 0xFFFFFFFF;
		532	}
		533
		534	/**
476	* replay_bud - replay a bud logical eraseblock.	535	* replay_bud - replay a bud logical eraseblock.
477	* @c: UBIFS file-system description object	536	* @c: UBIFS file-system description object
478	* @b: bud entry which describes the bud	537	* @b: bud entry which describes the bud
@@ -483,15 +542,23 @@ int ubifs_validate_entry(struct ubifs_info *c,
483	*/	542	*/
484	static int replay_bud(struct ubifs_info *c, struct bud_entry *b)	543	static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
485	{	544	{
		545	int is_last = is_last_bud(c, b->bud);
486	int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;	546	int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
487	int jhead = b->bud->jhead;
488	struct ubifs_scan_leb *sleb;	547	struct ubifs_scan_leb *sleb;
489	struct ubifs_scan_node *snod;	548	struct ubifs_scan_node *snod;
490		549
491	dbg_mnt("replay bud LEB %d, head %d, offs %d", lnum, jhead, offs);	550	dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
		551	lnum, b->bud->jhead, offs, is_last);
492		552
493	if (c->need_recovery)	553	if (c->need_recovery && is_last)
494	sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);	554	/*
		555	* Recover only last LEBs in the journal heads, because power
		556	* cuts may cause corruptions only in these LEBs, because only
		557	* these LEBs could possibly be written to at the power cut
		558	* time.
		559	*/
		560	sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf,
		561	b->bud->jhead != GCHD);
495	else	562	else
496	sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);	563	sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
497	if (IS_ERR(sleb))	564	if (IS_ERR(sleb))