Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6

* 'linux-next' of git://git.infradead.org/ubifs-2.6:
  UBIFS: fix recovery bug
  UBIFS: add R/O compatibility
  UBIFS: fix compiler warnings
  UBIFS: fully sort GCed nodes
  UBIFS: fix commentaries
  UBIFS: introduce a helpful variable
  UBIFS: use KERN_CONT
  UBIFS: fix lprops committing bug
  UBIFS: fix bogus assertion
  UBIFS: fix bug where page is marked uptodate when out of space
  UBIFS: amend key_hash return value
  UBIFS: improve find function interface
  UBIFS: list usage cleanup
  UBIFS: fix dbg_chk_lpt_sz()
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-04-06 18:00:19 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-04-06 18:00:19 -0400
commit: e0724bf6e4a1f2e678d2b2aab01cae22e17862f0 (patch)
tree: 559a8fa8e7a92f8ae0e0a27d4e71f408fa7cec62 /fs/ubifs
parent: 38d9aefb5ce8f26358b0d5cd933cfa9e267105b1 (diff)
parent: de0975781a1a8bc92e07eb7681d10ef9bb5e6df9 (diff)
17 files changed, 482 insertions, 265 deletions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index f393620890ee..af1914462f02 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -194,29 +194,26 @@ static int make_free_space(struct ubifs_info *c)
 }
 /**
- * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index.
+ * ubifs_calc_min_idx_lebs - calculate amount of LEBs for the index.
 * @c: UBIFS file-system description object
 *
- * This function calculates and returns the number of eraseblocks which should
+ * This function calculates and returns the number of LEBs which should be kept
- * be kept for index usage.
+ * for index usage.
 */
 int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
 {
-        int idx_lebs, eff_leb_size = c->leb_size - c->max_idx_node_sz;
+        int idx_lebs;
        long long idx_size;
        idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
        /* And make sure we have thrice the index size of space reserved */
-        idx_size = idx_size + (idx_size << 1);
+        idx_size += idx_size << 1;
        /*
         * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
         * pair, nor similarly the two variables for the new index size, so we
         * have to do this costly 64-bit division on fast-path.
         */
-        idx_size += eff_leb_size - 1;
+        idx_lebs = div_u64(idx_size + c->idx_leb_size - 1, c->idx_leb_size);
-        idx_lebs = div_u64(idx_size, eff_leb_size);
        /*
         * The index head is not available for the in-the-gaps method, so add an
         * extra LEB to compensate.
@@ -310,23 +307,23 @@ static int can_use_rp(struct ubifs_info *c)
 * do_budget_space - reserve flash space for index and data growth.
 * @c: UBIFS file-system description object
 *
- * This function makes sure UBIFS has enough free eraseblocks for index growth
+ * This function makes sure UBIFS has enough free LEBs for index growth and
- * and data.
+ * data.
 *
 * When budgeting index space, UBIFS reserves thrice as many LEBs as the index
 * would take if it was consolidated and written to the flash. This guarantees
 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
 * be able to commit dirty index. So this function basically adds amount of
 * budgeted index space to the size of the current index, multiplies this by 3,
- * and makes sure this does not exceed the amount of free eraseblocks.
+ * and makes sure this does not exceed the amount of free LEBs.
 *
 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
 *    be large, because UBIFS does not do any index consolidation as long as
 *    there is free space. IOW, the index may take a lot of LEBs, but the LEBs
 *    will contain a lot of dirt.
- * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be
+ * o @c->min_idx_lebs is the number of LEBs the index presumably takes. IOW,
- *   consolidated to take up to @c->min_idx_lebs LEBs.
+ *    the index may be consolidated to take up to @c->min_idx_lebs LEBs.
 *
 * This function returns zero in case of success, and %-ENOSPC in case of
 * failure.
@@ -695,12 +692,12 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free)
 * This function calculates amount of free space to report to user-space.
 *
 * Because UBIFS may introduce substantial overhead (the index, node headers,
- * alignment, wastage at the end of eraseblocks, etc), it cannot report real
+ * alignment, wastage at the end of LEBs, etc), it cannot report real amount of
- * amount of free flash space it has (well, because not all dirty space is
+ * free flash space it has (well, because not all dirty space is reclaimable,
- * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so,
+ * UBIFS does not actually know the real amount). If UBIFS did so, it would
- * it would bread user expectations about what free space is. Users seem to
+ * break user expectations about what free space is. Users seem to be accustomed
- * accustomed to assume that if the file-system reports N bytes of free space,
+ * to assume that if the file-system reports N bytes of free space, they would
- * they would be able to fit a file of N bytes to the FS. This almost works for
+ * be able to fit a file of N bytes to the FS. This almost works for
 * traditional file-systems, because they have way less overhead than UBIFS.
 * So, to keep users happy, UBIFS tries to take the overhead into account.
 */
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index e975bd82f38b..ce2cd8343618 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -479,9 +479,9 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
                                          "bad or corrupted node)");
                else {
                        for (i = 0; i < nlen && dent->name[i]; i++)
-                                printk("%c", dent->name[i]);
+                                printk(KERN_CONT "%c", dent->name[i]);
                }
-                printk("\n");
+                printk(KERN_CONT "\n");
                break;
        }
@@ -1214,7 +1214,7 @@ static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
                        /*
                         * Make sure the last key in our znode is less or
-                         * equivalent than the the key in zbranch which goes
+                         * equivalent than the key in the zbranch which goes
                         * after our pointing zbranch.
                         */
                        cmp = keys_cmp(c, max,
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 0ff89fe71e51..6d34dc7e33e1 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -430,6 +430,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
        struct ubifs_inode *ui = ubifs_inode(inode);
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
+        int skipped_read = 0;
        struct page *page;
        ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
@@ -444,7 +445,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
        if (!PageUptodate(page)) {
                /* The page is not loaded from the flash */
-                if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+                if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
                        /*
                         * We change whole page so no need to load it. But we
                         * have to set the @PG_checked flag to make the further
@@ -453,7 +454,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
                         * the media.
                         */
                        SetPageChecked(page);
-                else {
+                        skipped_read = 1;
+                } else {
                        err = do_readpage(page);
                        if (err) {
                                unlock_page(page);
@@ -470,6 +472,14 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
        if (unlikely(err)) {
                ubifs_assert(err == -ENOSPC);
                /*
+                 * If we skipped reading the page because we were going to
+                 * write all of it, then it is not up to date.
+                 */
+                if (skipped_read) {
+                        ClearPageChecked(page);
+                        ClearPageUptodate(page);
+                }
+                /*
                 * Budgeting failed which means it would have to force
                 * write-back but didn't, because we set the @fast flag in the
                 * request. Write-back cannot be done now, while we have the
@@ -949,7 +959,7 @@ static int do_writepage(struct page *page, int len)
 * whole index and correct all inode sizes, which is long an unacceptable.
 *
 * To prevent situations like this, UBIFS writes pages back only if they are
- * within last synchronized inode size, i.e. the the size which has been
+ * within the last synchronized inode size, i.e. the size which has been
 * written to the flash media last time. Otherwise, UBIFS forces inode
 * write-back, thus making sure the on-flash inode contains current inode size,
 * and then keeps writing pages back.
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 717d79c97c5e..1d54383d1269 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -478,7 +478,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
 * ubifs_find_free_space - find a data LEB with free space.
 * @c: the UBIFS file-system description object
 * @min_space: minimum amount of required free space
- * @free: contains amount of free space in the LEB on exit
+ * @offs: contains offset of where free space starts on exit
 * @squeeze: whether to try to find space in a non-empty LEB first
 *
 * This function looks for an LEB with at least @min_space bytes of free space.
@@ -490,7 +490,7 @@ const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
 * failed to find a LEB with @min_space bytes of free space and other a negative
 * error codes in case of failure.
 */
-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
                          int squeeze)
 {
        const struct ubifs_lprops *lprops;
@@ -558,10 +558,10 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
                spin_unlock(&c->space_lock);
        }
-        *free = lprops->free;
+        *offs = c->leb_size - lprops->free;
        ubifs_release_lprops(c);
-        if (*free == c->leb_size) {
+        if (*offs == 0) {
                /*
                 * Ensure that empty LEBs have been unmapped. They may not have
                 * been, for example, because of an unclean unmount.  Also
@@ -573,8 +573,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
                        return err;
        }
-        dbg_find("found LEB %d, free %d", lnum, *free);
+        dbg_find("found LEB %d, free %d", lnum, c->leb_size - *offs);
-        ubifs_assert(*free >= min_space);
+        ubifs_assert(*offs <= c->leb_size - min_space);
        return lnum;
 out:
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index a711d33b3d3e..f0f5f15d384e 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -47,7 +47,7 @@
 * have to waste large pieces of free space at the end of LEB B, because nodes
 * from LEB A would not fit. And the worst situation is when all nodes are of
 * maximum size. So dark watermark is the amount of free + dirty space in LEB
- * which are guaranteed to be reclaimable. If LEB has less space, the GC migh
+ * which are guaranteed to be reclaimable. If LEB has less space, the GC might
 * be unable to reclaim it. So, LEBs with free + dirty greater than dark
 * watermark are "good" LEBs from GC's point of few. The other LEBs are not so
 * good, and GC takes extra care when moving them.
@@ -57,14 +57,6 @@
 #include "ubifs.h"
 /*
- * GC tries to optimize the way it fit nodes to available space, and it sorts
- * nodes a little. The below constants are watermarks which define "large",
- * "medium", and "small" nodes.
- */
-#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
-#define SMALL_NODE_WM  UBIFS_MAX_DENT_NODE_SZ
-/*
 * GC may need to move more than one LEB to make progress. The below constants
 * define "soft" and "hard" limits on the number of LEBs the garbage collector
 * may move.
@@ -116,83 +108,222 @@ static int switch_gc_head(struct ubifs_info *c)
 }
 /**
- * joinup - bring data nodes for an inode together.
+ * list_sort - sort a list.
- * @c: UBIFS file-system description object
+ * @priv: private data, passed to @cmp
- * @sleb: describes scanned LEB
+ * @head: the list to sort
- * @inum: inode number
+ * @cmp: the elements comparison function
- * @blk: block number
- * @data: list to which to add data nodes
 *
- * This function looks at the first few nodes in the scanned LEB @sleb and adds
+ * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
- * them to @data if they are data nodes from @inum and have a larger block
+ * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
- * number than @blk. This function returns %0 on success and a negative error
+ * in ascending order.
- * code on failure.
+ *
+ * The comparison function @cmp is supposed to return a negative value if @a is
+ * less than @b, and a positive value if @a is greater than @b. If @a and @b are
+ * equivalent, then it does not matter what this function returns.
 */
-static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum,
+static void list_sort(void *priv, struct list_head *head,
-                  unsigned int blk, struct list_head *data)
+                      int (*cmp)(void *priv, struct list_head *a,
+                                 struct list_head *b))
 {
-        int err, cnt = 6, lnum = sleb->lnum, offs;
+        struct list_head *p, *q, *e, *list, *tail, *oldhead;
-        struct ubifs_scan_node *snod, *tmp;
+        int insize, nmerges, psize, qsize, i;
-        union ubifs_key *key;
+        if (list_empty(head))
+                return;
+        list = head->next;
+        list_del(head);
+        insize = 1;
+        for (;;) {
+                p = oldhead = list;
+                list = tail = NULL;
+                nmerges = 0;
+                while (p) {
+                        nmerges++;
+                        q = p;
+                        psize = 0;
+                        for (i = 0; i < insize; i++) {
+                                psize++;
+                                q = q->next == oldhead ? NULL : q->next;
+                                if (!q)
+                                        break;
+                        }
-        list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+                        qsize = insize;
-                key = &snod->key;
+                        while (psize > 0 || (qsize > 0 && q)) {
-                if (key_inum(c, key) == inum &&
+                                if (!psize) {
-                    key_type(c, key) == UBIFS_DATA_KEY &&
+                                        e = q;
-                    key_block(c, key) > blk) {
+                                        q = q->next;
-                        offs = snod->offs;
+                                        qsize--;
-                        err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0);
+                                        if (q == oldhead)
-                        if (err < 0)
+                                                q = NULL;
-                                return err;
+                                } else if (!qsize || !q) {
-                        list_del(&snod->list);
+                                        e = p;
-                        if (err) {
+                                        p = p->next;
-                                list_add_tail(&snod->list, data);
+                                        psize--;
-                                blk = key_block(c, key);
+                                        if (p == oldhead)
-                        } else
+                                                p = NULL;
-                                kfree(snod);
+                                } else if (cmp(priv, p, q) <= 0) {
-                        cnt = 6;
+                                        e = p;
-                } else if (--cnt == 0)
+                                        p = p->next;
+                                        psize--;
+                                        if (p == oldhead)
+                                                p = NULL;
+                                } else {
+                                        e = q;
+                                        q = q->next;
+                                        qsize--;
+                                        if (q == oldhead)
+                                                q = NULL;
+                                }
+                                if (tail)
+                                        tail->next = e;
+                                else
+                                        list = e;
+                                e->prev = tail;
+                                tail = e;
+                        }
+                        p = q;
+                }
+                tail->next = list;
+                list->prev = tail;
+                if (nmerges <= 1)
                        break;
+                insize *= 2;
        }
-        return 0;
+        head->next = list;
+        head->prev = list->prev;
+        list->prev->next = head;
+        list->prev = head;
 }
 /**
- * move_nodes - move nodes.
+ * data_nodes_cmp - compare 2 data nodes.
+ * @priv: UBIFS file-system description object
+ * @a: first data node
+ * @b: second data node
+ *
+ * This function compares data nodes @a and @b. Returns %1 if @a has greater
+ * inode or block number, and %-1 otherwise.
+ */
+int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+        ino_t inuma, inumb;
+        struct ubifs_info *c = priv;
+        struct ubifs_scan_node *sa, *sb;
+        cond_resched();
+        sa = list_entry(a, struct ubifs_scan_node, list);
+        sb = list_entry(b, struct ubifs_scan_node, list);
+        ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY);
+        ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY);
+        inuma = key_inum(c, &sa->key);
+        inumb = key_inum(c, &sb->key);
+        if (inuma == inumb) {
+                unsigned int blka = key_block(c, &sa->key);
+                unsigned int blkb = key_block(c, &sb->key);
+                if (blka <= blkb)
+                        return -1;
+        } else if (inuma <= inumb)
+                return -1;
+        return 1;
+}
+/*
+ * nondata_nodes_cmp - compare 2 non-data nodes.
+ * @priv: UBIFS file-system description object
+ * @a: first node
+ * @b: second node
+ *
+ * This function compares nodes @a and @b. It makes sure that inode nodes go
+ * first and are sorted by length in descending order. Directory entry nodes go
+ * after inode nodes and are sorted in ascending hash value order.
+ */
+int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+        int typea, typeb;
+        ino_t inuma, inumb;
+        struct ubifs_info *c = priv;
+        struct ubifs_scan_node *sa, *sb;
+        cond_resched();
+        sa = list_entry(a, struct ubifs_scan_node, list);
+        sb = list_entry(b, struct ubifs_scan_node, list);
+        typea = key_type(c, &sa->key);
+        typeb = key_type(c, &sb->key);
+        ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY);
+        /* Inodes go before directory entries */
+        if (typea == UBIFS_INO_KEY) {
+                if (typeb == UBIFS_INO_KEY)
+                        return sb->len - sa->len;
+                return -1;
+        }
+        if (typeb == UBIFS_INO_KEY)
+                return 1;
+        ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY);
+        inuma = key_inum(c, &sa->key);
+        inumb = key_inum(c, &sb->key);
+        if (inuma == inumb) {
+                uint32_t hasha = key_hash(c, &sa->key);
+                uint32_t hashb = key_hash(c, &sb->key);
+                if (hasha <= hashb)
+                        return -1;
+        } else if (inuma <= inumb)
+                return -1;
+        return 1;
+}
+/**
+ * sort_nodes - sort nodes for GC.
 * @c: UBIFS file-system description object
- * @sleb: describes nodes to move
+ * @sleb: describes nodes to sort and contains the result on exit
+ * @nondata: contains non-data nodes on exit
+ * @min: minimum node size is returned here
 *
- * This function moves valid nodes from data LEB described by @sleb to the GC
+ * This function sorts the list of inodes to garbage collect. First of all, it
- * journal head. The obsolete nodes are dropped.
+ * kills obsolete nodes and separates data and non-data nodes to the
+ * @sleb->nodes and @nondata lists correspondingly.
+ *
+ * Data nodes are then sorted in block number order - this is important for
+ * bulk-read; data nodes with lower inode number go before data nodes with
+ * higher inode number, and data nodes with lower block number go before data
+ * nodes with higher block number;
 *
- * When moving nodes we have to deal with classical bin-packing problem: the
+ * Non-data nodes are sorted as follows.
- * space in the current GC journal head LEB and in @c->gc_lnum are the "bins",
+ *   o First go inode nodes - they are sorted in descending length order.
- * where the nodes in the @sleb->nodes list are the elements which should be
+ *   o Then go directory entry nodes - they are sorted in hash order, which
- * fit optimally to the bins. This function uses the "first fit decreasing"
+ *     should supposedly optimize 'readdir()'. Direntry nodes with lower parent
- * strategy, although it does not really sort the nodes but just split them on
+ *     inode number go before direntry nodes with higher parent inode number,
- * 3 classes - large, medium, and small, so they are roughly sorted.
+ *     and direntry nodes with lower name hash values go before direntry nodes
+ *     with higher name hash values.
 *
- * This function returns zero in case of success, %-EAGAIN if commit is
+ * This function returns zero in case of success and a negative error code in
- * required, and other negative error codes in case of other failures.
+ * case of failure.
 */
-static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
+static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+                      struct list_head *nondata, int *min)
 {
        struct ubifs_scan_node *snod, *tmp;
-        struct list_head data, large, medium, small;
-        struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
-        int avail, err, min = INT_MAX;
-        unsigned int blk = 0;
-        ino_t inum = 0;
-        INIT_LIST_HEAD(&data);
+        *min = INT_MAX;
-        INIT_LIST_HEAD(&large);
-        INIT_LIST_HEAD(&medium);
-        INIT_LIST_HEAD(&small);
-        while (!list_empty(&sleb->nodes)) {
+        /* Separate data nodes and non-data nodes */
-                struct list_head *lst = sleb->nodes.next;
+        list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+                int err;
-                snod = list_entry(lst, struct ubifs_scan_node, list);
                ubifs_assert(snod->type != UBIFS_IDX_NODE);
                ubifs_assert(snod->type != UBIFS_REF_NODE);
@@ -201,53 +332,72 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
                err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
                                         snod->offs, 0);
                if (err < 0)
-                        goto out;
+                        return err;
-                list_del(lst);
                if (!err) {
                        /* The node is obsolete, remove it from the list */
+                        list_del(&snod->list);
                        kfree(snod);
                        continue;
                }
-                /*
+                if (snod->len < *min)
-                 * Sort the list of nodes so that data nodes go first, large
+                        *min = snod->len;
-                 * nodes go second, and small nodes go last.
-                 */
+                if (key_type(c, &snod->key) != UBIFS_DATA_KEY)
-                if (key_type(c, &snod->key) == UBIFS_DATA_KEY) {
+                        list_move_tail(&snod->list, nondata);
-                        if (inum != key_inum(c, &snod->key)) {
-                                if (inum) {
-                                        /*
-                                         * Try to move data nodes from the same
-                                         * inode together.
-                                         */
-                                        err = joinup(c, sleb, inum, blk, &data);
-                                        if (err)
-                                                goto out;
-                                }
-                                inum = key_inum(c, &snod->key);
-                                blk = key_block(c, &snod->key);
-                        }
-                        list_add_tail(lst, &data);
-                } else if (snod->len > MEDIUM_NODE_WM)
-                        list_add_tail(lst, &large);
-                else if (snod->len > SMALL_NODE_WM)
-                        list_add_tail(lst, &medium);
-                else
-                        list_add_tail(lst, &small);
-                /* And find the smallest node */
-                if (snod->len < min)
-                        min = snod->len;
        }
-        /*
+        /* Sort data and non-data nodes */
-         * Join the tree lists so that we'd have one roughly sorted list
+        list_sort(c, &sleb->nodes, &data_nodes_cmp);
-         * ('large' will be the head of the joined list).
+        list_sort(c, nondata, &nondata_nodes_cmp);
-         */
+        return 0;
-        list_splice(&data, &large);
+}
-        list_splice(&medium, large.prev);
-        list_splice(&small, large.prev);
+/**
+ * move_node - move a node.
+ * @c: UBIFS file-system description object
+ * @sleb: describes the LEB to move nodes from
+ * @snod: the node to move
+ * @wbuf: write-buffer to move node to
+ *
+ * This function moves node @snod to @wbuf, changes TNC correspondingly, and
+ * destroys @snod. Returns zero in case of success and a negative error code in
+ * case of failure.
+ */
+static int move_node(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
+                     struct ubifs_scan_node *snod, struct ubifs_wbuf *wbuf)
+{
+        int err, new_lnum = wbuf->lnum, new_offs = wbuf->offs + wbuf->used;
+        cond_resched();
+        err = ubifs_wbuf_write_nolock(wbuf, snod->node, snod->len);
+        if (err)
+                return err;
+        err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
+                                snod->offs, new_lnum, new_offs,
+                                snod->len);
+        list_del(&snod->list);
+        kfree(snod);
+        return err;
+}
+/**
+ * move_nodes - move nodes.
+ * @c: UBIFS file-system description object
+ * @sleb: describes the LEB to move nodes from
+ *
+ * This function moves valid nodes from data LEB described by @sleb to the GC
+ * journal head. This function returns zero in case of success, %-EAGAIN if
+ * commit is required, and other negative error codes in case of other
+ * failures.
+ */
+static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
+{
+        int err, min;
+        LIST_HEAD(nondata);
+        struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
        if (wbuf->lnum == -1) {
                /*
@@ -256,42 +406,59 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
                 */
                err = switch_gc_head(c);
                if (err)
-                        goto out;
+                        return err;
        }
+        err = sort_nodes(c, sleb, &nondata, &min);
+        if (err)
+                goto out;
        /* Write nodes to their new location. Use the first-fit strategy */
        while (1) {
-                avail = c->leb_size - wbuf->offs - wbuf->used;
+                int avail;
-                list_for_each_entry_safe(snod, tmp, &large, list) {
+                struct ubifs_scan_node *snod, *tmp;
-                        int new_lnum, new_offs;
+                /* Move data nodes */
+                list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
+                        avail = c->leb_size - wbuf->offs - wbuf->used;
+                        if  (snod->len > avail)
+                                /*
+                                 * Do not skip data nodes in order to optimize
+                                 * bulk-read.
+                                 */
+                                break;
+                        err = move_node(c, sleb, snod, wbuf);
+                        if (err)
+                                goto out;
+                }
+                /* Move non-data nodes */
+                list_for_each_entry_safe(snod, tmp, &nondata, list) {
+                        avail = c->leb_size - wbuf->offs - wbuf->used;
                        if (avail < min)
                                break;
-                        if (snod->len > avail)
+                        if  (snod->len > avail) {
-                                /* This node does not fit */
+                                /*
+                                 * Keep going only if this is an inode with
+                                 * some data. Otherwise stop and switch the GC
+                                 * head. IOW, we assume that data-less inode
+                                 * nodes and direntry nodes are roughly of the
+                                 * same size.
+                                 */
+                                if (key_type(c, &snod->key) == UBIFS_DENT_KEY ||
+                                    snod->len == UBIFS_INO_NODE_SZ)
+                                        break;
                                continue;
+                        }
-                        cond_resched();
+                        err = move_node(c, sleb, snod, wbuf);
-                        new_lnum = wbuf->lnum;
-                        new_offs = wbuf->offs + wbuf->used;
-                        err = ubifs_wbuf_write_nolock(wbuf, snod->node,
-                                                      snod->len);
                        if (err)
                                goto out;
-                        err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
-                                                snod->offs, new_lnum, new_offs,
-                                                snod->len);
-                        if (err)
-                                goto out;
-                        avail = c->leb_size - wbuf->offs - wbuf->used;
-                        list_del(&snod->list);
-                        kfree(snod);
                }
-                if (list_empty(&large))
+                if (list_empty(&sleb->nodes) && list_empty(&nondata))
                        break;
                /*
@@ -306,10 +473,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
        return 0;
 out:
-        list_for_each_entry_safe(snod, tmp, &large, list) {
+        list_splice_tail(&nondata, &sleb->nodes);
-                list_del(&snod->list);
-                kfree(snod);
-        }
        return err;
 }
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index a11ca0958a23..64b5f3a309f5 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -114,7 +114,7 @@ static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
 */
 static int reserve_space(struct ubifs_info *c, int jhead, int len)
 {
-        int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
+        int err = 0, err1, retries = 0, avail, lnum, offs, squeeze;
        struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
        /*
@@ -139,10 +139,9 @@ again:
         * Write buffer wasn't seek'ed or there is no enough space - look for an
         * LEB with some empty space.
         */
-        lnum = ubifs_find_free_space(c, len, &free, squeeze);
+        lnum = ubifs_find_free_space(c, len, &offs, squeeze);
        if (lnum >= 0) {
                /* Found an LEB, add it to the journal head */
-                offs = c->leb_size - free;
                err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
                if (err)
                        goto out_return;
@@ -1366,7 +1365,7 @@ out_ro:
 * @host: host inode
 *
 * This function writes the updated version of an extended attribute inode and
- * the host inode tho the journal (to the base head). The host inode is written
+ * the host inode to the journal (to the base head). The host inode is written
 * after the extended attribute inode in order to guarantee that the extended
 * attribute will be flushed when the inode is synchronized by 'fsync()' and
 * consequently, the write-buffer is synchronized. This function returns zero
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index efb3430a2581..5fa27ea031ba 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -381,8 +381,8 @@ static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k)
 * @c: UBIFS file-system description object
 * @key: the key to get hash from
 */
-static inline int key_hash(const struct ubifs_info *c,
+static inline uint32_t key_hash(const struct ubifs_info *c,
-                           const union ubifs_key *key)
+                                const union ubifs_key *key)
 {
        return key->u32[1] & UBIFS_S_KEY_HASH_MASK;
 }
@@ -392,7 +392,7 @@ static inline int key_hash(const struct ubifs_info *c,
 * @c: UBIFS file-system description object
 * @k: the key to get hash from
 */
-static inline int key_hash_flash(const struct ubifs_info *c, const void *k)
+static inline uint32_t key_hash_flash(const struct ubifs_info *c, const void *k)
 {
        const union ubifs_key *key = k;
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 3e0aa7367556..56e33772a1ee 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -239,7 +239,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
        }
        /*
-         * Make sure the the amount of space in buds will not exceed
+         * Make sure the amount of space in buds will not exceed the
         * 'c->max_bud_bytes' limit, because we want to guarantee mount time
         * limits.
         *
@@ -367,7 +367,6 @@ static void remove_buds(struct ubifs_info *c)
                                bud->jhead, c->leb_size - bud->start,
                                c->cmt_bud_bytes);
                        rb_erase(p1, &c->buds);
-                        list_del(&bud->list);
                        /*
                         * If the commit does not finish, the recovery will need
                         * to replay the journal, in which case the old buds
@@ -375,7 +374,7 @@ static void remove_buds(struct ubifs_info *c)
                         * commit i.e. do not allow them to be garbage
                         * collected.
                         */
-                        list_add(&bud->list, &c->old_buds);
+                        list_move(&bud->list, &c->old_buds);
                }
        }
        spin_unlock(&c->buds_lock);
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 3216a1f277f8..8cbfb8248025 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -229,7 +229,7 @@ static int layout_cnodes(struct ubifs_info *c)
                while (offs + len > c->leb_size) {
                        alen = ALIGN(offs, c->min_io_size);
                        upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
-                        dbg_chk_lpt_sz(c, 2, alen - offs);
+                        dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
                        err = alloc_lpt_leb(c, &lnum);
                        if (err)
                                goto no_space;
@@ -272,7 +272,7 @@ static int layout_cnodes(struct ubifs_info *c)
                if (offs + c->lsave_sz > c->leb_size) {
                        alen = ALIGN(offs, c->min_io_size);
                        upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
-                        dbg_chk_lpt_sz(c, 2, alen - offs);
+                        dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
                        err = alloc_lpt_leb(c, &lnum);
                        if (err)
                                goto no_space;
@@ -292,7 +292,7 @@ static int layout_cnodes(struct ubifs_info *c)
                if (offs + c->ltab_sz > c->leb_size) {
                        alen = ALIGN(offs, c->min_io_size);
                        upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
-                        dbg_chk_lpt_sz(c, 2, alen - offs);
+                        dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
                        err = alloc_lpt_leb(c, &lnum);
                        if (err)
                                goto no_space;
@@ -416,14 +416,12 @@ static int write_cnodes(struct ubifs_info *c)
                                                       alen, UBI_SHORTTERM);
                                if (err)
                                        return err;
-                                dbg_chk_lpt_sz(c, 4, alen - wlen);
                        }
-                        dbg_chk_lpt_sz(c, 2, 0);
+                        dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
                        err = realloc_lpt_leb(c, &lnum);
                        if (err)
                                goto no_space;
-                        offs = 0;
+                        offs = from = 0;
-                        from = 0;
                        ubifs_assert(lnum >= c->lpt_first &&
                                     lnum <= c->lpt_last);
                        err = ubifs_leb_unmap(c, lnum);
@@ -477,11 +475,11 @@ static int write_cnodes(struct ubifs_info *c)
                                              UBI_SHORTTERM);
                        if (err)
                                return err;
-                        dbg_chk_lpt_sz(c, 2, alen - wlen);
+                        dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
                        err = realloc_lpt_leb(c, &lnum);
                        if (err)
                                goto no_space;
-                        offs = 0;
+                        offs = from = 0;
                        ubifs_assert(lnum >= c->lpt_first &&
                                     lnum <= c->lpt_last);
                        err = ubifs_leb_unmap(c, lnum);
@@ -504,11 +502,11 @@ static int write_cnodes(struct ubifs_info *c)
                                              UBI_SHORTTERM);
                        if (err)
                                return err;
-                        dbg_chk_lpt_sz(c, 2, alen - wlen);
+                        dbg_chk_lpt_sz(c, 2, c->leb_size - offs);
                        err = realloc_lpt_leb(c, &lnum);
                        if (err)
                                goto no_space;
-                        offs = 0;
+                        offs = from = 0;
                        ubifs_assert(lnum >= c->lpt_first &&
                                     lnum <= c->lpt_last);
                        err = ubifs_leb_unmap(c, lnum);
@@ -1756,10 +1754,16 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
 /**
 * dbg_chk_lpt_sz - check LPT does not write more than LPT size.
 * @c: the UBIFS file-system description object
- * @action: action
+ * @action: what to do
 * @len: length written
 *
 * This function returns %0 on success and a negative error code on failure.
+ * The @action argument may be one of:
+ *   o %0 - LPT debugging checking starts, initialize debugging variables;
+ *   o %1 - wrote an LPT node, increase LPT size by @len bytes;
+ *   o %2 - switched to a different LEB and wasted @len bytes;
+ *   o %3 - check that we've written the right number of bytes;
+ *   o %4 - wasted @len bytes.
 */
 int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
 {
@@ -1917,12 +1921,12 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
                                       lnum, offs);
                        err = ubifs_unpack_nnode(c, buf, &nnode);
                        for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
-                                printk("%d:%d", nnode.nbranch[i].lnum,
+                                printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
                                       nnode.nbranch[i].offs);
                                if (i != UBIFS_LPT_FANOUT - 1)
-                                        printk(", ");
+                                        printk(KERN_CONT ", ");
                        }
-                        printk("\n");
+                        printk(KERN_CONT "\n");
                        break;
                }
                case UBIFS_LPT_LTAB:
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 90acac603e63..10662975d2ef 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -425,59 +425,35 @@ static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
 * @lnum: LEB number of the LEB from which @buf was read
 * @offs: offset from which @buf was read
 *
- * This function scans @buf for more nodes and returns %0 is a node is found and
+ * This function ensures that the corrupted node at @offs is the last thing
- * %1 if no more nodes are found.
+ * written to a LEB. This function returns %1 if more data is not found and
+ * %0 if more data is found.
 */
 static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
                        int lnum, int offs)
 {
-        int skip, next_offs = 0;
+        struct ubifs_ch *ch = buf;
+        int skip, dlen = le32_to_cpu(ch->len);
-        if (len > UBIFS_DATA_NODE_SZ) {
+        /* Check for empty space after the corrupt node's common header */
-                struct ubifs_ch *ch = buf;
+        skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs;
-                int dlen = le32_to_cpu(ch->len);
+        if (is_empty(buf + skip, len - skip))
+                return 1;
-                if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ &&
+        /*
-                    dlen <= UBIFS_MAX_DATA_NODE_SZ)
+         * The area after the common header size is not empty, so the common
-                        /* The corrupt node looks like a data node */
+         * header must be intact. Check it.
-                        next_offs = ALIGN(offs + dlen, 8);
+         */
-        }
+        if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
+                dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
-        if (c->min_io_size == 1)
+                return 0;
-                skip = 8;
-        else
-                skip = ALIGN(offs + 1, c->min_io_size) - offs;
-        offs += skip;
-        buf += skip;
-        len -= skip;
-        while (len > 8) {
-                struct ubifs_ch *ch = buf;
-                uint32_t magic = le32_to_cpu(ch->magic);
-                int ret;
-                if (magic == UBIFS_NODE_MAGIC) {
-                        ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
-                        if (ret == SCANNED_A_NODE || ret > 0) {
-                                /*
-                                 * There is a small chance this is just data in
-                                 * a data node, so check that possibility. e.g.
-                                 * this is part of a file that itself contains
-                                 * a UBIFS image.
-                                 */
-                                if (next_offs && offs + le32_to_cpu(ch->len) <=
-                                    next_offs)
-                                        continue;
-                                dbg_rcvry("unexpected node at %d:%d", lnum,
-                                          offs);
-                                return 0;
-                        }
-                }
-                offs += 8;
-                buf += 8;
-                len -= 8;
        }
-        return 1;
+        /* Now we know the corrupt node's length we can skip over it */
+        skip = ALIGN(offs + dlen, c->min_io_size) - offs;
+        /* After which there should be empty space */
+        if (is_empty(buf + skip, len - skip))
+                return 1;
+        dbg_rcvry("unexpected data at %d:%d", lnum, offs + skip);
+        return 0;
 }
 /**
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index ce42a7b0ca5a..11cc80125a49 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -143,7 +143,7 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
                dirty -= c->leb_size - lp->free;
                /*
                 * If the replay order was perfect the dirty space would now be
-                 * zero. The order is not perfect because the the journal heads
+                 * zero. The order is not perfect because the journal heads
                 * race with each other. This is not a problem but is does mean
                 * that the dirty space may temporarily exceed c->leb_size
                 * during the replay.
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index e070c643d1bb..57085e43320f 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -193,6 +193,7 @@ static int create_default_filesystem(struct ubifs_info *c)
        if (tmp64 > DEFAULT_MAX_RP_SIZE)
                tmp64 = DEFAULT_MAX_RP_SIZE;
        sup->rp_size = cpu_to_le64(tmp64);
+        sup->ro_compat_version = cpu_to_le32(UBIFS_RO_COMPAT_VERSION);
        err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);
        kfree(sup);
@@ -532,17 +533,39 @@ int ubifs_read_superblock(struct ubifs_info *c)
        if (IS_ERR(sup))
                return PTR_ERR(sup);
+        c->fmt_version = le32_to_cpu(sup->fmt_version);
+        c->ro_compat_version = le32_to_cpu(sup->ro_compat_version);
        /*
         * The software supports all previous versions but not future versions,
         * due to the unavailability of time-travelling equipment.
         */
-        c->fmt_version = le32_to_cpu(sup->fmt_version);
        if (c->fmt_version > UBIFS_FORMAT_VERSION) {
-                ubifs_err("on-flash format version is %d, but software only "
+                struct super_block *sb = c->vfs_sb;
-                          "supports up to version %d", c->fmt_version,
+                int mounting_ro = sb->s_flags & MS_RDONLY;
-                          UBIFS_FORMAT_VERSION);
-                err = -EINVAL;
+                ubifs_assert(!c->ro_media || mounting_ro);
-                goto out;
+                if (!mounting_ro ||
+                    c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
+                        ubifs_err("on-flash format version is w%d/r%d, but "
+                                  "software only supports up to version "
+                                  "w%d/r%d", c->fmt_version,
+                                  c->ro_compat_version, UBIFS_FORMAT_VERSION,
+                                  UBIFS_RO_COMPAT_VERSION);
+                        if (c->ro_compat_version <= UBIFS_RO_COMPAT_VERSION) {
+                                ubifs_msg("only R/O mounting is possible");
+                                err = -EROFS;
+                        } else
+                                err = -EINVAL;
+                        goto out;
+                }
+                /*
+                 * The FS is mounted R/O, and the media format is
+                 * R/O-compatible with the UBIFS implementation, so we can
+                 * mount.
+                 */
+                c->rw_incompat = 1;
        }
        if (c->fmt_version < 3) {
@@ -623,7 +646,6 @@ int ubifs_read_superblock(struct ubifs_info *c)
        c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
        c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
        c->main_first = c->leb_cnt - c->main_lebs;
-        c->report_rp_size = ubifs_reported_space(c, c->rp_size);
        err = validate_sb(c, sup);
 out:
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index e7bab52a1410..02feb59cefca 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -206,8 +206,7 @@ static int shrink_tnc_trees(int nr, int age, int *contention)
                 * Move this one to the end of the list to provide some
                 * fairness.
                 */
-                list_del(&c->infos_list);
+                list_move_tail(&c->infos_list, &ubifs_infos);
-                list_add_tail(&c->infos_list, &ubifs_infos);
                mutex_unlock(&c->umount_mutex);
                if (freed >= nr)
                        break;
@@ -263,8 +262,7 @@ static int kick_a_thread(void)
                        }
                        if (i == 1) {
-                                list_del(&c->infos_list);
+                                list_move_tail(&c->infos_list, &ubifs_infos);
-                                list_add_tail(&c->infos_list, &ubifs_infos);
                                spin_unlock(&ubifs_infos_lock);
                                ubifs_request_bg_commit(c);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index c5c98355459a..faa44f90608a 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -421,8 +421,8 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
                seq_printf(s, ",no_chk_data_crc");
        if (c->mount_opts.override_compr) {
-                seq_printf(s, ",compr=");
+                seq_printf(s, ",compr=%s",
-                seq_printf(s, ubifs_compr_name(c->mount_opts.compr_type));
+                           ubifs_compr_name(c->mount_opts.compr_type));
        }
        return 0;
@@ -700,6 +700,8 @@ static int init_constants_sb(struct ubifs_info *c)
        if (err)
                return err;
+        /* Initialize effective LEB size used in budgeting calculations */
+        c->idx_leb_size = c->leb_size - c->max_idx_node_sz;
        return 0;
 }
@@ -716,6 +718,7 @@ static void init_constants_master(struct ubifs_info *c)
        long long tmp64;
        c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+        c->report_rp_size = ubifs_reported_space(c, c->rp_size);
        /*
         * Calculate total amount of FS blocks. This number is not used
@@ -1201,7 +1204,7 @@ static int mount_ubifs(struct ubifs_info *c)
                        goto out_cbuf;
                /* Create background thread */
-                c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
+                c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
                if (IS_ERR(c->bgt)) {
                        err = PTR_ERR(c->bgt);
                        c->bgt = NULL;
@@ -1318,11 +1321,15 @@ static int mount_ubifs(struct ubifs_info *c)
                else {
                        c->need_recovery = 0;
                        ubifs_msg("recovery completed");
-                        /* GC LEB has to be empty and taken at this point */
+                        /*
-                        ubifs_assert(c->lst.taken_empty_lebs == 1);
+                         * GC LEB has to be empty and taken at this point. But
+                         * the journal head LEBs may also be accounted as
+                         * "empty taken" if they are empty.
+                         */
+                        ubifs_assert(c->lst.taken_empty_lebs > 0);
                }
        } else
-                ubifs_assert(c->lst.taken_empty_lebs == 1);
+                ubifs_assert(c->lst.taken_empty_lebs > 0);
        err = dbg_check_filesystem(c);
        if (err)
@@ -1344,8 +1351,9 @@ static int mount_ubifs(struct ubifs_info *c)
        x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
        ubifs_msg("journal size:       %lld bytes (%lld KiB, %lld MiB, %d "
                  "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
-        ubifs_msg("media format:       %d (latest is %d)",
+        ubifs_msg("media format:       w%d/r%d (latest is w%d/r%d)",
-                  c->fmt_version, UBIFS_FORMAT_VERSION);
+                  c->fmt_version, c->ro_compat_version,
+                  UBIFS_FORMAT_VERSION, UBIFS_RO_COMPAT_VERSION);
        ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
        ubifs_msg("reserved for root:  %llu bytes (%llu KiB)",
                c->report_rp_size, c->report_rp_size >> 10);
@@ -1485,6 +1493,15 @@ static int ubifs_remount_rw(struct ubifs_info *c)
 {
        int err, lnum;
+        if (c->rw_incompat) {
+                ubifs_err("the file-system is not R/W-compatible");
+                ubifs_msg("on-flash format version is w%d/r%d, but software "
+                          "only supports up to version w%d/r%d", c->fmt_version,
+                          c->ro_compat_version, UBIFS_FORMAT_VERSION,
+                          UBIFS_RO_COMPAT_VERSION);
+                return -EROFS;
+        }
        mutex_lock(&c->umount_mutex);
        dbg_save_space_info(c);
        c->remounting_rw = 1;
@@ -1554,7 +1571,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
        ubifs_create_buds_lists(c);
        /* Create background thread */
-        c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
+        c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
        if (IS_ERR(c->bgt)) {
                err = PTR_ERR(c->bgt);
                c->bgt = NULL;
@@ -1775,7 +1792,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
                c->bu.buf = NULL;
        }
-        ubifs_assert(c->lst.taken_empty_lebs == 1);
+        ubifs_assert(c->lst.taken_empty_lebs > 0);
        return 0;
 }
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index fa28a84c6a1b..f249f7b0d656 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -1252,7 +1252,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
         * splitting in the middle of the colliding sequence. Also, when
         * removing the leftmost key, we would have to correct the key of the
         * parent node, which would introduce additional complications. Namely,
-         * if we changed the the leftmost key of the parent znode, the garbage
+         * if we changed the leftmost key of the parent znode, the garbage
         * collector would be unable to find it (GC is doing this when GC'ing
         * indexing LEBs). Although we already have an additional RB-tree where
         * we save such changed znodes (see 'ins_clr_old_idx_znode()') until
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index b25fc36cf72f..3eee07e0c495 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -36,9 +36,31 @@
 /* UBIFS node magic number (must not have the padding byte first or last) */
 #define UBIFS_NODE_MAGIC  0x06101831
-/* UBIFS on-flash format version */
+/*
+ * UBIFS on-flash format version. This version is increased when the on-flash
+ * format is changing. If this happens, UBIFS will support older versions as
+ * well. But older UBIFS code will not support newer formats. Format changes
+ * will be rare and only when absolutely necessary, e.g. to fix a bug or to add
+ * a new feature.
+ *
+ * UBIFS went into mainline kernel with format version 4. The older formats
+ * were development formats.
+ */
 #define UBIFS_FORMAT_VERSION 4
+/*
+ * Read-only compatibility version. If the UBIFS format is changed, older UBIFS
+ * implementations will not be able to mount newer formats in read-write mode.
+ * However, depending on the change, it may be possible to mount newer formats
+ * in R/O mode. This is indicated by the R/O compatibility version which is
+ * stored in the super-block.
+ *
+ * This is needed to support boot-loaders which only need R/O mounting. With
+ * this flag it is possible to do UBIFS format changes without a need to update
+ * boot-loaders.
+ */
+#define UBIFS_RO_COMPAT_VERSION 0
 /* Minimum logical eraseblock size in bytes */
 #define UBIFS_MIN_LEB_SZ (15*1024)
@@ -53,7 +75,7 @@
 /*
 * If compressed data length is less than %UBIFS_MIN_COMPRESS_DIFF bytes
- * shorter than uncompressed data length, UBIFS preferes to leave this data
+ * shorter than uncompressed data length, UBIFS prefers to leave this data
 * node uncompress, because it'll be read faster.
 */
 #define UBIFS_MIN_COMPRESS_DIFF 64
@@ -586,6 +608,7 @@ struct ubifs_pad_node {
 * @padding2: reserved for future, zeroes
 * @time_gran: time granularity in nanoseconds
 * @uuid: UUID generated when the file system image was created
+ * @ro_compat_version: UBIFS R/O compatibility version
 */
 struct ubifs_sb_node {
        struct ubifs_ch ch;
@@ -612,7 +635,8 @@ struct ubifs_sb_node {
        __le64 rp_size;
        __le32 time_gran;
        __u8 uuid[16];
-        __u8 padding2[3972];
+        __le32 ro_compat_version;
+        __u8 padding2[3968];
 } __attribute__ ((packed));
 /**
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 039a68bee29a..0a8341e14088 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -934,6 +934,7 @@ struct ubifs_debug_info;
 *          by @commit_sem
 * @cnt_lock: protects @highest_inum and @max_sqnum counters
 * @fmt_version: UBIFS on-flash format version
+ * @ro_compat_version: R/O compatibility version
 * @uuid: UUID from super block
 *
 * @lhead_lnum: log head logical eraseblock number
@@ -966,6 +967,7 @@ struct ubifs_debug_info;
 *                   recovery)
 * @bulk_read: enable bulk-reads
 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
+ * @rw_incompat: the media is not R/W compatible
 *
 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
 *             @calc_idx_sz
@@ -1015,6 +1017,8 @@ struct ubifs_debug_info;
 * @min_io_shift: number of bits in @min_io_size minus one
 * @leb_size: logical eraseblock size in bytes
 * @half_leb_size: half LEB size
+ * @idx_leb_size: how many bytes of an LEB are effectively available when it is
+ *                used to store indexing nodes (@leb_size - @max_idx_node_sz)
 * @leb_cnt: count of logical eraseblocks
 * @max_leb_cnt: maximum count of logical eraseblocks
 * @old_leb_cnt: count of logical eraseblocks before re-size
@@ -1132,8 +1136,8 @@ struct ubifs_debug_info;
 *             previous commit start
 * @uncat_list: list of un-categorized LEBs
 * @empty_list: list of empty LEBs
- * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size)
+ * @freeable_list: list of freeable non-index LEBs (free + dirty == @leb_size)
- * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size)
+ * @frdi_idx_list: list of freeable index LEBs (free + dirty == @leb_size)
 * @freeable_cnt: number of freeable LEBs in @freeable_list
 *
 * @ltab_lnum: LEB number of LPT's own lprops table
@@ -1177,6 +1181,7 @@ struct ubifs_info {
        unsigned long long cmt_no;
        spinlock_t cnt_lock;
        int fmt_version;
+        int ro_compat_version;
        unsigned char uuid[16];
        int lhead_lnum;
@@ -1205,6 +1210,7 @@ struct ubifs_info {
        unsigned int no_chk_data_crc:1;
        unsigned int bulk_read:1;
        unsigned int default_compr:2;
+        unsigned int rw_incompat:1;
        struct mutex tnc_mutex;
        struct ubifs_zbranch zroot;
@@ -1253,6 +1259,7 @@ struct ubifs_info {
        int min_io_shift;
        int leb_size;
        int half_leb_size;
+        int idx_leb_size;
        int leb_cnt;
        int max_leb_cnt;
        int old_leb_cnt;
@@ -1500,7 +1507,7 @@ long long ubifs_reported_space(const struct ubifs_info *c, long long free);
 long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
 /* find.c */
-int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
+int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
                          int squeeze);
 int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
 int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
author	Linus Torvalds <torvalds@linux-foundation.org>	2009-04-06 18:00:19 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-04-06 18:00:19 -0400
commit	e0724bf6e4a1f2e678d2b2aab01cae22e17862f0 (patch)
tree	559a8fa8e7a92f8ae0e0a27d4e71f408fa7cec62 /fs/ubifs
parent	38d9aefb5ce8f26358b0d5cd933cfa9e267105b1 (diff)
parent	de0975781a1a8bc92e07eb7681d10ef9bb5e6df9 (diff)