Diffstat (limited to 'fs')
59 files changed, 1348 insertions, 504 deletions
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f039b104a98e..b03dd23feda8 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c | |||
| @@ -43,23 +43,6 @@ | |||
| 43 | #include "fid.h" | 43 | #include "fid.h" |
| 44 | 44 | ||
| 45 | /** | 45 | /** |
| 46 | * v9fs_dentry_delete - called when dentry refcount equals 0 | ||
| 47 | * @dentry: dentry in question | ||
| 48 | * | ||
| 49 | * By returning 1 here we should remove cacheing of unused | ||
| 50 | * dentry components. | ||
| 51 | * | ||
| 52 | */ | ||
| 53 | |||
| 54 | static int v9fs_dentry_delete(const struct dentry *dentry) | ||
| 55 | { | ||
| 56 | p9_debug(P9_DEBUG_VFS, " dentry: %s (%p)\n", | ||
| 57 | dentry->d_name.name, dentry); | ||
| 58 | |||
| 59 | return 1; | ||
| 60 | } | ||
| 61 | |||
| 62 | /** | ||
| 63 | * v9fs_cached_dentry_delete - called when dentry refcount equals 0 | 46 | * v9fs_cached_dentry_delete - called when dentry refcount equals 0 |
| 64 | * @dentry: dentry in question | 47 | * @dentry: dentry in question |
| 65 | * | 48 | * |
| @@ -134,6 +117,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = { | |||
| 134 | }; | 117 | }; |
| 135 | 118 | ||
| 136 | const struct dentry_operations v9fs_dentry_operations = { | 119 | const struct dentry_operations v9fs_dentry_operations = { |
| 137 | .d_delete = v9fs_dentry_delete, | 120 | .d_delete = always_delete_dentry, |
| 138 | .d_release = v9fs_dentry_release, | 121 | .d_release = v9fs_dentry_release, |
| 139 | }; | 122 | }; |
| @@ -601,7 +601,7 @@ EXPORT_SYMBOL(bio_get_nr_vecs); | |||
| 601 | 601 | ||
| 602 | static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | 602 | static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page |
| 603 | *page, unsigned int len, unsigned int offset, | 603 | *page, unsigned int len, unsigned int offset, |
| 604 | unsigned short max_sectors) | 604 | unsigned int max_sectors) |
| 605 | { | 605 | { |
| 606 | int retried_segments = 0; | 606 | int retried_segments = 0; |
| 607 | struct bio_vec *bvec; | 607 | struct bio_vec *bvec; |
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index f9d5094e1029..aa976eced2d2 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig | |||
| @@ -9,12 +9,17 @@ config BTRFS_FS | |||
| 9 | select XOR_BLOCKS | 9 | select XOR_BLOCKS |
| 10 | 10 | ||
| 11 | help | 11 | help |
| 12 | Btrfs is a new filesystem with extents, writable snapshotting, | 12 | Btrfs is a general purpose copy-on-write filesystem with extents, |
| 13 | support for multiple devices and many more features. | 13 | writable snapshotting, support for multiple devices and many more |
| 14 | features focused on fault tolerance, repair and easy administration. | ||
| 14 | 15 | ||
| 15 | Btrfs is highly experimental, and THE DISK FORMAT IS NOT YET | 16 | The filesystem disk format is no longer unstable, and it's not |
| 16 | FINALIZED. You should say N here unless you are interested in | 17 | expected to change unless there are strong reasons to do so. If there |
| 17 | testing Btrfs with non-critical data. | 18 | is a format change, file systems with a unchanged format will |
| 19 | continue to be mountable and usable by newer kernels. | ||
| 20 | |||
| 21 | For more information, please see the web pages at | ||
| 22 | http://btrfs.wiki.kernel.org. | ||
| 18 | 23 | ||
| 19 | To compile this file system support as a module, choose M here. The | 24 | To compile this file system support as a module, choose M here. The |
| 20 | module will be called btrfs. | 25 | module will be called btrfs. |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 8aec751fa464..c1e0b0caf9cc 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
| @@ -495,6 +495,7 @@ static int __btrfs_start_workers(struct btrfs_workers *workers) | |||
| 495 | spin_lock_irq(&workers->lock); | 495 | spin_lock_irq(&workers->lock); |
| 496 | if (workers->stopping) { | 496 | if (workers->stopping) { |
| 497 | spin_unlock_irq(&workers->lock); | 497 | spin_unlock_irq(&workers->lock); |
| 498 | ret = -EINVAL; | ||
| 498 | goto fail_kthread; | 499 | goto fail_kthread; |
| 499 | } | 500 | } |
| 500 | list_add_tail(&worker->worker_list, &workers->idle_list); | 501 | list_add_tail(&worker->worker_list, &workers->idle_list); |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index e0aab4456974..b50764bef141 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
| @@ -77,6 +77,15 @@ | |||
| 77 | * the integrity of (super)-block write requests, do not | 77 | * the integrity of (super)-block write requests, do not |
| 78 | * enable the config option BTRFS_FS_CHECK_INTEGRITY to | 78 | * enable the config option BTRFS_FS_CHECK_INTEGRITY to |
| 79 | * include and compile the integrity check tool. | 79 | * include and compile the integrity check tool. |
| 80 | * | ||
| 81 | * Expect millions of lines of information in the kernel log with an | ||
| 82 | * enabled check_int_print_mask. Therefore set LOG_BUF_SHIFT in the | ||
| 83 | * kernel config to at least 26 (which is 64MB). Usually the value is | ||
| 84 | * limited to 21 (which is 2MB) in init/Kconfig. The file needs to be | ||
| 85 | * changed like this before LOG_BUF_SHIFT can be set to a high value: | ||
| 86 | * config LOG_BUF_SHIFT | ||
| 87 | * int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" | ||
| 88 | * range 12 30 | ||
| 80 | */ | 89 | */ |
| 81 | 90 | ||
| 82 | #include <linux/sched.h> | 91 | #include <linux/sched.h> |
| @@ -124,6 +133,7 @@ | |||
| 124 | #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 | 133 | #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE 0x00000400 |
| 125 | #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 | 134 | #define BTRFSIC_PRINT_MASK_NUM_COPIES 0x00000800 |
| 126 | #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 | 135 | #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS 0x00001000 |
| 136 | #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE 0x00002000 | ||
| 127 | 137 | ||
| 128 | struct btrfsic_dev_state; | 138 | struct btrfsic_dev_state; |
| 129 | struct btrfsic_state; | 139 | struct btrfsic_state; |
| @@ -3015,6 +3025,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 3015 | (rw & WRITE) && NULL != bio->bi_io_vec) { | 3025 | (rw & WRITE) && NULL != bio->bi_io_vec) { |
| 3016 | unsigned int i; | 3026 | unsigned int i; |
| 3017 | u64 dev_bytenr; | 3027 | u64 dev_bytenr; |
| 3028 | u64 cur_bytenr; | ||
| 3018 | int bio_is_patched; | 3029 | int bio_is_patched; |
| 3019 | char **mapped_datav; | 3030 | char **mapped_datav; |
| 3020 | 3031 | ||
| @@ -3033,6 +3044,7 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 3033 | GFP_NOFS); | 3044 | GFP_NOFS); |
| 3034 | if (!mapped_datav) | 3045 | if (!mapped_datav) |
| 3035 | goto leave; | 3046 | goto leave; |
| 3047 | cur_bytenr = dev_bytenr; | ||
| 3036 | for (i = 0; i < bio->bi_vcnt; i++) { | 3048 | for (i = 0; i < bio->bi_vcnt; i++) { |
| 3037 | BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); | 3049 | BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE); |
| 3038 | mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); | 3050 | mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page); |
| @@ -3044,16 +3056,13 @@ void btrfsic_submit_bio(int rw, struct bio *bio) | |||
| 3044 | kfree(mapped_datav); | 3056 | kfree(mapped_datav); |
| 3045 | goto leave; | 3057 | goto leave; |
| 3046 | } | 3058 | } |
| 3047 | if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | 3059 | if (dev_state->state->print_mask & |
| 3048 | BTRFSIC_PRINT_MASK_VERBOSE) == | 3060 | BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH_VERBOSE) |
| 3049 | (dev_state->state->print_mask & | ||
| 3050 | (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH | | ||
| 3051 | BTRFSIC_PRINT_MASK_VERBOSE))) | ||
| 3052 | printk(KERN_INFO | 3061 | printk(KERN_INFO |
| 3053 | "#%u: page=%p, len=%u, offset=%u\n", | 3062 | "#%u: bytenr=%llu, len=%u, offset=%u\n", |
| 3054 | i, bio->bi_io_vec[i].bv_page, | 3063 | i, cur_bytenr, bio->bi_io_vec[i].bv_len, |
| 3055 | bio->bi_io_vec[i].bv_len, | ||
| 3056 | bio->bi_io_vec[i].bv_offset); | 3064 | bio->bi_io_vec[i].bv_offset); |
| 3065 | cur_bytenr += bio->bi_io_vec[i].bv_len; | ||
| 3057 | } | 3066 | } |
| 3058 | btrfsic_process_written_block(dev_state, dev_bytenr, | 3067 | btrfsic_process_written_block(dev_state, dev_bytenr, |
| 3059 | mapped_datav, bio->bi_vcnt, | 3068 | mapped_datav, bio->bi_vcnt, |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f9aeb2759a64..54ab86127f7a 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -3613,9 +3613,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
| 3613 | struct btrfs_ordered_sum *sums); | 3613 | struct btrfs_ordered_sum *sums); |
| 3614 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | 3614 | int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, |
| 3615 | struct bio *bio, u64 file_start, int contig); | 3615 | struct bio *bio, u64 file_start, int contig); |
| 3616 | int btrfs_csum_truncate(struct btrfs_trans_handle *trans, | ||
| 3617 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 3618 | u64 isize); | ||
| 3619 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 3616 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
| 3620 | struct list_head *list, int search_commit); | 3617 | struct list_head *list, int search_commit); |
| 3621 | /* inode.c */ | 3618 | /* inode.c */ |
| @@ -3744,9 +3741,6 @@ void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); | |||
| 3744 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); | 3741 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); |
| 3745 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 3742 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 3746 | int skip_pinned); | 3743 | int skip_pinned); |
| 3747 | int btrfs_replace_extent_cache(struct inode *inode, struct extent_map *replace, | ||
| 3748 | u64 start, u64 end, int skip_pinned, | ||
| 3749 | int modified); | ||
| 3750 | extern const struct file_operations btrfs_file_operations; | 3744 | extern const struct file_operations btrfs_file_operations; |
| 3751 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | 3745 | int __btrfs_drop_extents(struct btrfs_trans_handle *trans, |
| 3752 | struct btrfs_root *root, struct inode *inode, | 3746 | struct btrfs_root *root, struct inode *inode, |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 342f9fd411e3..2cfc3dfff64f 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
| @@ -366,7 +366,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, | |||
| 366 | dev_replace->tgtdev = tgt_device; | 366 | dev_replace->tgtdev = tgt_device; |
| 367 | 367 | ||
| 368 | printk_in_rcu(KERN_INFO | 368 | printk_in_rcu(KERN_INFO |
| 369 | "btrfs: dev_replace from %s (devid %llu) to %s) started\n", | 369 | "btrfs: dev_replace from %s (devid %llu) to %s started\n", |
| 370 | src_device->missing ? "<missing disk>" : | 370 | src_device->missing ? "<missing disk>" : |
| 371 | rcu_str_deref(src_device->name), | 371 | rcu_str_deref(src_device->name), |
| 372 | src_device->devid, | 372 | src_device->devid, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4c4ed0bb3da1..8072cfa8a3b1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -3517,7 +3517,6 @@ int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | |||
| 3517 | int btrfs_commit_super(struct btrfs_root *root) | 3517 | int btrfs_commit_super(struct btrfs_root *root) |
| 3518 | { | 3518 | { |
| 3519 | struct btrfs_trans_handle *trans; | 3519 | struct btrfs_trans_handle *trans; |
| 3520 | int ret; | ||
| 3521 | 3520 | ||
| 3522 | mutex_lock(&root->fs_info->cleaner_mutex); | 3521 | mutex_lock(&root->fs_info->cleaner_mutex); |
| 3523 | btrfs_run_delayed_iputs(root); | 3522 | btrfs_run_delayed_iputs(root); |
| @@ -3531,25 +3530,7 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
| 3531 | trans = btrfs_join_transaction(root); | 3530 | trans = btrfs_join_transaction(root); |
| 3532 | if (IS_ERR(trans)) | 3531 | if (IS_ERR(trans)) |
| 3533 | return PTR_ERR(trans); | 3532 | return PTR_ERR(trans); |
| 3534 | ret = btrfs_commit_transaction(trans, root); | 3533 | return btrfs_commit_transaction(trans, root); |
| 3535 | if (ret) | ||
| 3536 | return ret; | ||
| 3537 | /* run commit again to drop the original snapshot */ | ||
| 3538 | trans = btrfs_join_transaction(root); | ||
| 3539 | if (IS_ERR(trans)) | ||
| 3540 | return PTR_ERR(trans); | ||
| 3541 | ret = btrfs_commit_transaction(trans, root); | ||
| 3542 | if (ret) | ||
| 3543 | return ret; | ||
| 3544 | ret = btrfs_write_and_wait_transaction(NULL, root); | ||
| 3545 | if (ret) { | ||
| 3546 | btrfs_error(root->fs_info, ret, | ||
| 3547 | "Failed to sync btree inode to disk."); | ||
| 3548 | return ret; | ||
| 3549 | } | ||
| 3550 | |||
| 3551 | ret = write_ctree_super(NULL, root, 0); | ||
| 3552 | return ret; | ||
| 3553 | } | 3534 | } |
| 3554 | 3535 | ||
| 3555 | int close_ctree(struct btrfs_root *root) | 3536 | int close_ctree(struct btrfs_root *root) |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 856bc2b2192c..8e457fca0a0b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -1980,6 +1980,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
| 1980 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | 1980 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; |
| 1981 | int ret; | 1981 | int ret; |
| 1982 | 1982 | ||
| 1983 | ASSERT(!(fs_info->sb->s_flags & MS_RDONLY)); | ||
| 1983 | BUG_ON(!mirror_num); | 1984 | BUG_ON(!mirror_num); |
| 1984 | 1985 | ||
| 1985 | /* we can't repair anything in raid56 yet */ | 1986 | /* we can't repair anything in raid56 yet */ |
| @@ -2036,6 +2037,9 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | |||
| 2036 | unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); | 2037 | unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); |
| 2037 | int ret = 0; | 2038 | int ret = 0; |
| 2038 | 2039 | ||
| 2040 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
| 2041 | return -EROFS; | ||
| 2042 | |||
| 2039 | for (i = 0; i < num_pages; i++) { | 2043 | for (i = 0; i < num_pages; i++) { |
| 2040 | struct page *p = extent_buffer_page(eb, i); | 2044 | struct page *p = extent_buffer_page(eb, i); |
| 2041 | ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, | 2045 | ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, |
| @@ -2057,12 +2061,12 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 2057 | u64 private; | 2061 | u64 private; |
| 2058 | u64 private_failure; | 2062 | u64 private_failure; |
| 2059 | struct io_failure_record *failrec; | 2063 | struct io_failure_record *failrec; |
| 2060 | struct btrfs_fs_info *fs_info; | 2064 | struct inode *inode = page->mapping->host; |
| 2065 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | ||
| 2061 | struct extent_state *state; | 2066 | struct extent_state *state; |
| 2062 | int num_copies; | 2067 | int num_copies; |
| 2063 | int did_repair = 0; | 2068 | int did_repair = 0; |
| 2064 | int ret; | 2069 | int ret; |
| 2065 | struct inode *inode = page->mapping->host; | ||
| 2066 | 2070 | ||
| 2067 | private = 0; | 2071 | private = 0; |
| 2068 | ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, | 2072 | ret = count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, |
| @@ -2085,6 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 2085 | did_repair = 1; | 2089 | did_repair = 1; |
| 2086 | goto out; | 2090 | goto out; |
| 2087 | } | 2091 | } |
| 2092 | if (fs_info->sb->s_flags & MS_RDONLY) | ||
| 2093 | goto out; | ||
| 2088 | 2094 | ||
| 2089 | spin_lock(&BTRFS_I(inode)->io_tree.lock); | 2095 | spin_lock(&BTRFS_I(inode)->io_tree.lock); |
| 2090 | state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, | 2096 | state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, |
| @@ -2094,7 +2100,6 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 2094 | 2100 | ||
| 2095 | if (state && state->start <= failrec->start && | 2101 | if (state && state->start <= failrec->start && |
| 2096 | state->end >= failrec->start + failrec->len - 1) { | 2102 | state->end >= failrec->start + failrec->len - 1) { |
| 2097 | fs_info = BTRFS_I(inode)->root->fs_info; | ||
| 2098 | num_copies = btrfs_num_copies(fs_info, failrec->logical, | 2103 | num_copies = btrfs_num_copies(fs_info, failrec->logical, |
| 2099 | failrec->len); | 2104 | failrec->len); |
| 2100 | if (num_copies > 1) { | 2105 | if (num_copies > 1) { |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index da8d2f696ac5..f1a77449d032 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -2129,7 +2129,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, | |||
| 2129 | old->extent_offset, fs_info, | 2129 | old->extent_offset, fs_info, |
| 2130 | path, record_one_backref, | 2130 | path, record_one_backref, |
| 2131 | old); | 2131 | old); |
| 2132 | BUG_ON(ret < 0 && ret != -ENOENT); | 2132 | if (ret < 0 && ret != -ENOENT) |
| 2133 | return false; | ||
| 2133 | 2134 | ||
| 2134 | /* no backref to be processed for this extent */ | 2135 | /* no backref to be processed for this extent */ |
| 2135 | if (!old->count) { | 2136 | if (!old->count) { |
| @@ -6186,8 +6187,7 @@ insert: | |||
| 6186 | write_unlock(&em_tree->lock); | 6187 | write_unlock(&em_tree->lock); |
| 6187 | out: | 6188 | out: |
| 6188 | 6189 | ||
| 6189 | if (em) | 6190 | trace_btrfs_get_extent(root, em); |
| 6190 | trace_btrfs_get_extent(root, em); | ||
| 6191 | 6191 | ||
| 6192 | if (path) | 6192 | if (path) |
| 6193 | btrfs_free_path(path); | 6193 | btrfs_free_path(path); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 25a8f3812f14..69582d5b69d1 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -638,6 +638,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
| 638 | WARN_ON(nr < 0); | 638 | WARN_ON(nr < 0); |
| 639 | } | 639 | } |
| 640 | } | 640 | } |
| 641 | list_splice_tail(&splice, &fs_info->ordered_roots); | ||
| 641 | spin_unlock(&fs_info->ordered_root_lock); | 642 | spin_unlock(&fs_info->ordered_root_lock); |
| 642 | } | 643 | } |
| 643 | 644 | ||
| @@ -803,7 +804,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 803 | btrfs_put_ordered_extent(ordered); | 804 | btrfs_put_ordered_extent(ordered); |
| 804 | break; | 805 | break; |
| 805 | } | 806 | } |
| 806 | if (ordered->file_offset + ordered->len < start) { | 807 | if (ordered->file_offset + ordered->len <= start) { |
| 807 | btrfs_put_ordered_extent(ordered); | 808 | btrfs_put_ordered_extent(ordered); |
| 808 | break; | 809 | break; |
| 809 | } | 810 | } |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2544805544f0..561e2f16ba3e 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -938,8 +938,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 938 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | 938 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
| 939 | } | 939 | } |
| 940 | 940 | ||
| 941 | if (sctx->readonly && !sctx->is_dev_replace) | 941 | if (sctx->readonly) { |
| 942 | goto did_not_correct_error; | 942 | ASSERT(!sctx->is_dev_replace); |
| 943 | goto out; | ||
| 944 | } | ||
| 943 | 945 | ||
| 944 | if (!is_metadata && !have_csum) { | 946 | if (!is_metadata && !have_csum) { |
| 945 | struct scrub_fixup_nodatasum *fixup_nodatasum; | 947 | struct scrub_fixup_nodatasum *fixup_nodatasum; |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 57c16b46afbd..c6a872a8a468 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -1480,7 +1480,7 @@ static void do_async_commit(struct work_struct *work) | |||
| 1480 | * We've got freeze protection passed with the transaction. | 1480 | * We've got freeze protection passed with the transaction. |
| 1481 | * Tell lockdep about it. | 1481 | * Tell lockdep about it. |
| 1482 | */ | 1482 | */ |
| 1483 | if (ac->newtrans->type < TRANS_JOIN_NOLOCK) | 1483 | if (ac->newtrans->type & __TRANS_FREEZABLE) |
| 1484 | rwsem_acquire_read( | 1484 | rwsem_acquire_read( |
| 1485 | &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | 1485 | &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], |
| 1486 | 0, 1, _THIS_IP_); | 1486 | 0, 1, _THIS_IP_); |
| @@ -1521,7 +1521,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
| 1521 | * Tell lockdep we've released the freeze rwsem, since the | 1521 | * Tell lockdep we've released the freeze rwsem, since the |
| 1522 | * async commit thread will be the one to unlock it. | 1522 | * async commit thread will be the one to unlock it. |
| 1523 | */ | 1523 | */ |
| 1524 | if (trans->type < TRANS_JOIN_NOLOCK) | 1524 | if (ac->newtrans->type & __TRANS_FREEZABLE) |
| 1525 | rwsem_release( | 1525 | rwsem_release( |
| 1526 | &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | 1526 | &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], |
| 1527 | 1, _THIS_IP_); | 1527 | 1, _THIS_IP_); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 744553c83fe2..9f7fc51ca334 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -3697,7 +3697,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3697 | ret = btrfs_truncate_inode_items(trans, log, | 3697 | ret = btrfs_truncate_inode_items(trans, log, |
| 3698 | inode, 0, 0); | 3698 | inode, 0, 0); |
| 3699 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, | 3699 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, |
| 3700 | &BTRFS_I(inode)->runtime_flags)) { | 3700 | &BTRFS_I(inode)->runtime_flags) || |
| 3701 | inode_only == LOG_INODE_EXISTS) { | ||
| 3701 | if (inode_only == LOG_INODE_ALL) | 3702 | if (inode_only == LOG_INODE_ALL) |
| 3702 | fast_search = true; | 3703 | fast_search = true; |
| 3703 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 3704 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
| @@ -3801,7 +3802,7 @@ log_extents: | |||
| 3801 | err = ret; | 3802 | err = ret; |
| 3802 | goto out_unlock; | 3803 | goto out_unlock; |
| 3803 | } | 3804 | } |
| 3804 | } else { | 3805 | } else if (inode_only == LOG_INODE_ALL) { |
| 3805 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | 3806 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; |
| 3806 | struct extent_map *em, *n; | 3807 | struct extent_map *em, *n; |
| 3807 | 3808 | ||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0db637097862..92303f42baaa 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -5394,7 +5394,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, | |||
| 5394 | { | 5394 | { |
| 5395 | struct bio_vec *prev; | 5395 | struct bio_vec *prev; |
| 5396 | struct request_queue *q = bdev_get_queue(bdev); | 5396 | struct request_queue *q = bdev_get_queue(bdev); |
| 5397 | unsigned short max_sectors = queue_max_sectors(q); | 5397 | unsigned int max_sectors = queue_max_sectors(q); |
| 5398 | struct bvec_merge_data bvm = { | 5398 | struct bvec_merge_data bvm = { |
| 5399 | .bi_bdev = bdev, | 5399 | .bi_bdev = bdev, |
| 5400 | .bi_sector = sector, | 5400 | .bi_sector = sector, |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 277bd1be21fd..e081acbac2e7 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
| @@ -56,29 +56,28 @@ static void configfs_d_iput(struct dentry * dentry, | |||
| 56 | struct configfs_dirent *sd = dentry->d_fsdata; | 56 | struct configfs_dirent *sd = dentry->d_fsdata; |
| 57 | 57 | ||
| 58 | if (sd) { | 58 | if (sd) { |
| 59 | BUG_ON(sd->s_dentry != dentry); | ||
| 60 | /* Coordinate with configfs_readdir */ | 59 | /* Coordinate with configfs_readdir */ |
| 61 | spin_lock(&configfs_dirent_lock); | 60 | spin_lock(&configfs_dirent_lock); |
| 62 | sd->s_dentry = NULL; | 61 | /* Coordinate with configfs_attach_attr where will increase |
| 62 | * sd->s_count and update sd->s_dentry to new allocated one. | ||
| 63 | * Only set sd->dentry to null when this dentry is the only | ||
| 64 | * sd owner. | ||
| 65 | * If not do so, configfs_d_iput may run just after | ||
| 66 | * configfs_attach_attr and set sd->s_dentry to null | ||
| 67 | * even it's still in use. | ||
| 68 | */ | ||
| 69 | if (atomic_read(&sd->s_count) <= 2) | ||
| 70 | sd->s_dentry = NULL; | ||
| 71 | |||
| 63 | spin_unlock(&configfs_dirent_lock); | 72 | spin_unlock(&configfs_dirent_lock); |
| 64 | configfs_put(sd); | 73 | configfs_put(sd); |
| 65 | } | 74 | } |
| 66 | iput(inode); | 75 | iput(inode); |
| 67 | } | 76 | } |
| 68 | 77 | ||
| 69 | /* | ||
| 70 | * We _must_ delete our dentries on last dput, as the chain-to-parent | ||
| 71 | * behavior is required to clear the parents of default_groups. | ||
| 72 | */ | ||
| 73 | static int configfs_d_delete(const struct dentry *dentry) | ||
| 74 | { | ||
| 75 | return 1; | ||
| 76 | } | ||
| 77 | |||
| 78 | const struct dentry_operations configfs_dentry_ops = { | 78 | const struct dentry_operations configfs_dentry_ops = { |
| 79 | .d_iput = configfs_d_iput, | 79 | .d_iput = configfs_d_iput, |
| 80 | /* simple_delete_dentry() isn't exported */ | 80 | .d_delete = always_delete_dentry, |
| 81 | .d_delete = configfs_d_delete, | ||
| 82 | }; | 81 | }; |
| 83 | 82 | ||
| 84 | #ifdef CONFIG_LOCKDEP | 83 | #ifdef CONFIG_LOCKDEP |
| @@ -426,8 +425,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den | |||
| 426 | struct configfs_attribute * attr = sd->s_element; | 425 | struct configfs_attribute * attr = sd->s_element; |
| 427 | int error; | 426 | int error; |
| 428 | 427 | ||
| 428 | spin_lock(&configfs_dirent_lock); | ||
| 429 | dentry->d_fsdata = configfs_get(sd); | 429 | dentry->d_fsdata = configfs_get(sd); |
| 430 | sd->s_dentry = dentry; | 430 | sd->s_dentry = dentry; |
| 431 | spin_unlock(&configfs_dirent_lock); | ||
| 432 | |||
| 431 | error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, | 433 | error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, |
| 432 | configfs_init_file); | 434 | configfs_init_file); |
| 433 | if (error) { | 435 | if (error) { |
diff --git a/fs/coredump.c b/fs/coredump.c index 62406b6959b6..bc3fbcd32558 100644 --- a/fs/coredump.c +++ b/fs/coredump.c | |||
| @@ -695,7 +695,7 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr) | |||
| 695 | while (nr) { | 695 | while (nr) { |
| 696 | if (dump_interrupted()) | 696 | if (dump_interrupted()) |
| 697 | return 0; | 697 | return 0; |
| 698 | n = vfs_write(file, addr, nr, &pos); | 698 | n = __kernel_write(file, addr, nr, &pos); |
| 699 | if (n <= 0) | 699 | if (n <= 0) |
| 700 | return 0; | 700 | return 0; |
| 701 | file->f_pos = pos; | 701 | file->f_pos = pos; |
| @@ -733,7 +733,7 @@ int dump_align(struct coredump_params *cprm, int align) | |||
| 733 | { | 733 | { |
| 734 | unsigned mod = cprm->written & (align - 1); | 734 | unsigned mod = cprm->written & (align - 1); |
| 735 | if (align & (align - 1)) | 735 | if (align & (align - 1)) |
| 736 | return -EINVAL; | 736 | return 0; |
| 737 | return mod ? dump_skip(cprm, align - mod) : 0; | 737 | return mod ? dump_skip(cprm, align - mod) : 1; |
| 738 | } | 738 | } |
| 739 | EXPORT_SYMBOL(dump_align); | 739 | EXPORT_SYMBOL(dump_align); |
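With the two coredump.c changes above, dump_emit() now writes via __kernel_write() (so a kernel-space buffer is acceptable), and dump_align() appears to adopt the same convention as dump_emit() and dump_skip(): nonzero on success, zero on failure, instead of 0/-EINVAL. A hedged usage sketch of that convention; the caller name is hypothetical:

	/* chain the helpers, treating any zero return as failure */
	static int example_emit_note(struct coredump_params *cprm, const void *data, int len)
	{
		return dump_emit(cprm, data, len) && dump_align(cprm, 4);
	}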
diff --git a/fs/dcache.c b/fs/dcache.c index 0a38ef8d7f00..4bdb300b16e2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -88,35 +88,6 @@ EXPORT_SYMBOL(rename_lock); | |||
| 88 | 88 | ||
| 89 | static struct kmem_cache *dentry_cache __read_mostly; | 89 | static struct kmem_cache *dentry_cache __read_mostly; |
| 90 | 90 | ||
| 91 | /** | ||
| 92 | * read_seqbegin_or_lock - begin a sequence number check or locking block | ||
| 93 | * @lock: sequence lock | ||
| 94 | * @seq : sequence number to be checked | ||
| 95 | * | ||
| 96 | * First try it once optimistically without taking the lock. If that fails, | ||
| 97 | * take the lock. The sequence number is also used as a marker for deciding | ||
| 98 | * whether to be a reader (even) or writer (odd). | ||
| 99 | * N.B. seq must be initialized to an even number to begin with. | ||
| 100 | */ | ||
| 101 | static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) | ||
| 102 | { | ||
| 103 | if (!(*seq & 1)) /* Even */ | ||
| 104 | *seq = read_seqbegin(lock); | ||
| 105 | else /* Odd */ | ||
| 106 | read_seqlock_excl(lock); | ||
| 107 | } | ||
| 108 | |||
| 109 | static inline int need_seqretry(seqlock_t *lock, int seq) | ||
| 110 | { | ||
| 111 | return !(seq & 1) && read_seqretry(lock, seq); | ||
| 112 | } | ||
| 113 | |||
| 114 | static inline void done_seqretry(seqlock_t *lock, int seq) | ||
| 115 | { | ||
| 116 | if (seq & 1) | ||
| 117 | read_sequnlock_excl(lock); | ||
| 118 | } | ||
| 119 | |||
| 120 | /* | 91 | /* |
| 121 | * This is the single most critical data structure when it comes | 92 | * This is the single most critical data structure when it comes |
| 122 | * to the dcache: the hashtable for lookups. Somebody should try | 93 | * to the dcache: the hashtable for lookups. Somebody should try |
| @@ -125,8 +96,6 @@ static inline void done_seqretry(seqlock_t *lock, int seq) | |||
| 125 | * This hash-function tries to avoid losing too many bits of hash | 96 | * This hash-function tries to avoid losing too many bits of hash |
| 126 | * information, yet avoid using a prime hash-size or similar. | 97 | * information, yet avoid using a prime hash-size or similar. |
| 127 | */ | 98 | */ |
| 128 | #define D_HASHBITS d_hash_shift | ||
| 129 | #define D_HASHMASK d_hash_mask | ||
| 130 | 99 | ||
| 131 | static unsigned int d_hash_mask __read_mostly; | 100 | static unsigned int d_hash_mask __read_mostly; |
| 132 | static unsigned int d_hash_shift __read_mostly; | 101 | static unsigned int d_hash_shift __read_mostly; |
| @@ -137,8 +106,8 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, | |||
| 137 | unsigned int hash) | 106 | unsigned int hash) |
| 138 | { | 107 | { |
| 139 | hash += (unsigned long) parent / L1_CACHE_BYTES; | 108 | hash += (unsigned long) parent / L1_CACHE_BYTES; |
| 140 | hash = hash + (hash >> D_HASHBITS); | 109 | hash = hash + (hash >> d_hash_shift); |
| 141 | return dentry_hashtable + (hash & D_HASHMASK); | 110 | return dentry_hashtable + (hash & d_hash_mask); |
| 142 | } | 111 | } |
| 143 | 112 | ||
| 144 | /* Statistics gathering. */ | 113 | /* Statistics gathering. */ |
| @@ -469,7 +438,7 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent) | |||
| 469 | { | 438 | { |
| 470 | list_del(&dentry->d_u.d_child); | 439 | list_del(&dentry->d_u.d_child); |
| 471 | /* | 440 | /* |
| 472 | * Inform try_to_ascend() that we are no longer attached to the | 441 | * Inform d_walk() that we are no longer attached to the |
| 473 | * dentry tree | 442 | * dentry tree |
| 474 | */ | 443 | */ |
| 475 | dentry->d_flags |= DCACHE_DENTRY_KILLED; | 444 | dentry->d_flags |= DCACHE_DENTRY_KILLED; |
| @@ -1069,34 +1038,6 @@ void shrink_dcache_sb(struct super_block *sb) | |||
| 1069 | } | 1038 | } |
| 1070 | EXPORT_SYMBOL(shrink_dcache_sb); | 1039 | EXPORT_SYMBOL(shrink_dcache_sb); |
| 1071 | 1040 | ||
| 1072 | /* | ||
| 1073 | * This tries to ascend one level of parenthood, but | ||
| 1074 | * we can race with renaming, so we need to re-check | ||
| 1075 | * the parenthood after dropping the lock and check | ||
| 1076 | * that the sequence number still matches. | ||
| 1077 | */ | ||
| 1078 | static struct dentry *try_to_ascend(struct dentry *old, unsigned seq) | ||
| 1079 | { | ||
| 1080 | struct dentry *new = old->d_parent; | ||
| 1081 | |||
| 1082 | rcu_read_lock(); | ||
| 1083 | spin_unlock(&old->d_lock); | ||
| 1084 | spin_lock(&new->d_lock); | ||
| 1085 | |||
| 1086 | /* | ||
| 1087 | * might go back up the wrong parent if we have had a rename | ||
| 1088 | * or deletion | ||
| 1089 | */ | ||
| 1090 | if (new != old->d_parent || | ||
| 1091 | (old->d_flags & DCACHE_DENTRY_KILLED) || | ||
| 1092 | need_seqretry(&rename_lock, seq)) { | ||
| 1093 | spin_unlock(&new->d_lock); | ||
| 1094 | new = NULL; | ||
| 1095 | } | ||
| 1096 | rcu_read_unlock(); | ||
| 1097 | return new; | ||
| 1098 | } | ||
| 1099 | |||
| 1100 | /** | 1041 | /** |
| 1101 | * enum d_walk_ret - action to talke during tree walk | 1042 | * enum d_walk_ret - action to talke during tree walk |
| 1102 | * @D_WALK_CONTINUE: contrinue walk | 1043 | * @D_WALK_CONTINUE: contrinue walk |
| @@ -1185,9 +1126,24 @@ resume: | |||
| 1185 | */ | 1126 | */ |
| 1186 | if (this_parent != parent) { | 1127 | if (this_parent != parent) { |
| 1187 | struct dentry *child = this_parent; | 1128 | struct dentry *child = this_parent; |
| 1188 | this_parent = try_to_ascend(this_parent, seq); | 1129 | this_parent = child->d_parent; |
| 1189 | if (!this_parent) | 1130 | |
| 1131 | rcu_read_lock(); | ||
| 1132 | spin_unlock(&child->d_lock); | ||
| 1133 | spin_lock(&this_parent->d_lock); | ||
| 1134 | |||
| 1135 | /* | ||
| 1136 | * might go back up the wrong parent if we have had a rename | ||
| 1137 | * or deletion | ||
| 1138 | */ | ||
| 1139 | if (this_parent != child->d_parent || | ||
| 1140 | (child->d_flags & DCACHE_DENTRY_KILLED) || | ||
| 1141 | need_seqretry(&rename_lock, seq)) { | ||
| 1142 | spin_unlock(&this_parent->d_lock); | ||
| 1143 | rcu_read_unlock(); | ||
| 1190 | goto rename_retry; | 1144 | goto rename_retry; |
| 1145 | } | ||
| 1146 | rcu_read_unlock(); | ||
| 1191 | next = child->d_u.d_child.next; | 1147 | next = child->d_u.d_child.next; |
| 1192 | goto resume; | 1148 | goto resume; |
| 1193 | } | 1149 | } |
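The dcache.c changes fold try_to_ascend() into d_walk() and drop the file-local copies of read_seqbegin_or_lock(), need_seqretry() and done_seqretry(); since d_walk() still calls need_seqretry() after this patch, those helpers are presumably provided by a shared seqlock header now. For reference, a minimal sketch of the optimistic-then-locked retry pattern described by the removed kernel-doc (the walker name is hypothetical, the body elided):

	static void example_walk(seqlock_t *lock)
	{
		int seq = 0;				/* must start even: lockless reader first */
	again:
		read_seqbegin_or_lock(lock, &seq);	/* even: read_seqbegin(); odd: take the lock */
		/* ... walk the data structure ... */
		if (need_seqretry(lock, seq)) {		/* the lockless pass raced with a writer */
			seq = 1;			/* retry as a locked (exclusive) reader */
			goto again;
		}
		done_seqretry(lock, seq);		/* unlocks only if the lock was taken */
	}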
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index 60a327863b11..e7cfbaf8d0e2 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c | |||
| @@ -74,14 +74,16 @@ static int user_cmd(struct sk_buff *skb, struct genl_info *info) | |||
| 74 | return 0; | 74 | return 0; |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | static struct genl_ops dlm_nl_ops = { | 77 | static struct genl_ops dlm_nl_ops[] = { |
| 78 | .cmd = DLM_CMD_HELLO, | 78 | { |
| 79 | .doit = user_cmd, | 79 | .cmd = DLM_CMD_HELLO, |
| 80 | .doit = user_cmd, | ||
| 81 | }, | ||
| 80 | }; | 82 | }; |
| 81 | 83 | ||
| 82 | int __init dlm_netlink_init(void) | 84 | int __init dlm_netlink_init(void) |
| 83 | { | 85 | { |
| 84 | return genl_register_family_with_ops(&family, &dlm_nl_ops, 1); | 86 | return genl_register_family_with_ops(&family, dlm_nl_ops); |
| 85 | } | 87 | } |
| 86 | 88 | ||
| 87 | void dlm_netlink_exit(void) | 89 | void dlm_netlink_exit(void) |
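This is part of the genetlink rework also visible in fs/quota/netlink.c below: operations are now passed as an array, and genl_register_family_with_ops() derives the count from the array itself. A sketch of the registration pattern; the example_* and EXAMPLE_* names are hypothetical:

	static struct genl_ops example_nl_ops[] = {
		{
			.cmd  = EXAMPLE_CMD_HELLO,
			.doit = example_cmd_handler,
		},
	};

	static int __init example_netlink_init(void)
	{
		/* the op count comes from ARRAY_SIZE() of the ops array */
		return genl_register_family_with_ops(&example_family, example_nl_ops);
	}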
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index a8766b880c07..becc725a1953 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c | |||
| @@ -83,19 +83,10 @@ static int efivarfs_d_hash(const struct dentry *dentry, struct qstr *qstr) | |||
| 83 | return 0; | 83 | return 0; |
| 84 | } | 84 | } |
| 85 | 85 | ||
| 86 | /* | ||
| 87 | * Retaining negative dentries for an in-memory filesystem just wastes | ||
| 88 | * memory and lookup time: arrange for them to be deleted immediately. | ||
| 89 | */ | ||
| 90 | static int efivarfs_delete_dentry(const struct dentry *dentry) | ||
| 91 | { | ||
| 92 | return 1; | ||
| 93 | } | ||
| 94 | |||
| 95 | static struct dentry_operations efivarfs_d_ops = { | 86 | static struct dentry_operations efivarfs_d_ops = { |
| 96 | .d_compare = efivarfs_d_compare, | 87 | .d_compare = efivarfs_d_compare, |
| 97 | .d_hash = efivarfs_d_hash, | 88 | .d_hash = efivarfs_d_hash, |
| 98 | .d_delete = efivarfs_delete_dentry, | 89 | .d_delete = always_delete_dentry, |
| 99 | }; | 90 | }; |
| 100 | 91 | ||
| 101 | static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) | 92 | static struct dentry *efivarfs_alloc_dentry(struct dentry *parent, char *name) |
| @@ -1380,10 +1380,6 @@ int search_binary_handler(struct linux_binprm *bprm) | |||
| 1380 | if (retval) | 1380 | if (retval) |
| 1381 | return retval; | 1381 | return retval; |
| 1382 | 1382 | ||
| 1383 | retval = audit_bprm(bprm); | ||
| 1384 | if (retval) | ||
| 1385 | return retval; | ||
| 1386 | |||
| 1387 | retval = -ENOENT; | 1383 | retval = -ENOENT; |
| 1388 | retry: | 1384 | retry: |
| 1389 | read_lock(&binfmt_lock); | 1385 | read_lock(&binfmt_lock); |
| @@ -1431,6 +1427,7 @@ static int exec_binprm(struct linux_binprm *bprm) | |||
| 1431 | 1427 | ||
| 1432 | ret = search_binary_handler(bprm); | 1428 | ret = search_binary_handler(bprm); |
| 1433 | if (ret >= 0) { | 1429 | if (ret >= 0) { |
| 1430 | audit_bprm(bprm); | ||
| 1434 | trace_sched_process_exec(current, old_pid, bprm); | 1431 | trace_sched_process_exec(current, old_pid, bprm); |
| 1435 | ptrace_event(PTRACE_EVENT_EXEC, old_vpid); | 1432 | ptrace_event(PTRACE_EVENT_EXEC, old_vpid); |
| 1436 | current->did_exec = 1; | 1433 | current->did_exec = 1; |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index e66a8009aff1..c8420f7e4db6 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -1899,7 +1899,8 @@ static int gfs2_glock_iter_next(struct gfs2_glock_iter *gi) | |||
| 1899 | gi->nhash = 0; | 1899 | gi->nhash = 0; |
| 1900 | } | 1900 | } |
| 1901 | /* Skip entries for other sb and dead entries */ | 1901 | /* Skip entries for other sb and dead entries */ |
| 1902 | } while (gi->sdp != gi->gl->gl_sbd || __lockref_is_dead(&gl->gl_lockref)); | 1902 | } while (gi->sdp != gi->gl->gl_sbd || |
| 1903 | __lockref_is_dead(&gi->gl->gl_lockref)); | ||
| 1903 | 1904 | ||
| 1904 | return 0; | 1905 | return 0; |
| 1905 | } | 1906 | } |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 1615df16cf4e..7119504159f1 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -1171,8 +1171,11 @@ static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, | |||
| 1171 | if (d != NULL) | 1171 | if (d != NULL) |
| 1172 | dentry = d; | 1172 | dentry = d; |
| 1173 | if (dentry->d_inode) { | 1173 | if (dentry->d_inode) { |
| 1174 | if (!(*opened & FILE_OPENED)) | 1174 | if (!(*opened & FILE_OPENED)) { |
| 1175 | if (d == NULL) | ||
| 1176 | dget(dentry); | ||
| 1175 | return finish_no_open(file, dentry); | 1177 | return finish_no_open(file, dentry); |
| 1178 | } | ||
| 1176 | dput(d); | 1179 | dput(d); |
| 1177 | return 0; | 1180 | return 0; |
| 1178 | } | 1181 | } |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index c8423d6de6c3..2a6ba06bee6f 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
| @@ -466,19 +466,19 @@ static void gdlm_cancel(struct gfs2_glock *gl) | |||
| 466 | static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, | 466 | static void control_lvb_read(struct lm_lockstruct *ls, uint32_t *lvb_gen, |
| 467 | char *lvb_bits) | 467 | char *lvb_bits) |
| 468 | { | 468 | { |
| 469 | uint32_t gen; | 469 | __le32 gen; |
| 470 | memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); | 470 | memcpy(lvb_bits, ls->ls_control_lvb, GDLM_LVB_SIZE); |
| 471 | memcpy(&gen, lvb_bits, sizeof(uint32_t)); | 471 | memcpy(&gen, lvb_bits, sizeof(__le32)); |
| 472 | *lvb_gen = le32_to_cpu(gen); | 472 | *lvb_gen = le32_to_cpu(gen); |
| 473 | } | 473 | } |
| 474 | 474 | ||
| 475 | static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, | 475 | static void control_lvb_write(struct lm_lockstruct *ls, uint32_t lvb_gen, |
| 476 | char *lvb_bits) | 476 | char *lvb_bits) |
| 477 | { | 477 | { |
| 478 | uint32_t gen; | 478 | __le32 gen; |
| 479 | memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); | 479 | memcpy(ls->ls_control_lvb, lvb_bits, GDLM_LVB_SIZE); |
| 480 | gen = cpu_to_le32(lvb_gen); | 480 | gen = cpu_to_le32(lvb_gen); |
| 481 | memcpy(ls->ls_control_lvb, &gen, sizeof(uint32_t)); | 481 | memcpy(ls->ls_control_lvb, &gen, sizeof(__le32)); |
| 482 | } | 482 | } |
| 483 | 483 | ||
| 484 | static int all_jid_bits_clear(char *lvb) | 484 | static int all_jid_bits_clear(char *lvb) |
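The lock_dlm.c change, like the cpu_to_be32(GFS2_MAGIC) comparisons in rgrp.c below, is an endianness-annotation cleanup: values that live in the LVB or on disk keep their wire type (__le32 here, __be32 there), and the conversion happens exactly once at the boundary, where sparse can check it. A minimal sketch of that pattern; function names are hypothetical:

	static void example_lvb_write(char *lvb, u32 host_gen)
	{
		__le32 disk_gen = cpu_to_le32(host_gen);	/* convert at the boundary */

		memcpy(lvb, &disk_gen, sizeof(__le32));
	}

	static u32 example_lvb_read(const char *lvb)
	{
		__le32 disk_gen;

		memcpy(&disk_gen, lvb, sizeof(__le32));
		return le32_to_cpu(disk_gen);
	}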
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 453b50eaddec..98236d0df3ca 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -667,7 +667,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
| 667 | struct buffer_head *bh; | 667 | struct buffer_head *bh; |
| 668 | struct page *page; | 668 | struct page *page; |
| 669 | void *kaddr, *ptr; | 669 | void *kaddr, *ptr; |
| 670 | struct gfs2_quota q, *qp; | 670 | struct gfs2_quota q; |
| 671 | int err, nbytes; | 671 | int err, nbytes; |
| 672 | u64 size; | 672 | u64 size; |
| 673 | 673 | ||
| @@ -683,28 +683,25 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc, | |||
| 683 | return err; | 683 | return err; |
| 684 | 684 | ||
| 685 | err = -EIO; | 685 | err = -EIO; |
| 686 | qp = &q; | 686 | be64_add_cpu(&q.qu_value, change); |
| 687 | qp->qu_value = be64_to_cpu(qp->qu_value); | 687 | qd->qd_qb.qb_value = q.qu_value; |
| 688 | qp->qu_value += change; | ||
| 689 | qp->qu_value = cpu_to_be64(qp->qu_value); | ||
| 690 | qd->qd_qb.qb_value = qp->qu_value; | ||
| 691 | if (fdq) { | 688 | if (fdq) { |
| 692 | if (fdq->d_fieldmask & FS_DQ_BSOFT) { | 689 | if (fdq->d_fieldmask & FS_DQ_BSOFT) { |
| 693 | qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); | 690 | q.qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift); |
| 694 | qd->qd_qb.qb_warn = qp->qu_warn; | 691 | qd->qd_qb.qb_warn = q.qu_warn; |
| 695 | } | 692 | } |
| 696 | if (fdq->d_fieldmask & FS_DQ_BHARD) { | 693 | if (fdq->d_fieldmask & FS_DQ_BHARD) { |
| 697 | qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); | 694 | q.qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift); |
| 698 | qd->qd_qb.qb_limit = qp->qu_limit; | 695 | qd->qd_qb.qb_limit = q.qu_limit; |
| 699 | } | 696 | } |
| 700 | if (fdq->d_fieldmask & FS_DQ_BCOUNT) { | 697 | if (fdq->d_fieldmask & FS_DQ_BCOUNT) { |
| 701 | qp->qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); | 698 | q.qu_value = cpu_to_be64(fdq->d_bcount >> sdp->sd_fsb2bb_shift); |
| 702 | qd->qd_qb.qb_value = qp->qu_value; | 699 | qd->qd_qb.qb_value = q.qu_value; |
| 703 | } | 700 | } |
| 704 | } | 701 | } |
| 705 | 702 | ||
| 706 | /* Write the quota into the quota file on disk */ | 703 | /* Write the quota into the quota file on disk */ |
| 707 | ptr = qp; | 704 | ptr = &q; |
| 708 | nbytes = sizeof(struct gfs2_quota); | 705 | nbytes = sizeof(struct gfs2_quota); |
| 709 | get_a_page: | 706 | get_a_page: |
| 710 | page = find_or_create_page(mapping, index, GFP_NOFS); | 707 | page = find_or_create_page(mapping, index, GFP_NOFS); |
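The quota change drops the local be64_to_cpu()/cpu_to_be64() round-trip in favour of be64_add_cpu(), which adjusts a big-endian field in place. A small illustration of the helper's semantics (values arbitrary):

	__be64 value = cpu_to_be64(100);	/* on-disk representation of 100 */

	be64_add_cpu(&value, 25);		/* in place: now the on-disk form of 125 */
	/* equivalent to: value = cpu_to_be64(be64_to_cpu(value) + 25); */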
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 4d83abdd5635..c8d6161bd682 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -1127,7 +1127,7 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | |||
| 1127 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); | 1127 | rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); |
| 1128 | rgd->rd_free_clone = rgd->rd_free; | 1128 | rgd->rd_free_clone = rgd->rd_free; |
| 1129 | } | 1129 | } |
| 1130 | if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { | 1130 | if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { |
| 1131 | rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); | 1131 | rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); |
| 1132 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, | 1132 | gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, |
| 1133 | rgd->rd_bits[0].bi_bh->b_data); | 1133 | rgd->rd_bits[0].bi_bh->b_data); |
| @@ -1161,7 +1161,7 @@ int update_rgrp_lvb(struct gfs2_rgrpd *rgd) | |||
| 1161 | if (rgd->rd_flags & GFS2_RDF_UPTODATE) | 1161 | if (rgd->rd_flags & GFS2_RDF_UPTODATE) |
| 1162 | return 0; | 1162 | return 0; |
| 1163 | 1163 | ||
| 1164 | if (be32_to_cpu(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) | 1164 | if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) |
| 1165 | return gfs2_rgrp_bh_get(rgd); | 1165 | return gfs2_rgrp_bh_get(rgd); |
| 1166 | 1166 | ||
| 1167 | rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); | 1167 | rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 25437280a207..db23ce1bd903 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
| @@ -33,15 +33,6 @@ static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) | |||
| 33 | 33 | ||
| 34 | #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) | 34 | #define FILE_HOSTFS_I(file) HOSTFS_I(file_inode(file)) |
| 35 | 35 | ||
| 36 | static int hostfs_d_delete(const struct dentry *dentry) | ||
| 37 | { | ||
| 38 | return 1; | ||
| 39 | } | ||
| 40 | |||
| 41 | static const struct dentry_operations hostfs_dentry_ops = { | ||
| 42 | .d_delete = hostfs_d_delete, | ||
| 43 | }; | ||
| 44 | |||
| 45 | /* Changed in hostfs_args before the kernel starts running */ | 36 | /* Changed in hostfs_args before the kernel starts running */ |
| 46 | static char *root_ino = ""; | 37 | static char *root_ino = ""; |
| 47 | static int append = 0; | 38 | static int append = 0; |
| @@ -925,7 +916,7 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) | |||
| 925 | sb->s_blocksize_bits = 10; | 916 | sb->s_blocksize_bits = 10; |
| 926 | sb->s_magic = HOSTFS_SUPER_MAGIC; | 917 | sb->s_magic = HOSTFS_SUPER_MAGIC; |
| 927 | sb->s_op = &hostfs_sbops; | 918 | sb->s_op = &hostfs_sbops; |
| 928 | sb->s_d_op = &hostfs_dentry_ops; | 919 | sb->s_d_op = &simple_dentry_operations; |
| 929 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 920 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
| 930 | 921 | ||
| 931 | /* NULL is printed as <NULL> by sprintf: avoid that. */ | 922 | /* NULL is printed as <NULL> by sprintf: avoid that. */ |
diff --git a/fs/libfs.c b/fs/libfs.c index 5de06947ba5e..a1844244246f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
| @@ -47,10 +47,16 @@ EXPORT_SYMBOL(simple_statfs); | |||
| 47 | * Retaining negative dentries for an in-memory filesystem just wastes | 47 | * Retaining negative dentries for an in-memory filesystem just wastes |
| 48 | * memory and lookup time: arrange for them to be deleted immediately. | 48 | * memory and lookup time: arrange for them to be deleted immediately. |
| 49 | */ | 49 | */ |
| 50 | static int simple_delete_dentry(const struct dentry *dentry) | 50 | int always_delete_dentry(const struct dentry *dentry) |
| 51 | { | 51 | { |
| 52 | return 1; | 52 | return 1; |
| 53 | } | 53 | } |
| 54 | EXPORT_SYMBOL(always_delete_dentry); | ||
| 55 | |||
| 56 | const struct dentry_operations simple_dentry_operations = { | ||
| 57 | .d_delete = always_delete_dentry, | ||
| 58 | }; | ||
| 59 | EXPORT_SYMBOL(simple_dentry_operations); | ||
| 54 | 60 | ||
| 55 | /* | 61 | /* |
| 56 | * Lookup the data. This is trivial - if the dentry didn't already | 62 | * Lookup the data. This is trivial - if the dentry didn't already |
| @@ -58,10 +64,6 @@ static int simple_delete_dentry(const struct dentry *dentry) | |||
| 58 | */ | 64 | */ |
| 59 | struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 65 | struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
| 60 | { | 66 | { |
| 61 | static const struct dentry_operations simple_dentry_operations = { | ||
| 62 | .d_delete = simple_delete_dentry, | ||
| 63 | }; | ||
| 64 | |||
| 65 | if (dentry->d_name.len > NAME_MAX) | 67 | if (dentry->d_name.len > NAME_MAX) |
| 66 | return ERR_PTR(-ENAMETOOLONG); | 68 | return ERR_PTR(-ENAMETOOLONG); |
| 67 | if (!dentry->d_sb->s_d_op) | 69 | if (!dentry->d_sb->s_d_op) |
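libfs.c now exports always_delete_dentry() and a ready-made simple_dentry_operations, which is what lets 9p, configfs, efivarfs, hostfs, procfs and the ns dentries elsewhere in this series drop their private "return 1" d_delete helpers. The conversion comes in the two flavours seen in those diffs (the example_* names below are placeholders):

	/* either point d_delete at the shared helper ... */
	const struct dentry_operations example_dentry_operations = {
		.d_delete  = always_delete_dentry,
		.d_release = example_dentry_release,	/* fs-specific ops stay as they were */
	};

	/* ... or, when no other dentry ops are needed, reuse the shared table */
	sb->s_d_op = &simple_dentry_operations;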
diff --git a/fs/namei.c b/fs/namei.c index e029a4cbff7d..8f77a8cea289 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
| @@ -2435,6 +2435,7 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) | |||
| 2435 | */ | 2435 | */ |
| 2436 | static inline int may_create(struct inode *dir, struct dentry *child) | 2436 | static inline int may_create(struct inode *dir, struct dentry *child) |
| 2437 | { | 2437 | { |
| 2438 | audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); | ||
| 2438 | if (child->d_inode) | 2439 | if (child->d_inode) |
| 2439 | return -EEXIST; | 2440 | return -EEXIST; |
| 2440 | if (IS_DEADDIR(dir)) | 2441 | if (IS_DEADDIR(dir)) |
diff --git a/fs/proc/base.c b/fs/proc/base.c index 1485e38daaa3..03c8d747be48 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -1151,10 +1151,16 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, | |||
| 1151 | goto out_free_page; | 1151 | goto out_free_page; |
| 1152 | 1152 | ||
| 1153 | } | 1153 | } |
| 1154 | kloginuid = make_kuid(file->f_cred->user_ns, loginuid); | 1154 | |
| 1155 | if (!uid_valid(kloginuid)) { | 1155 | /* is userspace tring to explicitly UNSET the loginuid? */ |
| 1156 | length = -EINVAL; | 1156 | if (loginuid == AUDIT_UID_UNSET) { |
| 1157 | goto out_free_page; | 1157 | kloginuid = INVALID_UID; |
| 1158 | } else { | ||
| 1159 | kloginuid = make_kuid(file->f_cred->user_ns, loginuid); | ||
| 1160 | if (!uid_valid(kloginuid)) { | ||
| 1161 | length = -EINVAL; | ||
| 1162 | goto out_free_page; | ||
| 1163 | } | ||
| 1158 | } | 1164 | } |
| 1159 | 1165 | ||
| 1160 | length = audit_set_loginuid(kloginuid); | 1166 | length = audit_set_loginuid(kloginuid); |
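proc_loginuid_write() now special-cases AUDIT_UID_UNSET, i.e. (unsigned int)-1, mapping it to INVALID_UID instead of failing with -EINVAL. A hedged user-space illustration of what that permits (still subject to audit_set_loginuid()'s own permission checks):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/self/loginuid", "w");

		if (!f)
			return 1;
		fprintf(f, "%u", (unsigned int)-1);	/* AUDIT_UID_UNSET */
		return fclose(f) != 0;
	}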
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 737e15615b04..cca93b6fb9a9 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
| @@ -175,22 +175,6 @@ static const struct inode_operations proc_link_inode_operations = { | |||
| 175 | }; | 175 | }; |
| 176 | 176 | ||
| 177 | /* | 177 | /* |
| 178 | * As some entries in /proc are volatile, we want to | ||
| 179 | * get rid of unused dentries. This could be made | ||
| 180 | * smarter: we could keep a "volatile" flag in the | ||
| 181 | * inode to indicate which ones to keep. | ||
| 182 | */ | ||
| 183 | static int proc_delete_dentry(const struct dentry * dentry) | ||
| 184 | { | ||
| 185 | return 1; | ||
| 186 | } | ||
| 187 | |||
| 188 | static const struct dentry_operations proc_dentry_operations = | ||
| 189 | { | ||
| 190 | .d_delete = proc_delete_dentry, | ||
| 191 | }; | ||
| 192 | |||
| 193 | /* | ||
| 194 | * Don't create negative dentries here, return -ENOENT by hand | 178 | * Don't create negative dentries here, return -ENOENT by hand |
| 195 | * instead. | 179 | * instead. |
| 196 | */ | 180 | */ |
| @@ -209,7 +193,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, | |||
| 209 | inode = proc_get_inode(dir->i_sb, de); | 193 | inode = proc_get_inode(dir->i_sb, de); |
| 210 | if (!inode) | 194 | if (!inode) |
| 211 | return ERR_PTR(-ENOMEM); | 195 | return ERR_PTR(-ENOMEM); |
| 212 | d_set_d_op(dentry, &proc_dentry_operations); | 196 | d_set_d_op(dentry, &simple_dentry_operations); |
| 213 | d_add(dentry, inode); | 197 | d_add(dentry, inode); |
| 214 | return NULL; | 198 | return NULL; |
| 215 | } | 199 | } |
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 49a7fff2e83a..9ae46b87470d 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
| @@ -42,12 +42,6 @@ static const struct inode_operations ns_inode_operations = { | |||
| 42 | .setattr = proc_setattr, | 42 | .setattr = proc_setattr, |
| 43 | }; | 43 | }; |
| 44 | 44 | ||
| 45 | static int ns_delete_dentry(const struct dentry *dentry) | ||
| 46 | { | ||
| 47 | /* Don't cache namespace inodes when not in use */ | ||
| 48 | return 1; | ||
| 49 | } | ||
| 50 | |||
| 51 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | 45 | static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) |
| 52 | { | 46 | { |
| 53 | struct inode *inode = dentry->d_inode; | 47 | struct inode *inode = dentry->d_inode; |
| @@ -59,7 +53,7 @@ static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) | |||
| 59 | 53 | ||
| 60 | const struct dentry_operations ns_dentry_operations = | 54 | const struct dentry_operations ns_dentry_operations = |
| 61 | { | 55 | { |
| 62 | .d_delete = ns_delete_dentry, | 56 | .d_delete = always_delete_dentry, |
| 63 | .d_dname = ns_dname, | 57 | .d_dname = ns_dname, |
| 64 | }; | 58 | }; |
| 65 | 59 | ||
diff --git a/fs/quota/netlink.c b/fs/quota/netlink.c index 16e8abb7709b..72d29177998e 100644 --- a/fs/quota/netlink.c +++ b/fs/quota/netlink.c | |||
| @@ -9,13 +9,25 @@ | |||
| 9 | #include <net/netlink.h> | 9 | #include <net/netlink.h> |
| 10 | #include <net/genetlink.h> | 10 | #include <net/genetlink.h> |
| 11 | 11 | ||
| 12 | static const struct genl_multicast_group quota_mcgrps[] = { | ||
| 13 | { .name = "events", }, | ||
| 14 | }; | ||
| 15 | |||
| 12 | /* Netlink family structure for quota */ | 16 | /* Netlink family structure for quota */ |
| 13 | static struct genl_family quota_genl_family = { | 17 | static struct genl_family quota_genl_family = { |
| 14 | .id = GENL_ID_GENERATE, | 18 | /* |
| 19 | * Needed due to multicast group ID abuse - old code assumed | ||
| 20 | * the family ID was also a valid multicast group ID (which | ||
| 21 | * isn't true) and userspace might thus rely on it. Assign a | ||
| 22 | * static ID for this group to make dealing with that easier. | ||
| 23 | */ | ||
| 24 | .id = GENL_ID_VFS_DQUOT, | ||
| 15 | .hdrsize = 0, | 25 | .hdrsize = 0, |
| 16 | .name = "VFS_DQUOT", | 26 | .name = "VFS_DQUOT", |
| 17 | .version = 1, | 27 | .version = 1, |
| 18 | .maxattr = QUOTA_NL_A_MAX, | 28 | .maxattr = QUOTA_NL_A_MAX, |
| 29 | .mcgrps = quota_mcgrps, | ||
| 30 | .n_mcgrps = ARRAY_SIZE(quota_mcgrps), | ||
| 19 | }; | 31 | }; |
| 20 | 32 | ||
| 21 | /** | 33 | /** |
| @@ -78,7 +90,7 @@ void quota_send_warning(struct kqid qid, dev_t dev, | |||
| 78 | goto attr_err_out; | 90 | goto attr_err_out; |
| 79 | genlmsg_end(skb, msg_head); | 91 | genlmsg_end(skb, msg_head); |
| 80 | 92 | ||
| 81 | genlmsg_multicast(skb, 0, quota_genl_family.id, GFP_NOFS); | 93 | genlmsg_multicast(&quota_genl_family, skb, 0, 0, GFP_NOFS); |
| 82 | return; | 94 | return; |
| 83 | attr_err_out: | 95 | attr_err_out: |
| 84 | printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); | 96 | printk(KERN_ERR "VFS: Not enough space to compose quota message!\n"); |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 1cd2388ca5bd..1d641bb108d2 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
| @@ -136,6 +136,7 @@ static int traverse(struct seq_file *m, loff_t offset) | |||
| 136 | Eoverflow: | 136 | Eoverflow: |
| 137 | m->op->stop(m, p); | 137 | m->op->stop(m, p); |
| 138 | kfree(m->buf); | 138 | kfree(m->buf); |
| 139 | m->count = 0; | ||
| 139 | m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); | 140 | m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); |
| 140 | return !m->buf ? -ENOMEM : -EAGAIN; | 141 | return !m->buf ? -ENOMEM : -EAGAIN; |
| 141 | } | 142 | } |
| @@ -232,10 +233,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
| 232 | goto Fill; | 233 | goto Fill; |
| 233 | m->op->stop(m, p); | 234 | m->op->stop(m, p); |
| 234 | kfree(m->buf); | 235 | kfree(m->buf); |
| 236 | m->count = 0; | ||
| 235 | m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); | 237 | m->buf = kmalloc(m->size <<= 1, GFP_KERNEL); |
| 236 | if (!m->buf) | 238 | if (!m->buf) |
| 237 | goto Enomem; | 239 | goto Enomem; |
| 238 | m->count = 0; | ||
| 239 | m->version = 0; | 240 | m->version = 0; |
| 240 | pos = m->index; | 241 | pos = m->index; |
| 241 | p = m->op->start(m, &pos); | 242 | p = m->op->start(m, &pos); |
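Both seq_file.c hunks clear m->count at the moment the old buffer is freed rather than only after a successful reallocation, so a failed kmalloc() can no longer leave m->count describing bytes that lived in the buffer that was just released. The invariant, in a simplified sketch (not the actual seq_file code):

	static int example_grow_buf(struct seq_file *m)
	{
		kfree(m->buf);
		m->count = 0;			/* the counted bytes went away with the buffer */
		m->size <<= 1;
		m->buf = kmalloc(m->size, GFP_KERNEL);
		return m->buf ? 0 : -ENOMEM;	/* count stays consistent even on failure */
	}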
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index c70111ebefd4..b6fa8657dcbc 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig | |||
| @@ -25,6 +25,78 @@ config SQUASHFS | |||
| 25 | 25 | ||
| 26 | If unsure, say N. | 26 | If unsure, say N. |
| 27 | 27 | ||
| 28 | choice | ||
| 29 | prompt "File decompression options" | ||
| 30 | depends on SQUASHFS | ||
| 31 | help | ||
| 32 | Squashfs now supports two options for decompressing file | ||
| 33 | data. Traditionally Squashfs has decompressed into an | ||
| 34 | intermediate buffer and then memcopied it into the page cache. | ||
| 35 | Squashfs now supports the ability to decompress directly into | ||
| 36 | the page cache. | ||
| 37 | |||
| 38 | If unsure, select "Decompress file data into an intermediate buffer" | ||
| 39 | |||
| 40 | config SQUASHFS_FILE_CACHE | ||
| 41 | bool "Decompress file data into an intermediate buffer" | ||
| 42 | help | ||
| 43 | Decompress file data into an intermediate buffer and then | ||
| 44 | memcopy it into the page cache. | ||
| 45 | |||
| 46 | config SQUASHFS_FILE_DIRECT | ||
| 47 | bool "Decompress files directly into the page cache" | ||
| 48 | help | ||
| 49 | Directly decompress file data into the page cache. | ||
| 50 | Doing so can significantly improve performance because | ||
| 51 | it eliminates a memcpy and it also removes the lock contention | ||
| 52 | on the single buffer. | ||
| 53 | |||
| 54 | endchoice | ||
| 55 | |||
| 56 | choice | ||
| 57 | prompt "Decompressor parallelisation options" | ||
| 58 | depends on SQUASHFS | ||
| 59 | help | ||
| 60 | Squashfs now supports three parallelisation options for | ||
| 61 | decompression. Each one exhibits various trade-offs between | ||
| 62 | decompression performance and CPU and memory usage. | ||
| 63 | |||
| 64 | If in doubt, select "Single threaded compression" | ||
| 65 | |||
| 66 | config SQUASHFS_DECOMP_SINGLE | ||
| 67 | bool "Single threaded compression" | ||
| 68 | help | ||
| 69 | Traditionally Squashfs has used single-threaded decompression. | ||
| 70 | Only one block (data or metadata) can be decompressed at any | ||
| 71 | one time. This limits CPU and memory usage to a minimum. | ||
| 72 | |||
| 73 | config SQUASHFS_DECOMP_MULTI | ||
| 74 | bool "Use multiple decompressors for parallel I/O" | ||
| 75 | help | ||
| 76 | By default Squashfs uses a single decompressor, which gives | ||
| 77 | poor performance on parallel I/O workloads on multi-CPU | ||
| 78 | machines, because threads must wait for it to become available. | ||
| 79 | |||
| 80 | If you have a parallel I/O workload and your system has enough memory, | ||
| 81 | using this option may improve overall I/O performance. | ||
| 82 | |||
| 83 | This implementation uses up to two parallel | ||
| 84 | decompressors per core, allocating them dynamically | ||
| 85 | on demand. | ||
| 86 | |||
| 87 | config SQUASHFS_DECOMP_MULTI_PERCPU | ||
| 88 | bool "Use percpu multiple decompressors for parallel I/O" | ||
| 89 | help | ||
| 90 | By default Squashfs uses a single decompressor, which gives | ||
| 91 | poor performance on parallel I/O workloads on multi-CPU | ||
| 92 | machines, because threads must wait for it to become available. | ||
| 93 | |||
| 94 | This decompressor implementation uses a maximum of one | ||
| 95 | decompressor per core. It uses percpu variables to ensure | ||
| 96 | decompression is load-balanced across the cores. | ||
| 97 | |||
| 98 | endchoice | ||
| 99 | |||
| 28 | config SQUASHFS_XATTR | 100 | config SQUASHFS_XATTR |
| 29 | bool "Squashfs XATTR support" | 101 | bool "Squashfs XATTR support" |
| 30 | depends on SQUASHFS | 102 | depends on SQUASHFS |
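Whichever file-decompression option is chosen, exactly one of the two backends is built (see the Makefile hunk below) and both expose the same hook, so the rest of Squashfs is unaffected. A hedged sketch of the shared interface, with the signature taken from the new files later in this patch:

    /* Provided by either file_cache.c (CONFIG_SQUASHFS_FILE_CACHE) or
     * file_direct.c (CONFIG_SQUASHFS_FILE_DIRECT); callers such as
     * squashfs_readpage() do not care which one is linked in. */
    int squashfs_readpage_block(struct page *page, u64 block, int bsize);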
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index 110b0476f3b4..4132520b4ff2 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile | |||
| @@ -5,6 +5,11 @@ | |||
| 5 | obj-$(CONFIG_SQUASHFS) += squashfs.o | 5 | obj-$(CONFIG_SQUASHFS) += squashfs.o |
| 6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o | 6 | squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o |
| 7 | squashfs-y += namei.o super.o symlink.o decompressor.o | 7 | squashfs-y += namei.o super.o symlink.o decompressor.o |
| 8 | squashfs-$(CONFIG_SQUASHFS_FILE_CACHE) += file_cache.o | ||
| 9 | squashfs-$(CONFIG_SQUASHFS_FILE_DIRECT) += file_direct.o page_actor.o | ||
| 10 | squashfs-$(CONFIG_SQUASHFS_DECOMP_SINGLE) += decompressor_single.o | ||
| 11 | squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI) += decompressor_multi.o | ||
| 12 | squashfs-$(CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU) += decompressor_multi_percpu.o | ||
| 8 | squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o | 13 | squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o |
| 9 | squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o | 14 | squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o |
| 10 | squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o | 15 | squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o |
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 41d108ecc9be..0cea9b9236d0 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include "squashfs_fs_sb.h" | 36 | #include "squashfs_fs_sb.h" |
| 37 | #include "squashfs.h" | 37 | #include "squashfs.h" |
| 38 | #include "decompressor.h" | 38 | #include "decompressor.h" |
| 39 | #include "page_actor.h" | ||
| 39 | 40 | ||
| 40 | /* | 41 | /* |
| 41 | * Read the metadata block length, this is stored in the first two | 42 | * Read the metadata block length, this is stored in the first two |
| @@ -86,16 +87,16 @@ static struct buffer_head *get_block_length(struct super_block *sb, | |||
| 86 | * generated a larger block - this does occasionally happen with compression | 87 | * generated a larger block - this does occasionally happen with compression |
| 87 | * algorithms). | 88 | * algorithms). |
| 88 | */ | 89 | */ |
| 89 | int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | 90 | int squashfs_read_data(struct super_block *sb, u64 index, int length, |
| 90 | int length, u64 *next_index, int srclength, int pages) | 91 | u64 *next_index, struct squashfs_page_actor *output) |
| 91 | { | 92 | { |
| 92 | struct squashfs_sb_info *msblk = sb->s_fs_info; | 93 | struct squashfs_sb_info *msblk = sb->s_fs_info; |
| 93 | struct buffer_head **bh; | 94 | struct buffer_head **bh; |
| 94 | int offset = index & ((1 << msblk->devblksize_log2) - 1); | 95 | int offset = index & ((1 << msblk->devblksize_log2) - 1); |
| 95 | u64 cur_index = index >> msblk->devblksize_log2; | 96 | u64 cur_index = index >> msblk->devblksize_log2; |
| 96 | int bytes, compressed, b = 0, k = 0, page = 0, avail; | 97 | int bytes, compressed, b = 0, k = 0, avail, i; |
| 97 | 98 | ||
| 98 | bh = kcalloc(((srclength + msblk->devblksize - 1) | 99 | bh = kcalloc(((output->length + msblk->devblksize - 1) |
| 99 | >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); | 100 | >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); |
| 100 | if (bh == NULL) | 101 | if (bh == NULL) |
| 101 | return -ENOMEM; | 102 | return -ENOMEM; |
| @@ -111,9 +112,9 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 111 | *next_index = index + length; | 112 | *next_index = index + length; |
| 112 | 113 | ||
| 113 | TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", | 114 | TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", |
| 114 | index, compressed ? "" : "un", length, srclength); | 115 | index, compressed ? "" : "un", length, output->length); |
| 115 | 116 | ||
| 116 | if (length < 0 || length > srclength || | 117 | if (length < 0 || length > output->length || |
| 117 | (index + length) > msblk->bytes_used) | 118 | (index + length) > msblk->bytes_used) |
| 118 | goto read_failure; | 119 | goto read_failure; |
| 119 | 120 | ||
| @@ -145,7 +146,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 145 | TRACE("Block @ 0x%llx, %scompressed size %d\n", index, | 146 | TRACE("Block @ 0x%llx, %scompressed size %d\n", index, |
| 146 | compressed ? "" : "un", length); | 147 | compressed ? "" : "un", length); |
| 147 | 148 | ||
| 148 | if (length < 0 || length > srclength || | 149 | if (length < 0 || length > output->length || |
| 149 | (index + length) > msblk->bytes_used) | 150 | (index + length) > msblk->bytes_used) |
| 150 | goto block_release; | 151 | goto block_release; |
| 151 | 152 | ||
| @@ -158,9 +159,15 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 158 | ll_rw_block(READ, b - 1, bh + 1); | 159 | ll_rw_block(READ, b - 1, bh + 1); |
| 159 | } | 160 | } |
| 160 | 161 | ||
| 162 | for (i = 0; i < b; i++) { | ||
| 163 | wait_on_buffer(bh[i]); | ||
| 164 | if (!buffer_uptodate(bh[i])) | ||
| 165 | goto block_release; | ||
| 166 | } | ||
| 167 | |||
| 161 | if (compressed) { | 168 | if (compressed) { |
| 162 | length = squashfs_decompress(msblk, buffer, bh, b, offset, | 169 | length = squashfs_decompress(msblk, bh, b, offset, length, |
| 163 | length, srclength, pages); | 170 | output); |
| 164 | if (length < 0) | 171 | if (length < 0) |
| 165 | goto read_failure; | 172 | goto read_failure; |
| 166 | } else { | 173 | } else { |
| @@ -168,22 +175,20 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 168 | * Block is uncompressed. | 175 | * Block is uncompressed. |
| 169 | */ | 176 | */ |
| 170 | int in, pg_offset = 0; | 177 | int in, pg_offset = 0; |
| 178 | void *data = squashfs_first_page(output); | ||
| 171 | 179 | ||
| 172 | for (bytes = length; k < b; k++) { | 180 | for (bytes = length; k < b; k++) { |
| 173 | in = min(bytes, msblk->devblksize - offset); | 181 | in = min(bytes, msblk->devblksize - offset); |
| 174 | bytes -= in; | 182 | bytes -= in; |
| 175 | wait_on_buffer(bh[k]); | ||
| 176 | if (!buffer_uptodate(bh[k])) | ||
| 177 | goto block_release; | ||
| 178 | while (in) { | 183 | while (in) { |
| 179 | if (pg_offset == PAGE_CACHE_SIZE) { | 184 | if (pg_offset == PAGE_CACHE_SIZE) { |
| 180 | page++; | 185 | data = squashfs_next_page(output); |
| 181 | pg_offset = 0; | 186 | pg_offset = 0; |
| 182 | } | 187 | } |
| 183 | avail = min_t(int, in, PAGE_CACHE_SIZE - | 188 | avail = min_t(int, in, PAGE_CACHE_SIZE - |
| 184 | pg_offset); | 189 | pg_offset); |
| 185 | memcpy(buffer[page] + pg_offset, | 190 | memcpy(data + pg_offset, bh[k]->b_data + offset, |
| 186 | bh[k]->b_data + offset, avail); | 191 | avail); |
| 187 | in -= avail; | 192 | in -= avail; |
| 188 | pg_offset += avail; | 193 | pg_offset += avail; |
| 189 | offset += avail; | 194 | offset += avail; |
| @@ -191,6 +196,7 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index, | |||
| 191 | offset = 0; | 196 | offset = 0; |
| 192 | put_bh(bh[k]); | 197 | put_bh(bh[k]); |
| 193 | } | 198 | } |
| 199 | squashfs_finish_page(output); | ||
| 194 | } | 200 | } |
| 195 | 201 | ||
| 196 | kfree(bh); | 202 | kfree(bh); |
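The net effect on callers of squashfs_read_data() is that the raw buffer, srclength and pages arguments are replaced by a single struct squashfs_page_actor describing the output. A hedged usage sketch of the new calling convention (it mirrors the squashfs_read_table() hunk below; error handling trimmed):

    struct squashfs_page_actor *actor;

    actor = squashfs_page_actor_init(data, pages, length); /* wrap output pages */
    if (actor == NULL)
            return -ENOMEM;
    res = squashfs_read_data(sb, block, length | SQUASHFS_COMPRESSED_BIT_BLOCK,
                             NULL, actor);
    kfree(actor);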
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c index af0b73802592..1cb70a0b2168 100644 --- a/fs/squashfs/cache.c +++ b/fs/squashfs/cache.c | |||
| @@ -56,6 +56,7 @@ | |||
| 56 | #include "squashfs_fs.h" | 56 | #include "squashfs_fs.h" |
| 57 | #include "squashfs_fs_sb.h" | 57 | #include "squashfs_fs_sb.h" |
| 58 | #include "squashfs.h" | 58 | #include "squashfs.h" |
| 59 | #include "page_actor.h" | ||
| 59 | 60 | ||
| 60 | /* | 61 | /* |
| 61 | * Look-up block in cache, and increment usage count. If not in cache, read | 62 | * Look-up block in cache, and increment usage count. If not in cache, read |
| @@ -119,9 +120,8 @@ struct squashfs_cache_entry *squashfs_cache_get(struct super_block *sb, | |||
| 119 | entry->error = 0; | 120 | entry->error = 0; |
| 120 | spin_unlock(&cache->lock); | 121 | spin_unlock(&cache->lock); |
| 121 | 122 | ||
| 122 | entry->length = squashfs_read_data(sb, entry->data, | 123 | entry->length = squashfs_read_data(sb, block, length, |
| 123 | block, length, &entry->next_index, | 124 | &entry->next_index, entry->actor); |
| 124 | cache->block_size, cache->pages); | ||
| 125 | 125 | ||
| 126 | spin_lock(&cache->lock); | 126 | spin_lock(&cache->lock); |
| 127 | 127 | ||
| @@ -220,6 +220,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache) | |||
| 220 | kfree(cache->entry[i].data[j]); | 220 | kfree(cache->entry[i].data[j]); |
| 221 | kfree(cache->entry[i].data); | 221 | kfree(cache->entry[i].data); |
| 222 | } | 222 | } |
| 223 | kfree(cache->entry[i].actor); | ||
| 223 | } | 224 | } |
| 224 | 225 | ||
| 225 | kfree(cache->entry); | 226 | kfree(cache->entry); |
| @@ -280,6 +281,13 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries, | |||
| 280 | goto cleanup; | 281 | goto cleanup; |
| 281 | } | 282 | } |
| 282 | } | 283 | } |
| 284 | |||
| 285 | entry->actor = squashfs_page_actor_init(entry->data, | ||
| 286 | cache->pages, 0); | ||
| 287 | if (entry->actor == NULL) { | ||
| 288 | ERROR("Failed to allocate %s cache entry\n", name); | ||
| 289 | goto cleanup; | ||
| 290 | } | ||
| 283 | } | 291 | } |
| 284 | 292 | ||
| 285 | return cache; | 293 | return cache; |
| @@ -410,6 +418,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) | |||
| 410 | int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 418 | int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
| 411 | int i, res; | 419 | int i, res; |
| 412 | void *table, *buffer, **data; | 420 | void *table, *buffer, **data; |
| 421 | struct squashfs_page_actor *actor; | ||
| 413 | 422 | ||
| 414 | table = buffer = kmalloc(length, GFP_KERNEL); | 423 | table = buffer = kmalloc(length, GFP_KERNEL); |
| 415 | if (table == NULL) | 424 | if (table == NULL) |
| @@ -421,19 +430,28 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length) | |||
| 421 | goto failed; | 430 | goto failed; |
| 422 | } | 431 | } |
| 423 | 432 | ||
| 433 | actor = squashfs_page_actor_init(data, pages, length); | ||
| 434 | if (actor == NULL) { | ||
| 435 | res = -ENOMEM; | ||
| 436 | goto failed2; | ||
| 437 | } | ||
| 438 | |||
| 424 | for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) | 439 | for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE) |
| 425 | data[i] = buffer; | 440 | data[i] = buffer; |
| 426 | 441 | ||
| 427 | res = squashfs_read_data(sb, data, block, length | | 442 | res = squashfs_read_data(sb, block, length | |
| 428 | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, length, pages); | 443 | SQUASHFS_COMPRESSED_BIT_BLOCK, NULL, actor); |
| 429 | 444 | ||
| 430 | kfree(data); | 445 | kfree(data); |
| 446 | kfree(actor); | ||
| 431 | 447 | ||
| 432 | if (res < 0) | 448 | if (res < 0) |
| 433 | goto failed; | 449 | goto failed; |
| 434 | 450 | ||
| 435 | return table; | 451 | return table; |
| 436 | 452 | ||
| 453 | failed2: | ||
| 454 | kfree(data); | ||
| 437 | failed: | 455 | failed: |
| 438 | kfree(table); | 456 | kfree(table); |
| 439 | return ERR_PTR(res); | 457 | return ERR_PTR(res); |
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 3f6271d86abc..ac22fe73b0ad 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include "squashfs_fs_sb.h" | 30 | #include "squashfs_fs_sb.h" |
| 31 | #include "decompressor.h" | 31 | #include "decompressor.h" |
| 32 | #include "squashfs.h" | 32 | #include "squashfs.h" |
| 33 | #include "page_actor.h" | ||
| 33 | 34 | ||
| 34 | /* | 35 | /* |
| 35 | * This file (and decompressor.h) implements a decompressor framework for | 36 | * This file (and decompressor.h) implements a decompressor framework for |
| @@ -37,29 +38,29 @@ | |||
| 37 | */ | 38 | */ |
| 38 | 39 | ||
| 39 | static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { | 40 | static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = { |
| 40 | NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 | 41 | NULL, NULL, NULL, NULL, LZMA_COMPRESSION, "lzma", 0 |
| 41 | }; | 42 | }; |
| 42 | 43 | ||
| 43 | #ifndef CONFIG_SQUASHFS_LZO | 44 | #ifndef CONFIG_SQUASHFS_LZO |
| 44 | static const struct squashfs_decompressor squashfs_lzo_comp_ops = { | 45 | static const struct squashfs_decompressor squashfs_lzo_comp_ops = { |
| 45 | NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 | 46 | NULL, NULL, NULL, NULL, LZO_COMPRESSION, "lzo", 0 |
| 46 | }; | 47 | }; |
| 47 | #endif | 48 | #endif |
| 48 | 49 | ||
| 49 | #ifndef CONFIG_SQUASHFS_XZ | 50 | #ifndef CONFIG_SQUASHFS_XZ |
| 50 | static const struct squashfs_decompressor squashfs_xz_comp_ops = { | 51 | static const struct squashfs_decompressor squashfs_xz_comp_ops = { |
| 51 | NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 | 52 | NULL, NULL, NULL, NULL, XZ_COMPRESSION, "xz", 0 |
| 52 | }; | 53 | }; |
| 53 | #endif | 54 | #endif |
| 54 | 55 | ||
| 55 | #ifndef CONFIG_SQUASHFS_ZLIB | 56 | #ifndef CONFIG_SQUASHFS_ZLIB |
| 56 | static const struct squashfs_decompressor squashfs_zlib_comp_ops = { | 57 | static const struct squashfs_decompressor squashfs_zlib_comp_ops = { |
| 57 | NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 | 58 | NULL, NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 |
| 58 | }; | 59 | }; |
| 59 | #endif | 60 | #endif |
| 60 | 61 | ||
| 61 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { | 62 | static const struct squashfs_decompressor squashfs_unknown_comp_ops = { |
| 62 | NULL, NULL, NULL, 0, "unknown", 0 | 63 | NULL, NULL, NULL, NULL, 0, "unknown", 0 |
| 63 | }; | 64 | }; |
| 64 | 65 | ||
| 65 | static const struct squashfs_decompressor *decompressor[] = { | 66 | static const struct squashfs_decompressor *decompressor[] = { |
| @@ -83,10 +84,11 @@ const struct squashfs_decompressor *squashfs_lookup_decompressor(int id) | |||
| 83 | } | 84 | } |
| 84 | 85 | ||
| 85 | 86 | ||
| 86 | void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) | 87 | static void *get_comp_opts(struct super_block *sb, unsigned short flags) |
| 87 | { | 88 | { |
| 88 | struct squashfs_sb_info *msblk = sb->s_fs_info; | 89 | struct squashfs_sb_info *msblk = sb->s_fs_info; |
| 89 | void *strm, *buffer = NULL; | 90 | void *buffer = NULL, *comp_opts; |
| 91 | struct squashfs_page_actor *actor = NULL; | ||
| 90 | int length = 0; | 92 | int length = 0; |
| 91 | 93 | ||
| 92 | /* | 94 | /* |
| @@ -94,23 +96,46 @@ void *squashfs_decompressor_init(struct super_block *sb, unsigned short flags) | |||
| 94 | */ | 96 | */ |
| 95 | if (SQUASHFS_COMP_OPTS(flags)) { | 97 | if (SQUASHFS_COMP_OPTS(flags)) { |
| 96 | buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); | 98 | buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL); |
| 97 | if (buffer == NULL) | 99 | if (buffer == NULL) { |
| 98 | return ERR_PTR(-ENOMEM); | 100 | comp_opts = ERR_PTR(-ENOMEM); |
| 101 | goto out; | ||
| 102 | } | ||
| 103 | |||
| 104 | actor = squashfs_page_actor_init(&buffer, 1, 0); | ||
| 105 | if (actor == NULL) { | ||
| 106 | comp_opts = ERR_PTR(-ENOMEM); | ||
| 107 | goto out; | ||
| 108 | } | ||
| 99 | 109 | ||
| 100 | length = squashfs_read_data(sb, &buffer, | 110 | length = squashfs_read_data(sb, |
| 101 | sizeof(struct squashfs_super_block), 0, NULL, | 111 | sizeof(struct squashfs_super_block), 0, NULL, actor); |
| 102 | PAGE_CACHE_SIZE, 1); | ||
| 103 | 112 | ||
| 104 | if (length < 0) { | 113 | if (length < 0) { |
| 105 | strm = ERR_PTR(length); | 114 | comp_opts = ERR_PTR(length); |
| 106 | goto finished; | 115 | goto out; |
| 107 | } | 116 | } |
| 108 | } | 117 | } |
| 109 | 118 | ||
| 110 | strm = msblk->decompressor->init(msblk, buffer, length); | 119 | comp_opts = squashfs_comp_opts(msblk, buffer, length); |
| 111 | 120 | ||
| 112 | finished: | 121 | out: |
| 122 | kfree(actor); | ||
| 113 | kfree(buffer); | 123 | kfree(buffer); |
| 124 | return comp_opts; | ||
| 125 | } | ||
| 126 | |||
| 127 | |||
| 128 | void *squashfs_decompressor_setup(struct super_block *sb, unsigned short flags) | ||
| 129 | { | ||
| 130 | struct squashfs_sb_info *msblk = sb->s_fs_info; | ||
| 131 | void *stream, *comp_opts = get_comp_opts(sb, flags); | ||
| 132 | |||
| 133 | if (IS_ERR(comp_opts)) | ||
| 134 | return comp_opts; | ||
| 135 | |||
| 136 | stream = squashfs_decompressor_create(msblk, comp_opts); | ||
| 137 | if (IS_ERR(stream)) | ||
| 138 | kfree(comp_opts); | ||
| 114 | 139 | ||
| 115 | return strm; | 140 | return stream; |
| 116 | } | 141 | } |
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 330073e29029..af0985321808 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h | |||
| @@ -24,28 +24,22 @@ | |||
| 24 | */ | 24 | */ |
| 25 | 25 | ||
| 26 | struct squashfs_decompressor { | 26 | struct squashfs_decompressor { |
| 27 | void *(*init)(struct squashfs_sb_info *, void *, int); | 27 | void *(*init)(struct squashfs_sb_info *, void *); |
| 28 | void *(*comp_opts)(struct squashfs_sb_info *, void *, int); | ||
| 28 | void (*free)(void *); | 29 | void (*free)(void *); |
| 29 | int (*decompress)(struct squashfs_sb_info *, void **, | 30 | int (*decompress)(struct squashfs_sb_info *, void *, |
| 30 | struct buffer_head **, int, int, int, int, int); | 31 | struct buffer_head **, int, int, int, |
| 32 | struct squashfs_page_actor *); | ||
| 31 | int id; | 33 | int id; |
| 32 | char *name; | 34 | char *name; |
| 33 | int supported; | 35 | int supported; |
| 34 | }; | 36 | }; |
| 35 | 37 | ||
| 36 | static inline void squashfs_decompressor_free(struct squashfs_sb_info *msblk, | 38 | static inline void *squashfs_comp_opts(struct squashfs_sb_info *msblk, |
| 37 | void *s) | 39 | void *buff, int length) |
| 38 | { | 40 | { |
| 39 | if (msblk->decompressor) | 41 | return msblk->decompressor->comp_opts ? |
| 40 | msblk->decompressor->free(s); | 42 | msblk->decompressor->comp_opts(msblk, buff, length) : NULL; |
| 41 | } | ||
| 42 | |||
| 43 | static inline int squashfs_decompress(struct squashfs_sb_info *msblk, | ||
| 44 | void **buffer, struct buffer_head **bh, int b, int offset, int length, | ||
| 45 | int srclength, int pages) | ||
| 46 | { | ||
| 47 | return msblk->decompressor->decompress(msblk, buffer, bh, b, offset, | ||
| 48 | length, srclength, pages); | ||
| 49 | } | 43 | } |
| 50 | 44 | ||
| 51 | #ifdef CONFIG_SQUASHFS_XZ | 45 | #ifdef CONFIG_SQUASHFS_XZ |
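The extra NULL in each stub table above corresponds to the new comp_opts member, which now sits between init and free in struct squashfs_decompressor. An illustrative rewrite of one stub with designated initializers makes the slot order explicit (not part of the patch itself):

    static const struct squashfs_decompressor squashfs_lzma_unsupported_comp_ops = {
            .init       = NULL,
            .comp_opts  = NULL,   /* new callback: parse compressor options */
            .free       = NULL,
            .decompress = NULL,
            .id         = LZMA_COMPRESSION,
            .name       = "lzma",
            .supported  = 0,
    };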
diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c new file mode 100644 index 000000000000..d6008a636479 --- /dev/null +++ b/fs/squashfs/decompressor_multi.c | |||
| @@ -0,0 +1,198 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Minchan Kim <minchan@kernel.org> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | #include <linux/types.h> | ||
| 9 | #include <linux/mutex.h> | ||
| 10 | #include <linux/slab.h> | ||
| 11 | #include <linux/buffer_head.h> | ||
| 12 | #include <linux/sched.h> | ||
| 13 | #include <linux/wait.h> | ||
| 14 | #include <linux/cpumask.h> | ||
| 15 | |||
| 16 | #include "squashfs_fs.h" | ||
| 17 | #include "squashfs_fs_sb.h" | ||
| 18 | #include "decompressor.h" | ||
| 19 | #include "squashfs.h" | ||
| 20 | |||
| 21 | /* | ||
| 22 | * This file implements multi-threaded decompression in the | ||
| 23 | * decompressor framework | ||
| 24 | */ | ||
| 25 | |||
| 26 | |||
| 27 | /* | ||
| 28 | * The reason for multiplying by two is that a CPU can issue a new I/O | ||
| 29 | * request while it is waiting for a previous one. | ||
| 30 | */ | ||
| 31 | #define MAX_DECOMPRESSOR (num_online_cpus() * 2) | ||
| 32 | |||
| 33 | |||
| 34 | int squashfs_max_decompressors(void) | ||
| 35 | { | ||
| 36 | return MAX_DECOMPRESSOR; | ||
| 37 | } | ||
| 38 | |||
| 39 | |||
| 40 | struct squashfs_stream { | ||
| 41 | void *comp_opts; | ||
| 42 | struct list_head strm_list; | ||
| 43 | struct mutex mutex; | ||
| 44 | int avail_decomp; | ||
| 45 | wait_queue_head_t wait; | ||
| 46 | }; | ||
| 47 | |||
| 48 | |||
| 49 | struct decomp_stream { | ||
| 50 | void *stream; | ||
| 51 | struct list_head list; | ||
| 52 | }; | ||
| 53 | |||
| 54 | |||
| 55 | static void put_decomp_stream(struct decomp_stream *decomp_strm, | ||
| 56 | struct squashfs_stream *stream) | ||
| 57 | { | ||
| 58 | mutex_lock(&stream->mutex); | ||
| 59 | list_add(&decomp_strm->list, &stream->strm_list); | ||
| 60 | mutex_unlock(&stream->mutex); | ||
| 61 | wake_up(&stream->wait); | ||
| 62 | } | ||
| 63 | |||
| 64 | void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, | ||
| 65 | void *comp_opts) | ||
| 66 | { | ||
| 67 | struct squashfs_stream *stream; | ||
| 68 | struct decomp_stream *decomp_strm = NULL; | ||
| 69 | int err = -ENOMEM; | ||
| 70 | |||
| 71 | stream = kzalloc(sizeof(*stream), GFP_KERNEL); | ||
| 72 | if (!stream) | ||
| 73 | goto out; | ||
| 74 | |||
| 75 | stream->comp_opts = comp_opts; | ||
| 76 | mutex_init(&stream->mutex); | ||
| 77 | INIT_LIST_HEAD(&stream->strm_list); | ||
| 78 | init_waitqueue_head(&stream->wait); | ||
| 79 | |||
| 80 | /* | ||
| 81 | * We should always have at least one default decompressor, | ||
| 82 | * so that if we fail to allocate a new decompressor dynamically | ||
| 83 | * we can always fall back to the default one and the | ||
| 84 | * file system keeps working. | ||
| 85 | */ | ||
| 86 | decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); | ||
| 87 | if (!decomp_strm) | ||
| 88 | goto out; | ||
| 89 | |||
| 90 | decomp_strm->stream = msblk->decompressor->init(msblk, | ||
| 91 | stream->comp_opts); | ||
| 92 | if (IS_ERR(decomp_strm->stream)) { | ||
| 93 | err = PTR_ERR(decomp_strm->stream); | ||
| 94 | goto out; | ||
| 95 | } | ||
| 96 | |||
| 97 | list_add(&decomp_strm->list, &stream->strm_list); | ||
| 98 | stream->avail_decomp = 1; | ||
| 99 | return stream; | ||
| 100 | |||
| 101 | out: | ||
| 102 | kfree(decomp_strm); | ||
| 103 | kfree(stream); | ||
| 104 | return ERR_PTR(err); | ||
| 105 | } | ||
| 106 | |||
| 107 | |||
| 108 | void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) | ||
| 109 | { | ||
| 110 | struct squashfs_stream *stream = msblk->stream; | ||
| 111 | if (stream) { | ||
| 112 | struct decomp_stream *decomp_strm; | ||
| 113 | |||
| 114 | while (!list_empty(&stream->strm_list)) { | ||
| 115 | decomp_strm = list_entry(stream->strm_list.prev, | ||
| 116 | struct decomp_stream, list); | ||
| 117 | list_del(&decomp_strm->list); | ||
| 118 | msblk->decompressor->free(decomp_strm->stream); | ||
| 119 | kfree(decomp_strm); | ||
| 120 | stream->avail_decomp--; | ||
| 121 | } | ||
| 122 | WARN_ON(stream->avail_decomp); | ||
| 123 | kfree(stream->comp_opts); | ||
| 124 | kfree(stream); | ||
| 125 | } | ||
| 126 | } | ||
| 127 | |||
| 128 | |||
| 129 | static struct decomp_stream *get_decomp_stream(struct squashfs_sb_info *msblk, | ||
| 130 | struct squashfs_stream *stream) | ||
| 131 | { | ||
| 132 | struct decomp_stream *decomp_strm; | ||
| 133 | |||
| 134 | while (1) { | ||
| 135 | mutex_lock(&stream->mutex); | ||
| 136 | |||
| 137 | /* There is an available decomp_stream */ | ||
| 138 | if (!list_empty(&stream->strm_list)) { | ||
| 139 | decomp_strm = list_entry(stream->strm_list.prev, | ||
| 140 | struct decomp_stream, list); | ||
| 141 | list_del(&decomp_strm->list); | ||
| 142 | mutex_unlock(&stream->mutex); | ||
| 143 | break; | ||
| 144 | } | ||
| 145 | |||
| 146 | /* | ||
| 147 | * If no decompressor is available and the pool is already full, | ||
| 148 | * wait for another user to release one. | ||
| 149 | */ | ||
| 150 | if (stream->avail_decomp >= MAX_DECOMPRESSOR) | ||
| 151 | goto wait; | ||
| 152 | |||
| 153 | /* Allocate a new decompressor */ | ||
| 154 | decomp_strm = kmalloc(sizeof(*decomp_strm), GFP_KERNEL); | ||
| 155 | if (!decomp_strm) | ||
| 156 | goto wait; | ||
| 157 | |||
| 158 | decomp_strm->stream = msblk->decompressor->init(msblk, | ||
| 159 | stream->comp_opts); | ||
| 160 | if (IS_ERR(decomp_strm->stream)) { | ||
| 161 | kfree(decomp_strm); | ||
| 162 | goto wait; | ||
| 163 | } | ||
| 164 | |||
| 165 | stream->avail_decomp++; | ||
| 166 | WARN_ON(stream->avail_decomp > MAX_DECOMPRESSOR); | ||
| 167 | |||
| 168 | mutex_unlock(&stream->mutex); | ||
| 169 | break; | ||
| 170 | wait: | ||
| 171 | /* | ||
| 172 | * If system memory is tight, wait for another user to | ||
| 173 | * release a decompressor instead of pressuring the VM, | ||
| 174 | * which could cause page cache thrashing. | ||
| 175 | */ | ||
| 176 | mutex_unlock(&stream->mutex); | ||
| 177 | wait_event(stream->wait, | ||
| 178 | !list_empty(&stream->strm_list)); | ||
| 179 | } | ||
| 180 | |||
| 181 | return decomp_strm; | ||
| 182 | } | ||
| 183 | |||
| 184 | |||
| 185 | int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, | ||
| 186 | int b, int offset, int length, struct squashfs_page_actor *output) | ||
| 187 | { | ||
| 188 | int res; | ||
| 189 | struct squashfs_stream *stream = msblk->stream; | ||
| 190 | struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); | ||
| 191 | res = msblk->decompressor->decompress(msblk, decomp_stream->stream, | ||
| 192 | bh, b, offset, length, output); | ||
| 193 | put_decomp_stream(decomp_stream, stream); | ||
| 194 | if (res < 0) | ||
| 195 | ERROR("%s decompression failed, data probably corrupt\n", | ||
| 196 | msblk->decompressor->name); | ||
| 197 | return res; | ||
| 198 | } | ||
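Callers do not manage decomp_stream objects themselves: squashfs_decompress() takes one from the pool (allocating a new one on demand up to MAX_DECOMPRESSOR, or sleeping on the wait queue otherwise) and always puts it back. A hedged caller-side sketch, mirroring the block.c hunk above:

    /* Stream acquisition and release are internal to squashfs_decompress(). */
    length = squashfs_decompress(msblk, bh, b, offset, length, output);
    if (length < 0)
            goto read_failure;   /* data probably corrupt */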
diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c new file mode 100644 index 000000000000..23a9c28ad8ea --- /dev/null +++ b/fs/squashfs/decompressor_multi_percpu.c | |||
| @@ -0,0 +1,97 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/types.h> | ||
| 10 | #include <linux/slab.h> | ||
| 11 | #include <linux/percpu.h> | ||
| 12 | #include <linux/buffer_head.h> | ||
| 13 | |||
| 14 | #include "squashfs_fs.h" | ||
| 15 | #include "squashfs_fs_sb.h" | ||
| 16 | #include "decompressor.h" | ||
| 17 | #include "squashfs.h" | ||
| 18 | |||
| 19 | /* | ||
| 20 | * This file implements multi-threaded decompression using percpu | ||
| 21 | * variables, one decompressor per CPU core. | ||
| 22 | */ | ||
| 23 | |||
| 24 | struct squashfs_stream { | ||
| 25 | void *stream; | ||
| 26 | }; | ||
| 27 | |||
| 28 | void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, | ||
| 29 | void *comp_opts) | ||
| 30 | { | ||
| 31 | struct squashfs_stream *stream; | ||
| 32 | struct squashfs_stream __percpu *percpu; | ||
| 33 | int err, cpu; | ||
| 34 | |||
| 35 | percpu = alloc_percpu(struct squashfs_stream); | ||
| 36 | if (percpu == NULL) | ||
| 37 | return ERR_PTR(-ENOMEM); | ||
| 38 | |||
| 39 | for_each_possible_cpu(cpu) { | ||
| 40 | stream = per_cpu_ptr(percpu, cpu); | ||
| 41 | stream->stream = msblk->decompressor->init(msblk, comp_opts); | ||
| 42 | if (IS_ERR(stream->stream)) { | ||
| 43 | err = PTR_ERR(stream->stream); | ||
| 44 | goto out; | ||
| 45 | } | ||
| 46 | } | ||
| 47 | |||
| 48 | kfree(comp_opts); | ||
| 49 | return (__force void *) percpu; | ||
| 50 | |||
| 51 | out: | ||
| 52 | for_each_possible_cpu(cpu) { | ||
| 53 | stream = per_cpu_ptr(percpu, cpu); | ||
| 54 | if (!IS_ERR_OR_NULL(stream->stream)) | ||
| 55 | msblk->decompressor->free(stream->stream); | ||
| 56 | } | ||
| 57 | free_percpu(percpu); | ||
| 58 | return ERR_PTR(err); | ||
| 59 | } | ||
| 60 | |||
| 61 | void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) | ||
| 62 | { | ||
| 63 | struct squashfs_stream __percpu *percpu = | ||
| 64 | (struct squashfs_stream __percpu *) msblk->stream; | ||
| 65 | struct squashfs_stream *stream; | ||
| 66 | int cpu; | ||
| 67 | |||
| 68 | if (msblk->stream) { | ||
| 69 | for_each_possible_cpu(cpu) { | ||
| 70 | stream = per_cpu_ptr(percpu, cpu); | ||
| 71 | msblk->decompressor->free(stream->stream); | ||
| 72 | } | ||
| 73 | free_percpu(percpu); | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, | ||
| 78 | int b, int offset, int length, struct squashfs_page_actor *output) | ||
| 79 | { | ||
| 80 | struct squashfs_stream __percpu *percpu = | ||
| 81 | (struct squashfs_stream __percpu *) msblk->stream; | ||
| 82 | struct squashfs_stream *stream = get_cpu_ptr(percpu); | ||
| 83 | int res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, | ||
| 84 | offset, length, output); | ||
| 85 | put_cpu_ptr(stream); | ||
| 86 | |||
| 87 | if (res < 0) | ||
| 88 | ERROR("%s decompression failed, data probably corrupt\n", | ||
| 89 | msblk->decompressor->name); | ||
| 90 | |||
| 91 | return res; | ||
| 92 | } | ||
| 93 | |||
| 94 | int squashfs_max_decompressors(void) | ||
| 95 | { | ||
| 96 | return num_possible_cpus(); | ||
| 97 | } | ||
diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c new file mode 100644 index 000000000000..a6c75929a00e --- /dev/null +++ b/fs/squashfs/decompressor_single.c | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/types.h> | ||
| 10 | #include <linux/mutex.h> | ||
| 11 | #include <linux/slab.h> | ||
| 12 | #include <linux/buffer_head.h> | ||
| 13 | |||
| 14 | #include "squashfs_fs.h" | ||
| 15 | #include "squashfs_fs_sb.h" | ||
| 16 | #include "decompressor.h" | ||
| 17 | #include "squashfs.h" | ||
| 18 | |||
| 19 | /* | ||
| 20 | * This file implements single-threaded decompression in the | ||
| 21 | * decompressor framework | ||
| 22 | */ | ||
| 23 | |||
| 24 | struct squashfs_stream { | ||
| 25 | void *stream; | ||
| 26 | struct mutex mutex; | ||
| 27 | }; | ||
| 28 | |||
| 29 | void *squashfs_decompressor_create(struct squashfs_sb_info *msblk, | ||
| 30 | void *comp_opts) | ||
| 31 | { | ||
| 32 | struct squashfs_stream *stream; | ||
| 33 | int err = -ENOMEM; | ||
| 34 | |||
| 35 | stream = kmalloc(sizeof(*stream), GFP_KERNEL); | ||
| 36 | if (stream == NULL) | ||
| 37 | goto out; | ||
| 38 | |||
| 39 | stream->stream = msblk->decompressor->init(msblk, comp_opts); | ||
| 40 | if (IS_ERR(stream->stream)) { | ||
| 41 | err = PTR_ERR(stream->stream); | ||
| 42 | goto out; | ||
| 43 | } | ||
| 44 | |||
| 45 | kfree(comp_opts); | ||
| 46 | mutex_init(&stream->mutex); | ||
| 47 | return stream; | ||
| 48 | |||
| 49 | out: | ||
| 50 | kfree(stream); | ||
| 51 | return ERR_PTR(err); | ||
| 52 | } | ||
| 53 | |||
| 54 | void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) | ||
| 55 | { | ||
| 56 | struct squashfs_stream *stream = msblk->stream; | ||
| 57 | |||
| 58 | if (stream) { | ||
| 59 | msblk->decompressor->free(stream->stream); | ||
| 60 | kfree(stream); | ||
| 61 | } | ||
| 62 | } | ||
| 63 | |||
| 64 | int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, | ||
| 65 | int b, int offset, int length, struct squashfs_page_actor *output) | ||
| 66 | { | ||
| 67 | int res; | ||
| 68 | struct squashfs_stream *stream = msblk->stream; | ||
| 69 | |||
| 70 | mutex_lock(&stream->mutex); | ||
| 71 | res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, | ||
| 72 | offset, length, output); | ||
| 73 | mutex_unlock(&stream->mutex); | ||
| 74 | |||
| 75 | if (res < 0) | ||
| 76 | ERROR("%s decompression failed, data probably corrupt\n", | ||
| 77 | msblk->decompressor->name); | ||
| 78 | |||
| 79 | return res; | ||
| 80 | } | ||
| 81 | |||
| 82 | int squashfs_max_decompressors(void) | ||
| 83 | { | ||
| 84 | return 1; | ||
| 85 | } | ||
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c index 8ca62c28fe12..e5c9689062ba 100644 --- a/fs/squashfs/file.c +++ b/fs/squashfs/file.c | |||
| @@ -370,77 +370,15 @@ static int read_blocklist(struct inode *inode, int index, u64 *block) | |||
| 370 | return le32_to_cpu(size); | 370 | return le32_to_cpu(size); |
| 371 | } | 371 | } |
| 372 | 372 | ||
| 373 | 373 | /* Copy data into page cache */ | |
| 374 | static int squashfs_readpage(struct file *file, struct page *page) | 374 | void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer, |
| 375 | int bytes, int offset) | ||
| 375 | { | 376 | { |
| 376 | struct inode *inode = page->mapping->host; | 377 | struct inode *inode = page->mapping->host; |
| 377 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | 378 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; |
| 378 | int bytes, i, offset = 0, sparse = 0; | ||
| 379 | struct squashfs_cache_entry *buffer = NULL; | ||
| 380 | void *pageaddr; | 379 | void *pageaddr; |
| 381 | 380 | int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; | |
| 382 | int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; | 381 | int start_index = page->index & ~mask, end_index = start_index | mask; |
| 383 | int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); | ||
| 384 | int start_index = page->index & ~mask; | ||
| 385 | int end_index = start_index | mask; | ||
| 386 | int file_end = i_size_read(inode) >> msblk->block_log; | ||
| 387 | |||
| 388 | TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", | ||
| 389 | page->index, squashfs_i(inode)->start); | ||
| 390 | |||
| 391 | if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
| 392 | PAGE_CACHE_SHIFT)) | ||
| 393 | goto out; | ||
| 394 | |||
| 395 | if (index < file_end || squashfs_i(inode)->fragment_block == | ||
| 396 | SQUASHFS_INVALID_BLK) { | ||
| 397 | /* | ||
| 398 | * Reading a datablock from disk. Need to read block list | ||
| 399 | * to get location and block size. | ||
| 400 | */ | ||
| 401 | u64 block = 0; | ||
| 402 | int bsize = read_blocklist(inode, index, &block); | ||
| 403 | if (bsize < 0) | ||
| 404 | goto error_out; | ||
| 405 | |||
| 406 | if (bsize == 0) { /* hole */ | ||
| 407 | bytes = index == file_end ? | ||
| 408 | (i_size_read(inode) & (msblk->block_size - 1)) : | ||
| 409 | msblk->block_size; | ||
| 410 | sparse = 1; | ||
| 411 | } else { | ||
| 412 | /* | ||
| 413 | * Read and decompress datablock. | ||
| 414 | */ | ||
| 415 | buffer = squashfs_get_datablock(inode->i_sb, | ||
| 416 | block, bsize); | ||
| 417 | if (buffer->error) { | ||
| 418 | ERROR("Unable to read page, block %llx, size %x" | ||
| 419 | "\n", block, bsize); | ||
| 420 | squashfs_cache_put(buffer); | ||
| 421 | goto error_out; | ||
| 422 | } | ||
| 423 | bytes = buffer->length; | ||
| 424 | } | ||
| 425 | } else { | ||
| 426 | /* | ||
| 427 | * Datablock is stored inside a fragment (tail-end packed | ||
| 428 | * block). | ||
| 429 | */ | ||
| 430 | buffer = squashfs_get_fragment(inode->i_sb, | ||
| 431 | squashfs_i(inode)->fragment_block, | ||
| 432 | squashfs_i(inode)->fragment_size); | ||
| 433 | |||
| 434 | if (buffer->error) { | ||
| 435 | ERROR("Unable to read page, block %llx, size %x\n", | ||
| 436 | squashfs_i(inode)->fragment_block, | ||
| 437 | squashfs_i(inode)->fragment_size); | ||
| 438 | squashfs_cache_put(buffer); | ||
| 439 | goto error_out; | ||
| 440 | } | ||
| 441 | bytes = i_size_read(inode) & (msblk->block_size - 1); | ||
| 442 | offset = squashfs_i(inode)->fragment_offset; | ||
| 443 | } | ||
| 444 | 382 | ||
| 445 | /* | 383 | /* |
| 446 | * Loop copying datablock into pages. As the datablock likely covers | 384 | * Loop copying datablock into pages. As the datablock likely covers |
| @@ -451,7 +389,7 @@ static int squashfs_readpage(struct file *file, struct page *page) | |||
| 451 | for (i = start_index; i <= end_index && bytes > 0; i++, | 389 | for (i = start_index; i <= end_index && bytes > 0; i++, |
| 452 | bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { | 390 | bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { |
| 453 | struct page *push_page; | 391 | struct page *push_page; |
| 454 | int avail = sparse ? 0 : min_t(int, bytes, PAGE_CACHE_SIZE); | 392 | int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0; |
| 455 | 393 | ||
| 456 | TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); | 394 | TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail); |
| 457 | 395 | ||
| @@ -475,11 +413,75 @@ skip_page: | |||
| 475 | if (i != page->index) | 413 | if (i != page->index) |
| 476 | page_cache_release(push_page); | 414 | page_cache_release(push_page); |
| 477 | } | 415 | } |
| 416 | } | ||
| 417 | |||
| 418 | /* Read datablock stored packed inside a fragment (tail-end packed block) */ | ||
| 419 | static int squashfs_readpage_fragment(struct page *page) | ||
| 420 | { | ||
| 421 | struct inode *inode = page->mapping->host; | ||
| 422 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | ||
| 423 | struct squashfs_cache_entry *buffer = squashfs_get_fragment(inode->i_sb, | ||
| 424 | squashfs_i(inode)->fragment_block, | ||
| 425 | squashfs_i(inode)->fragment_size); | ||
| 426 | int res = buffer->error; | ||
| 427 | |||
| 428 | if (res) | ||
| 429 | ERROR("Unable to read page, block %llx, size %x\n", | ||
| 430 | squashfs_i(inode)->fragment_block, | ||
| 431 | squashfs_i(inode)->fragment_size); | ||
| 432 | else | ||
| 433 | squashfs_copy_cache(page, buffer, i_size_read(inode) & | ||
| 434 | (msblk->block_size - 1), | ||
| 435 | squashfs_i(inode)->fragment_offset); | ||
| 436 | |||
| 437 | squashfs_cache_put(buffer); | ||
| 438 | return res; | ||
| 439 | } | ||
| 478 | 440 | ||
| 479 | if (!sparse) | 441 | static int squashfs_readpage_sparse(struct page *page, int index, int file_end) |
| 480 | squashfs_cache_put(buffer); | 442 | { |
| 443 | struct inode *inode = page->mapping->host; | ||
| 444 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | ||
| 445 | int bytes = index == file_end ? | ||
| 446 | (i_size_read(inode) & (msblk->block_size - 1)) : | ||
| 447 | msblk->block_size; | ||
| 481 | 448 | ||
| 449 | squashfs_copy_cache(page, NULL, bytes, 0); | ||
| 482 | return 0; | 450 | return 0; |
| 451 | } | ||
| 452 | |||
| 453 | static int squashfs_readpage(struct file *file, struct page *page) | ||
| 454 | { | ||
| 455 | struct inode *inode = page->mapping->host; | ||
| 456 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | ||
| 457 | int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT); | ||
| 458 | int file_end = i_size_read(inode) >> msblk->block_log; | ||
| 459 | int res; | ||
| 460 | void *pageaddr; | ||
| 461 | |||
| 462 | TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n", | ||
| 463 | page->index, squashfs_i(inode)->start); | ||
| 464 | |||
| 465 | if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | ||
| 466 | PAGE_CACHE_SHIFT)) | ||
| 467 | goto out; | ||
| 468 | |||
| 469 | if (index < file_end || squashfs_i(inode)->fragment_block == | ||
| 470 | SQUASHFS_INVALID_BLK) { | ||
| 471 | u64 block = 0; | ||
| 472 | int bsize = read_blocklist(inode, index, &block); | ||
| 473 | if (bsize < 0) | ||
| 474 | goto error_out; | ||
| 475 | |||
| 476 | if (bsize == 0) | ||
| 477 | res = squashfs_readpage_sparse(page, index, file_end); | ||
| 478 | else | ||
| 479 | res = squashfs_readpage_block(page, block, bsize); | ||
| 480 | } else | ||
| 481 | res = squashfs_readpage_fragment(page); | ||
| 482 | |||
| 483 | if (!res) | ||
| 484 | return 0; | ||
| 483 | 485 | ||
| 484 | error_out: | 486 | error_out: |
| 485 | SetPageError(page); | 487 | SetPageError(page); |
diff --git a/fs/squashfs/file_cache.c b/fs/squashfs/file_cache.c new file mode 100644 index 000000000000..f2310d2a2019 --- /dev/null +++ b/fs/squashfs/file_cache.c | |||
| @@ -0,0 +1,38 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/fs.h> | ||
| 10 | #include <linux/vfs.h> | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/slab.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <linux/pagemap.h> | ||
| 15 | #include <linux/mutex.h> | ||
| 16 | |||
| 17 | #include "squashfs_fs.h" | ||
| 18 | #include "squashfs_fs_sb.h" | ||
| 19 | #include "squashfs_fs_i.h" | ||
| 20 | #include "squashfs.h" | ||
| 21 | |||
| 22 | /* Read separately compressed datablock and memcopy into page cache */ | ||
| 23 | int squashfs_readpage_block(struct page *page, u64 block, int bsize) | ||
| 24 | { | ||
| 25 | struct inode *i = page->mapping->host; | ||
| 26 | struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, | ||
| 27 | block, bsize); | ||
| 28 | int res = buffer->error; | ||
| 29 | |||
| 30 | if (res) | ||
| 31 | ERROR("Unable to read page, block %llx, size %x\n", block, | ||
| 32 | bsize); | ||
| 33 | else | ||
| 34 | squashfs_copy_cache(page, buffer, buffer->length, 0); | ||
| 35 | |||
| 36 | squashfs_cache_put(buffer); | ||
| 37 | return res; | ||
| 38 | } | ||
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c new file mode 100644 index 000000000000..2943b2bfae48 --- /dev/null +++ b/fs/squashfs/file_direct.c | |||
| @@ -0,0 +1,173 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/fs.h> | ||
| 10 | #include <linux/vfs.h> | ||
| 11 | #include <linux/kernel.h> | ||
| 12 | #include <linux/slab.h> | ||
| 13 | #include <linux/string.h> | ||
| 14 | #include <linux/pagemap.h> | ||
| 15 | #include <linux/mutex.h> | ||
| 16 | |||
| 17 | #include "squashfs_fs.h" | ||
| 18 | #include "squashfs_fs_sb.h" | ||
| 19 | #include "squashfs_fs_i.h" | ||
| 20 | #include "squashfs.h" | ||
| 21 | #include "page_actor.h" | ||
| 22 | |||
| 23 | static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, | ||
| 24 | int pages, struct page **page); | ||
| 25 | |||
| 26 | /* Read separately compressed datablock directly into page cache */ | ||
| 27 | int squashfs_readpage_block(struct page *target_page, u64 block, int bsize) | ||
| 28 | |||
| 29 | { | ||
| 30 | struct inode *inode = target_page->mapping->host; | ||
| 31 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | ||
| 32 | |||
| 33 | int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | ||
| 34 | int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1; | ||
| 35 | int start_index = target_page->index & ~mask; | ||
| 36 | int end_index = start_index | mask; | ||
| 37 | int i, n, pages, missing_pages, bytes, res = -ENOMEM; | ||
| 38 | struct page **page; | ||
| 39 | struct squashfs_page_actor *actor; | ||
| 40 | void *pageaddr; | ||
| 41 | |||
| 42 | if (end_index > file_end) | ||
| 43 | end_index = file_end; | ||
| 44 | |||
| 45 | pages = end_index - start_index + 1; | ||
| 46 | |||
| 47 | page = kmalloc(sizeof(void *) * pages, GFP_KERNEL); | ||
| 48 | if (page == NULL) | ||
| 49 | return res; | ||
| 50 | |||
| 51 | /* | ||
| 52 | * Create a "page actor" which will kmap and kunmap the | ||
| 53 | * page cache pages appropriately within the decompressor | ||
| 54 | */ | ||
| 55 | actor = squashfs_page_actor_init_special(page, pages, 0); | ||
| 56 | if (actor == NULL) | ||
| 57 | goto out; | ||
| 58 | |||
| 59 | /* Try to grab all the pages covered by the Squashfs block */ | ||
| 60 | for (missing_pages = 0, i = 0, n = start_index; i < pages; i++, n++) { | ||
| 61 | page[i] = (n == target_page->index) ? target_page : | ||
| 62 | grab_cache_page_nowait(target_page->mapping, n); | ||
| 63 | |||
| 64 | if (page[i] == NULL) { | ||
| 65 | missing_pages++; | ||
| 66 | continue; | ||
| 67 | } | ||
| 68 | |||
| 69 | if (PageUptodate(page[i])) { | ||
| 70 | unlock_page(page[i]); | ||
| 71 | page_cache_release(page[i]); | ||
| 72 | page[i] = NULL; | ||
| 73 | missing_pages++; | ||
| 74 | } | ||
| 75 | } | ||
| 76 | |||
| 77 | if (missing_pages) { | ||
| 78 | /* | ||
| 79 | * Couldn't get one or more pages, this page has either | ||
| 80 | * been VM reclaimed, but others are still in the page cache | ||
| 81 | * and uptodate, or we're racing with another thread in | ||
| 82 | * squashfs_readpage also trying to grab them. Fall back to | ||
| 83 | * using an intermediate buffer. | ||
| 84 | */ | ||
| 85 | res = squashfs_read_cache(target_page, block, bsize, pages, | ||
| 86 | page); | ||
| 87 | goto out; | ||
| 88 | } | ||
| 89 | |||
| 90 | /* Decompress directly into the page cache buffers */ | ||
| 91 | res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor); | ||
| 92 | if (res < 0) | ||
| 93 | goto mark_errored; | ||
| 94 | |||
| 95 | /* Last page may have trailing bytes not filled */ | ||
| 96 | bytes = res % PAGE_CACHE_SIZE; | ||
| 97 | if (bytes) { | ||
| 98 | pageaddr = kmap_atomic(page[pages - 1]); | ||
| 99 | memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes); | ||
| 100 | kunmap_atomic(pageaddr); | ||
| 101 | } | ||
| 102 | |||
| 103 | /* Mark pages as uptodate, unlock and release */ | ||
| 104 | for (i = 0; i < pages; i++) { | ||
| 105 | flush_dcache_page(page[i]); | ||
| 106 | SetPageUptodate(page[i]); | ||
| 107 | unlock_page(page[i]); | ||
| 108 | if (page[i] != target_page) | ||
| 109 | page_cache_release(page[i]); | ||
| 110 | } | ||
| 111 | |||
| 112 | kfree(actor); | ||
| 113 | kfree(page); | ||
| 114 | |||
| 115 | return 0; | ||
| 116 | |||
| 117 | mark_errored: | ||
| 118 | /* Decompression failed, mark pages as errored. The target_page | ||
| 119 | * is dealt with by the caller. | ||
| 120 | */ | ||
| 121 | for (i = 0; i < pages; i++) { | ||
| 122 | if (page[i] == target_page) | ||
| 123 | continue; | ||
| 124 | flush_dcache_page(page[i]); | ||
| 125 | SetPageError(page[i]); | ||
| 126 | unlock_page(page[i]); | ||
| 127 | page_cache_release(page[i]); | ||
| 128 | } | ||
| 129 | |||
| 130 | out: | ||
| 131 | kfree(actor); | ||
| 132 | kfree(page); | ||
| 133 | return res; | ||
| 134 | } | ||
| 135 | |||
| 136 | |||
| 137 | static int squashfs_read_cache(struct page *target_page, u64 block, int bsize, | ||
| 138 | int pages, struct page **page) | ||
| 139 | { | ||
| 140 | struct inode *i = target_page->mapping->host; | ||
| 141 | struct squashfs_cache_entry *buffer = squashfs_get_datablock(i->i_sb, | ||
| 142 | block, bsize); | ||
| 143 | int bytes = buffer->length, res = buffer->error, n, offset = 0; | ||
| 144 | void *pageaddr; | ||
| 145 | |||
| 146 | if (res) { | ||
| 147 | ERROR("Unable to read page, block %llx, size %x\n", block, | ||
| 148 | bsize); | ||
| 149 | goto out; | ||
| 150 | } | ||
| 151 | |||
| 152 | for (n = 0; n < pages && bytes > 0; n++, | ||
| 153 | bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) { | ||
| 154 | int avail = min_t(int, bytes, PAGE_CACHE_SIZE); | ||
| 155 | |||
| 156 | if (page[n] == NULL) | ||
| 157 | continue; | ||
| 158 | |||
| 159 | pageaddr = kmap_atomic(page[n]); | ||
| 160 | squashfs_copy_data(pageaddr, buffer, offset, avail); | ||
| 161 | memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail); | ||
| 162 | kunmap_atomic(pageaddr); | ||
| 163 | flush_dcache_page(page[n]); | ||
| 164 | SetPageUptodate(page[n]); | ||
| 165 | unlock_page(page[n]); | ||
| 166 | if (page[n] != target_page) | ||
| 167 | page_cache_release(page[n]); | ||
| 168 | } | ||
| 169 | |||
| 170 | out: | ||
| 171 | squashfs_cache_put(buffer); | ||
| 172 | return res; | ||
| 173 | } | ||
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index 00f4dfc5f088..244b9fbfff7b 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c | |||
| @@ -31,13 +31,14 @@ | |||
| 31 | #include "squashfs_fs_sb.h" | 31 | #include "squashfs_fs_sb.h" |
| 32 | #include "squashfs.h" | 32 | #include "squashfs.h" |
| 33 | #include "decompressor.h" | 33 | #include "decompressor.h" |
| 34 | #include "page_actor.h" | ||
| 34 | 35 | ||
| 35 | struct squashfs_lzo { | 36 | struct squashfs_lzo { |
| 36 | void *input; | 37 | void *input; |
| 37 | void *output; | 38 | void *output; |
| 38 | }; | 39 | }; |
| 39 | 40 | ||
| 40 | static void *lzo_init(struct squashfs_sb_info *msblk, void *buff, int len) | 41 | static void *lzo_init(struct squashfs_sb_info *msblk, void *buff) |
| 41 | { | 42 | { |
| 42 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); | 43 | int block_size = max_t(int, msblk->block_size, SQUASHFS_METADATA_SIZE); |
| 43 | 44 | ||
| @@ -74,22 +75,16 @@ static void lzo_free(void *strm) | |||
| 74 | } | 75 | } |
| 75 | 76 | ||
| 76 | 77 | ||
| 77 | static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, | 78 | static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, |
| 78 | struct buffer_head **bh, int b, int offset, int length, int srclength, | 79 | struct buffer_head **bh, int b, int offset, int length, |
| 79 | int pages) | 80 | struct squashfs_page_actor *output) |
| 80 | { | 81 | { |
| 81 | struct squashfs_lzo *stream = msblk->stream; | 82 | struct squashfs_lzo *stream = strm; |
| 82 | void *buff = stream->input; | 83 | void *buff = stream->input, *data; |
| 83 | int avail, i, bytes = length, res; | 84 | int avail, i, bytes = length, res; |
| 84 | size_t out_len = srclength; | 85 | size_t out_len = output->length; |
| 85 | |||
| 86 | mutex_lock(&msblk->read_data_mutex); | ||
| 87 | 86 | ||
| 88 | for (i = 0; i < b; i++) { | 87 | for (i = 0; i < b; i++) { |
| 89 | wait_on_buffer(bh[i]); | ||
| 90 | if (!buffer_uptodate(bh[i])) | ||
| 91 | goto block_release; | ||
| 92 | |||
| 93 | avail = min(bytes, msblk->devblksize - offset); | 88 | avail = min(bytes, msblk->devblksize - offset); |
| 94 | memcpy(buff, bh[i]->b_data + offset, avail); | 89 | memcpy(buff, bh[i]->b_data + offset, avail); |
| 95 | buff += avail; | 90 | buff += avail; |
| @@ -104,24 +99,24 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
| 104 | goto failed; | 99 | goto failed; |
| 105 | 100 | ||
| 106 | res = bytes = (int)out_len; | 101 | res = bytes = (int)out_len; |
| 107 | for (i = 0, buff = stream->output; bytes && i < pages; i++) { | 102 | data = squashfs_first_page(output); |
| 108 | avail = min_t(int, bytes, PAGE_CACHE_SIZE); | 103 | buff = stream->output; |
| 109 | memcpy(buffer[i], buff, avail); | 104 | while (data) { |
| 110 | buff += avail; | 105 | if (bytes <= PAGE_CACHE_SIZE) { |
| 111 | bytes -= avail; | 106 | memcpy(data, buff, bytes); |
| 107 | break; | ||
| 108 | } else { | ||
| 109 | memcpy(data, buff, PAGE_CACHE_SIZE); | ||
| 110 | buff += PAGE_CACHE_SIZE; | ||
| 111 | bytes -= PAGE_CACHE_SIZE; | ||
| 112 | data = squashfs_next_page(output); | ||
| 113 | } | ||
| 112 | } | 114 | } |
| 115 | squashfs_finish_page(output); | ||
| 113 | 116 | ||
| 114 | mutex_unlock(&msblk->read_data_mutex); | ||
| 115 | return res; | 117 | return res; |
| 116 | 118 | ||
| 117 | block_release: | ||
| 118 | for (; i < b; i++) | ||
| 119 | put_bh(bh[i]); | ||
| 120 | |||
| 121 | failed: | 119 | failed: |
| 122 | mutex_unlock(&msblk->read_data_mutex); | ||
| 123 | |||
| 124 | ERROR("lzo decompression failed, data probably corrupt\n"); | ||
| 125 | return -EIO; | 120 | return -EIO; |
| 126 | } | 121 | } |
| 127 | 122 | ||
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c new file mode 100644 index 000000000000..5a1c11f56441 --- /dev/null +++ b/fs/squashfs/page_actor.c | |||
| @@ -0,0 +1,100 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (c) 2013 | ||
| 3 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 4 | * | ||
| 5 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 6 | * the COPYING file in the top-level directory. | ||
| 7 | */ | ||
| 8 | |||
| 9 | #include <linux/kernel.h> | ||
| 10 | #include <linux/slab.h> | ||
| 11 | #include <linux/pagemap.h> | ||
| 12 | #include "page_actor.h" | ||
| 13 | |||
| 14 | /* | ||
| 15 | * This file contains implementations of page_actor for decompressing into | ||
| 16 | * an intermediate buffer, and for decompressing directly into the | ||
| 17 | * page cache. | ||
| 18 | * | ||
| 19 | * Calling code should avoid sleeping between calls to squashfs_first_page() | ||
| 20 | * and squashfs_finish_page(). | ||
| 21 | */ | ||
| 22 | |||
| 23 | /* Implementation of page_actor for decompressing into intermediate buffer */ | ||
| 24 | static void *cache_first_page(struct squashfs_page_actor *actor) | ||
| 25 | { | ||
| 26 | actor->next_page = 1; | ||
| 27 | return actor->buffer[0]; | ||
| 28 | } | ||
| 29 | |||
| 30 | static void *cache_next_page(struct squashfs_page_actor *actor) | ||
| 31 | { | ||
| 32 | if (actor->next_page == actor->pages) | ||
| 33 | return NULL; | ||
| 34 | |||
| 35 | return actor->buffer[actor->next_page++]; | ||
| 36 | } | ||
| 37 | |||
| 38 | static void cache_finish_page(struct squashfs_page_actor *actor) | ||
| 39 | { | ||
| 40 | /* empty */ | ||
| 41 | } | ||
| 42 | |||
| 43 | struct squashfs_page_actor *squashfs_page_actor_init(void **buffer, | ||
| 44 | int pages, int length) | ||
| 45 | { | ||
| 46 | struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); | ||
| 47 | |||
| 48 | if (actor == NULL) | ||
| 49 | return NULL; | ||
| 50 | |||
| 51 | actor->length = length ? : pages * PAGE_CACHE_SIZE; | ||
| 52 | actor->buffer = buffer; | ||
| 53 | actor->pages = pages; | ||
| 54 | actor->next_page = 0; | ||
| 55 | actor->squashfs_first_page = cache_first_page; | ||
| 56 | actor->squashfs_next_page = cache_next_page; | ||
| 57 | actor->squashfs_finish_page = cache_finish_page; | ||
| 58 | return actor; | ||
| 59 | } | ||
| 60 | |||
| 61 | /* Implementation of page_actor for decompressing directly into page cache. */ | ||
| 62 | static void *direct_first_page(struct squashfs_page_actor *actor) | ||
| 63 | { | ||
| 64 | actor->next_page = 1; | ||
| 65 | return actor->pageaddr = kmap_atomic(actor->page[0]); | ||
| 66 | } | ||
| 67 | |||
| 68 | static void *direct_next_page(struct squashfs_page_actor *actor) | ||
| 69 | { | ||
| 70 | if (actor->pageaddr) | ||
| 71 | kunmap_atomic(actor->pageaddr); | ||
| 72 | |||
| 73 | return actor->pageaddr = actor->next_page == actor->pages ? NULL : | ||
| 74 | kmap_atomic(actor->page[actor->next_page++]); | ||
| 75 | } | ||
| 76 | |||
| 77 | static void direct_finish_page(struct squashfs_page_actor *actor) | ||
| 78 | { | ||
| 79 | if (actor->pageaddr) | ||
| 80 | kunmap_atomic(actor->pageaddr); | ||
| 81 | } | ||
| 82 | |||
| 83 | struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page, | ||
| 84 | int pages, int length) | ||
| 85 | { | ||
| 86 | struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); | ||
| 87 | |||
| 88 | if (actor == NULL) | ||
| 89 | return NULL; | ||
| 90 | |||
| 91 | actor->length = length ? : pages * PAGE_CACHE_SIZE; | ||
| 92 | actor->page = page; | ||
| 93 | actor->pages = pages; | ||
| 94 | actor->next_page = 0; | ||
| 95 | actor->pageaddr = NULL; | ||
| 96 | actor->squashfs_first_page = direct_first_page; | ||
| 97 | actor->squashfs_next_page = direct_next_page; | ||
| 98 | actor->squashfs_finish_page = direct_finish_page; | ||
| 99 | return actor; | ||
| 100 | } | ||
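For orientation, here is a minimal caller-side sketch of the page-actor API introduced above. This is an editor's illustration, not part of the patch: fill_page() is a hypothetical producer standing in for one decompressor step. Because the "direct" implementation maps pages with kmap_atomic(), callers must not sleep between squashfs_first_page() and squashfs_finish_page().

	/* Illustrative only: walk the actor's output pages in order. */
	static int example_fill_output(struct squashfs_page_actor *actor)
	{
		void *addr = squashfs_first_page(actor);
		int bytes = actor->length;

		while (addr != NULL && bytes > 0) {
			int avail = min_t(int, bytes, PAGE_CACHE_SIZE);

			fill_page(addr, avail);	/* hypothetical producer */
			bytes -= avail;
			addr = squashfs_next_page(actor);
		}

		squashfs_finish_page(actor);	/* unmaps the last page in direct mode */
		return actor->length - bytes;
	}

The point of the abstraction is that this loop looks the same whether the output is an intermediate buffer (cache variant) or page-cache pages mapped on demand (direct variant).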
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h new file mode 100644 index 000000000000..26dd82008b82 --- /dev/null +++ b/fs/squashfs/page_actor.h | |||
| @@ -0,0 +1,81 @@ | |||
| 1 | #ifndef PAGE_ACTOR_H | ||
| 2 | #define PAGE_ACTOR_H | ||
| 3 | /* | ||
| 4 | * Copyright (c) 2013 | ||
| 5 | * Phillip Lougher <phillip@squashfs.org.uk> | ||
| 6 | * | ||
| 7 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
| 8 | * the COPYING file in the top-level directory. | ||
| 9 | */ | ||
| 10 | |||
| 11 | #ifndef CONFIG_SQUASHFS_FILE_DIRECT | ||
| 12 | struct squashfs_page_actor { | ||
| 13 | void **page; | ||
| 14 | int pages; | ||
| 15 | int length; | ||
| 16 | int next_page; | ||
| 17 | }; | ||
| 18 | |||
| 19 | static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page, | ||
| 20 | int pages, int length) | ||
| 21 | { | ||
| 22 | struct squashfs_page_actor *actor = kmalloc(sizeof(*actor), GFP_KERNEL); | ||
| 23 | |||
| 24 | if (actor == NULL) | ||
| 25 | return NULL; | ||
| 26 | |||
| 27 | actor->length = length ? : pages * PAGE_CACHE_SIZE; | ||
| 28 | actor->page = page; | ||
| 29 | actor->pages = pages; | ||
| 30 | actor->next_page = 0; | ||
| 31 | return actor; | ||
| 32 | } | ||
| 33 | |||
| 34 | static inline void *squashfs_first_page(struct squashfs_page_actor *actor) | ||
| 35 | { | ||
| 36 | actor->next_page = 1; | ||
| 37 | return actor->page[0]; | ||
| 38 | } | ||
| 39 | |||
| 40 | static inline void *squashfs_next_page(struct squashfs_page_actor *actor) | ||
| 41 | { | ||
| 42 | return actor->next_page == actor->pages ? NULL : | ||
| 43 | actor->page[actor->next_page++]; | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline void squashfs_finish_page(struct squashfs_page_actor *actor) | ||
| 47 | { | ||
| 48 | /* empty */ | ||
| 49 | } | ||
| 50 | #else | ||
| 51 | struct squashfs_page_actor { | ||
| 52 | union { | ||
| 53 | void **buffer; | ||
| 54 | struct page **page; | ||
| 55 | }; | ||
| 56 | void *pageaddr; | ||
| 57 | void *(*squashfs_first_page)(struct squashfs_page_actor *); | ||
| 58 | void *(*squashfs_next_page)(struct squashfs_page_actor *); | ||
| 59 | void (*squashfs_finish_page)(struct squashfs_page_actor *); | ||
| 60 | int pages; | ||
| 61 | int length; | ||
| 62 | int next_page; | ||
| 63 | }; | ||
| 64 | |||
| 65 | extern struct squashfs_page_actor *squashfs_page_actor_init(void **, int, int); | ||
| 66 | extern struct squashfs_page_actor *squashfs_page_actor_init_special(struct page | ||
| 67 | **, int, int); | ||
| 68 | static inline void *squashfs_first_page(struct squashfs_page_actor *actor) | ||
| 69 | { | ||
| 70 | return actor->squashfs_first_page(actor); | ||
| 71 | } | ||
| 72 | static inline void *squashfs_next_page(struct squashfs_page_actor *actor) | ||
| 73 | { | ||
| 74 | return actor->squashfs_next_page(actor); | ||
| 75 | } | ||
| 76 | static inline void squashfs_finish_page(struct squashfs_page_actor *actor) | ||
| 77 | { | ||
| 78 | actor->squashfs_finish_page(actor); | ||
| 79 | } | ||
| 80 | #endif | ||
| 81 | #endif | ||
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index d1266516ed08..9e1bb79f7e6f 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h | |||
| @@ -28,8 +28,8 @@ | |||
| 28 | #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) | 28 | #define WARNING(s, args...) pr_warning("SQUASHFS: "s, ## args) |
| 29 | 29 | ||
| 30 | /* block.c */ | 30 | /* block.c */ |
| 31 | extern int squashfs_read_data(struct super_block *, void **, u64, int, u64 *, | 31 | extern int squashfs_read_data(struct super_block *, u64, int, u64 *, |
| 32 | int, int); | 32 | struct squashfs_page_actor *); |
| 33 | 33 | ||
| 34 | /* cache.c */ | 34 | /* cache.c */ |
| 35 | extern struct squashfs_cache *squashfs_cache_init(char *, int, int); | 35 | extern struct squashfs_cache *squashfs_cache_init(char *, int, int); |
| @@ -48,7 +48,14 @@ extern void *squashfs_read_table(struct super_block *, u64, int); | |||
| 48 | 48 | ||
| 49 | /* decompressor.c */ | 49 | /* decompressor.c */ |
| 50 | extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); | 50 | extern const struct squashfs_decompressor *squashfs_lookup_decompressor(int); |
| 51 | extern void *squashfs_decompressor_init(struct super_block *, unsigned short); | 51 | extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); |
| 52 | |||
| 53 | /* decompressor_xxx.c */ | ||
| 54 | extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); | ||
| 55 | extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); | ||
| 56 | extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **, | ||
| 57 | int, int, int, struct squashfs_page_actor *); | ||
| 58 | extern int squashfs_max_decompressors(void); | ||
| 52 | 59 | ||
| 53 | /* export.c */ | 60 | /* export.c */ |
| 54 | extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, | 61 | extern __le64 *squashfs_read_inode_lookup_table(struct super_block *, u64, u64, |
| @@ -59,6 +66,13 @@ extern int squashfs_frag_lookup(struct super_block *, unsigned int, u64 *); | |||
| 59 | extern __le64 *squashfs_read_fragment_index_table(struct super_block *, | 66 | extern __le64 *squashfs_read_fragment_index_table(struct super_block *, |
| 60 | u64, u64, unsigned int); | 67 | u64, u64, unsigned int); |
| 61 | 68 | ||
| 69 | /* file.c */ | ||
| 70 | void squashfs_copy_cache(struct page *, struct squashfs_cache_entry *, int, | ||
| 71 | int); | ||
| 72 | |||
| 73 | /* file_xxx.c */ | ||
| 74 | extern int squashfs_readpage_block(struct page *, u64, int); | ||
| 75 | |||
| 62 | /* id.c */ | 76 | /* id.c */ |
| 63 | extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); | 77 | extern int squashfs_get_id(struct super_block *, unsigned int, unsigned int *); |
| 64 | extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, | 78 | extern __le64 *squashfs_read_id_index_table(struct super_block *, u64, u64, |
diff --git a/fs/squashfs/squashfs_fs_sb.h b/fs/squashfs/squashfs_fs_sb.h index 52934a22f296..1da565cb50c3 100644 --- a/fs/squashfs/squashfs_fs_sb.h +++ b/fs/squashfs/squashfs_fs_sb.h | |||
| @@ -50,6 +50,7 @@ struct squashfs_cache_entry { | |||
| 50 | wait_queue_head_t wait_queue; | 50 | wait_queue_head_t wait_queue; |
| 51 | struct squashfs_cache *cache; | 51 | struct squashfs_cache *cache; |
| 52 | void **data; | 52 | void **data; |
| 53 | struct squashfs_page_actor *actor; | ||
| 53 | }; | 54 | }; |
| 54 | 55 | ||
| 55 | struct squashfs_sb_info { | 56 | struct squashfs_sb_info { |
| @@ -63,10 +64,9 @@ struct squashfs_sb_info { | |||
| 63 | __le64 *id_table; | 64 | __le64 *id_table; |
| 64 | __le64 *fragment_index; | 65 | __le64 *fragment_index; |
| 65 | __le64 *xattr_id_table; | 66 | __le64 *xattr_id_table; |
| 66 | struct mutex read_data_mutex; | ||
| 67 | struct mutex meta_index_mutex; | 67 | struct mutex meta_index_mutex; |
| 68 | struct meta_index *meta_index; | 68 | struct meta_index *meta_index; |
| 69 | void *stream; | 69 | struct squashfs_stream *stream; |
| 70 | __le64 *inode_lookup_table; | 70 | __le64 *inode_lookup_table; |
| 71 | u64 inode_table; | 71 | u64 inode_table; |
| 72 | u64 directory_table; | 72 | u64 directory_table; |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 60553a9053ca..202df6312d4e 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
| @@ -98,7 +98,6 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 98 | msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); | 98 | msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); |
| 99 | msblk->devblksize_log2 = ffz(~msblk->devblksize); | 99 | msblk->devblksize_log2 = ffz(~msblk->devblksize); |
| 100 | 100 | ||
| 101 | mutex_init(&msblk->read_data_mutex); | ||
| 102 | mutex_init(&msblk->meta_index_mutex); | 101 | mutex_init(&msblk->meta_index_mutex); |
| 103 | 102 | ||
| 104 | /* | 103 | /* |
| @@ -206,13 +205,14 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 206 | goto failed_mount; | 205 | goto failed_mount; |
| 207 | 206 | ||
| 208 | /* Allocate read_page block */ | 207 | /* Allocate read_page block */ |
| 209 | msblk->read_page = squashfs_cache_init("data", 1, msblk->block_size); | 208 | msblk->read_page = squashfs_cache_init("data", |
| 209 | squashfs_max_decompressors(), msblk->block_size); | ||
| 210 | if (msblk->read_page == NULL) { | 210 | if (msblk->read_page == NULL) { |
| 211 | ERROR("Failed to allocate read_page block\n"); | 211 | ERROR("Failed to allocate read_page block\n"); |
| 212 | goto failed_mount; | 212 | goto failed_mount; |
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | msblk->stream = squashfs_decompressor_init(sb, flags); | 215 | msblk->stream = squashfs_decompressor_setup(sb, flags); |
| 216 | if (IS_ERR(msblk->stream)) { | 216 | if (IS_ERR(msblk->stream)) { |
| 217 | err = PTR_ERR(msblk->stream); | 217 | err = PTR_ERR(msblk->stream); |
| 218 | msblk->stream = NULL; | 218 | msblk->stream = NULL; |
| @@ -336,7 +336,7 @@ failed_mount: | |||
| 336 | squashfs_cache_delete(msblk->block_cache); | 336 | squashfs_cache_delete(msblk->block_cache); |
| 337 | squashfs_cache_delete(msblk->fragment_cache); | 337 | squashfs_cache_delete(msblk->fragment_cache); |
| 338 | squashfs_cache_delete(msblk->read_page); | 338 | squashfs_cache_delete(msblk->read_page); |
| 339 | squashfs_decompressor_free(msblk, msblk->stream); | 339 | squashfs_decompressor_destroy(msblk); |
| 340 | kfree(msblk->inode_lookup_table); | 340 | kfree(msblk->inode_lookup_table); |
| 341 | kfree(msblk->fragment_index); | 341 | kfree(msblk->fragment_index); |
| 342 | kfree(msblk->id_table); | 342 | kfree(msblk->id_table); |
| @@ -383,7 +383,7 @@ static void squashfs_put_super(struct super_block *sb) | |||
| 383 | squashfs_cache_delete(sbi->block_cache); | 383 | squashfs_cache_delete(sbi->block_cache); |
| 384 | squashfs_cache_delete(sbi->fragment_cache); | 384 | squashfs_cache_delete(sbi->fragment_cache); |
| 385 | squashfs_cache_delete(sbi->read_page); | 385 | squashfs_cache_delete(sbi->read_page); |
| 386 | squashfs_decompressor_free(sbi, sbi->stream); | 386 | squashfs_decompressor_destroy(sbi); |
| 387 | kfree(sbi->id_table); | 387 | kfree(sbi->id_table); |
| 388 | kfree(sbi->fragment_index); | 388 | kfree(sbi->fragment_index); |
| 389 | kfree(sbi->meta_index); | 389 | kfree(sbi->meta_index); |
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 1760b7d108f6..c609624e4b8a 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c | |||
| @@ -32,44 +32,70 @@ | |||
| 32 | #include "squashfs_fs_sb.h" | 32 | #include "squashfs_fs_sb.h" |
| 33 | #include "squashfs.h" | 33 | #include "squashfs.h" |
| 34 | #include "decompressor.h" | 34 | #include "decompressor.h" |
| 35 | #include "page_actor.h" | ||
| 35 | 36 | ||
| 36 | struct squashfs_xz { | 37 | struct squashfs_xz { |
| 37 | struct xz_dec *state; | 38 | struct xz_dec *state; |
| 38 | struct xz_buf buf; | 39 | struct xz_buf buf; |
| 39 | }; | 40 | }; |
| 40 | 41 | ||
| 41 | struct comp_opts { | 42 | struct disk_comp_opts { |
| 42 | __le32 dictionary_size; | 43 | __le32 dictionary_size; |
| 43 | __le32 flags; | 44 | __le32 flags; |
| 44 | }; | 45 | }; |
| 45 | 46 | ||
| 46 | static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, | 47 | struct comp_opts { |
| 47 | int len) | 48 | int dict_size; |
| 49 | }; | ||
| 50 | |||
| 51 | static void *squashfs_xz_comp_opts(struct squashfs_sb_info *msblk, | ||
| 52 | void *buff, int len) | ||
| 48 | { | 53 | { |
| 49 | struct comp_opts *comp_opts = buff; | 54 | struct disk_comp_opts *comp_opts = buff; |
| 50 | struct squashfs_xz *stream; | 55 | struct comp_opts *opts; |
| 51 | int dict_size = msblk->block_size; | 56 | int err = 0, n; |
| 52 | int err, n; | 57 | |
| 58 | opts = kmalloc(sizeof(*opts), GFP_KERNEL); | ||
| 59 | if (opts == NULL) { | ||
| 60 | err = -ENOMEM; | ||
| 61 | goto out2; | ||
| 62 | } | ||
| 53 | 63 | ||
| 54 | if (comp_opts) { | 64 | if (comp_opts) { |
| 55 | /* check compressor options are the expected length */ | 65 | /* check compressor options are the expected length */ |
| 56 | if (len < sizeof(*comp_opts)) { | 66 | if (len < sizeof(*comp_opts)) { |
| 57 | err = -EIO; | 67 | err = -EIO; |
| 58 | goto failed; | 68 | goto out; |
| 59 | } | 69 | } |
| 60 | 70 | ||
| 61 | dict_size = le32_to_cpu(comp_opts->dictionary_size); | 71 | opts->dict_size = le32_to_cpu(comp_opts->dictionary_size); |
| 62 | 72 | ||
| 63 | /* the dictionary size should be 2^n or 2^n+2^(n+1) */ | 73 | /* the dictionary size should be 2^n or 2^n+2^(n+1) */ |
| 64 | n = ffs(dict_size) - 1; | 74 | n = ffs(opts->dict_size) - 1; |
| 65 | if (dict_size != (1 << n) && dict_size != (1 << n) + | 75 | if (opts->dict_size != (1 << n) && opts->dict_size != (1 << n) + |
| 66 | (1 << (n + 1))) { | 76 | (1 << (n + 1))) { |
| 67 | err = -EIO; | 77 | err = -EIO; |
| 68 | goto failed; | 78 | goto out; |
| 69 | } | 79 | } |
| 70 | } | 80 | } else |
| 81 | /* use defaults */ | ||
| 82 | opts->dict_size = max_t(int, msblk->block_size, | ||
| 83 | SQUASHFS_METADATA_SIZE); | ||
| 84 | |||
| 85 | return opts; | ||
| 86 | |||
| 87 | out: | ||
| 88 | kfree(opts); | ||
| 89 | out2: | ||
| 90 | return ERR_PTR(err); | ||
| 91 | } | ||
| 92 | |||
| 71 | 93 | ||
| 72 | dict_size = max_t(int, dict_size, SQUASHFS_METADATA_SIZE); | 94 | static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff) |
| 95 | { | ||
| 96 | struct comp_opts *comp_opts = buff; | ||
| 97 | struct squashfs_xz *stream; | ||
| 98 | int err; | ||
| 73 | 99 | ||
| 74 | stream = kmalloc(sizeof(*stream), GFP_KERNEL); | 100 | stream = kmalloc(sizeof(*stream), GFP_KERNEL); |
| 75 | if (stream == NULL) { | 101 | if (stream == NULL) { |
| @@ -77,7 +103,7 @@ static void *squashfs_xz_init(struct squashfs_sb_info *msblk, void *buff, | |||
| 77 | goto failed; | 103 | goto failed; |
| 78 | } | 104 | } |
| 79 | 105 | ||
| 80 | stream->state = xz_dec_init(XZ_PREALLOC, dict_size); | 106 | stream->state = xz_dec_init(XZ_PREALLOC, comp_opts->dict_size); |
| 81 | if (stream->state == NULL) { | 107 | if (stream->state == NULL) { |
| 82 | kfree(stream); | 108 | kfree(stream); |
| 83 | err = -ENOMEM; | 109 | err = -ENOMEM; |
| @@ -103,42 +129,37 @@ static void squashfs_xz_free(void *strm) | |||
| 103 | } | 129 | } |
| 104 | 130 | ||
| 105 | 131 | ||
| 106 | static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, | 132 | static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, |
| 107 | struct buffer_head **bh, int b, int offset, int length, int srclength, | 133 | struct buffer_head **bh, int b, int offset, int length, |
| 108 | int pages) | 134 | struct squashfs_page_actor *output) |
| 109 | { | 135 | { |
| 110 | enum xz_ret xz_err; | 136 | enum xz_ret xz_err; |
| 111 | int avail, total = 0, k = 0, page = 0; | 137 | int avail, total = 0, k = 0; |
| 112 | struct squashfs_xz *stream = msblk->stream; | 138 | struct squashfs_xz *stream = strm; |
| 113 | |||
| 114 | mutex_lock(&msblk->read_data_mutex); | ||
| 115 | 139 | ||
| 116 | xz_dec_reset(stream->state); | 140 | xz_dec_reset(stream->state); |
| 117 | stream->buf.in_pos = 0; | 141 | stream->buf.in_pos = 0; |
| 118 | stream->buf.in_size = 0; | 142 | stream->buf.in_size = 0; |
| 119 | stream->buf.out_pos = 0; | 143 | stream->buf.out_pos = 0; |
| 120 | stream->buf.out_size = PAGE_CACHE_SIZE; | 144 | stream->buf.out_size = PAGE_CACHE_SIZE; |
| 121 | stream->buf.out = buffer[page++]; | 145 | stream->buf.out = squashfs_first_page(output); |
| 122 | 146 | ||
| 123 | do { | 147 | do { |
| 124 | if (stream->buf.in_pos == stream->buf.in_size && k < b) { | 148 | if (stream->buf.in_pos == stream->buf.in_size && k < b) { |
| 125 | avail = min(length, msblk->devblksize - offset); | 149 | avail = min(length, msblk->devblksize - offset); |
| 126 | length -= avail; | 150 | length -= avail; |
| 127 | wait_on_buffer(bh[k]); | ||
| 128 | if (!buffer_uptodate(bh[k])) | ||
| 129 | goto release_mutex; | ||
| 130 | |||
| 131 | stream->buf.in = bh[k]->b_data + offset; | 151 | stream->buf.in = bh[k]->b_data + offset; |
| 132 | stream->buf.in_size = avail; | 152 | stream->buf.in_size = avail; |
| 133 | stream->buf.in_pos = 0; | 153 | stream->buf.in_pos = 0; |
| 134 | offset = 0; | 154 | offset = 0; |
| 135 | } | 155 | } |
| 136 | 156 | ||
| 137 | if (stream->buf.out_pos == stream->buf.out_size | 157 | if (stream->buf.out_pos == stream->buf.out_size) { |
| 138 | && page < pages) { | 158 | stream->buf.out = squashfs_next_page(output); |
| 139 | stream->buf.out = buffer[page++]; | 159 | if (stream->buf.out != NULL) { |
| 140 | stream->buf.out_pos = 0; | 160 | stream->buf.out_pos = 0; |
| 141 | total += PAGE_CACHE_SIZE; | 161 | total += PAGE_CACHE_SIZE; |
| 162 | } | ||
| 142 | } | 163 | } |
| 143 | 164 | ||
| 144 | xz_err = xz_dec_run(stream->state, &stream->buf); | 165 | xz_err = xz_dec_run(stream->state, &stream->buf); |
| @@ -147,23 +168,14 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
| 147 | put_bh(bh[k++]); | 168 | put_bh(bh[k++]); |
| 148 | } while (xz_err == XZ_OK); | 169 | } while (xz_err == XZ_OK); |
| 149 | 170 | ||
| 150 | if (xz_err != XZ_STREAM_END) { | 171 | squashfs_finish_page(output); |
| 151 | ERROR("xz_dec_run error, data probably corrupt\n"); | ||
| 152 | goto release_mutex; | ||
| 153 | } | ||
| 154 | |||
| 155 | if (k < b) { | ||
| 156 | ERROR("xz_uncompress error, input remaining\n"); | ||
| 157 | goto release_mutex; | ||
| 158 | } | ||
| 159 | 172 | ||
| 160 | total += stream->buf.out_pos; | 173 | if (xz_err != XZ_STREAM_END || k < b) |
| 161 | mutex_unlock(&msblk->read_data_mutex); | 174 | goto out; |
| 162 | return total; | ||
| 163 | 175 | ||
| 164 | release_mutex: | 176 | return total + stream->buf.out_pos; |
| 165 | mutex_unlock(&msblk->read_data_mutex); | ||
| 166 | 177 | ||
| 178 | out: | ||
| 167 | for (; k < b; k++) | 179 | for (; k < b; k++) |
| 168 | put_bh(bh[k]); | 180 | put_bh(bh[k]); |
| 169 | 181 | ||
| @@ -172,6 +184,7 @@ release_mutex: | |||
| 172 | 184 | ||
| 173 | const struct squashfs_decompressor squashfs_xz_comp_ops = { | 185 | const struct squashfs_decompressor squashfs_xz_comp_ops = { |
| 174 | .init = squashfs_xz_init, | 186 | .init = squashfs_xz_init, |
| 187 | .comp_opts = squashfs_xz_comp_opts, | ||
| 175 | .free = squashfs_xz_free, | 188 | .free = squashfs_xz_free, |
| 176 | .decompress = squashfs_xz_uncompress, | 189 | .decompress = squashfs_xz_uncompress, |
| 177 | .id = XZ_COMPRESSION, | 190 | .id = XZ_COMPRESSION, |
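The dictionary-size validation above accepts values of the form 2^n or 2^n + 2^(n+1), i.e. a power of two or three times a power of two: 8192 and 12288 pass, while 10240 is rejected. A standalone sketch of the same check follows; it is an editor's illustration, not the kernel code itself.

	/* Accept dictionary sizes of 2^n or 2^n + 2^(n+1), reject everything else. */
	static int dict_size_valid(unsigned int size)
	{
		int n;

		if (size == 0)
			return 0;

		n = ffs(size) - 1;	/* index of the lowest set bit */
		return size == (1u << n) || size == (1u << n) + (1u << (n + 1));
	}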
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index 55d918fd2d86..8727caba6882 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c | |||
| @@ -32,8 +32,9 @@ | |||
| 32 | #include "squashfs_fs_sb.h" | 32 | #include "squashfs_fs_sb.h" |
| 33 | #include "squashfs.h" | 33 | #include "squashfs.h" |
| 34 | #include "decompressor.h" | 34 | #include "decompressor.h" |
| 35 | #include "page_actor.h" | ||
| 35 | 36 | ||
| 36 | static void *zlib_init(struct squashfs_sb_info *dummy, void *buff, int len) | 37 | static void *zlib_init(struct squashfs_sb_info *dummy, void *buff) |
| 37 | { | 38 | { |
| 38 | z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); | 39 | z_stream *stream = kmalloc(sizeof(z_stream), GFP_KERNEL); |
| 39 | if (stream == NULL) | 40 | if (stream == NULL) |
| @@ -61,44 +62,37 @@ static void zlib_free(void *strm) | |||
| 61 | } | 62 | } |
| 62 | 63 | ||
| 63 | 64 | ||
| 64 | static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, | 65 | static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, |
| 65 | struct buffer_head **bh, int b, int offset, int length, int srclength, | 66 | struct buffer_head **bh, int b, int offset, int length, |
| 66 | int pages) | 67 | struct squashfs_page_actor *output) |
| 67 | { | 68 | { |
| 68 | int zlib_err, zlib_init = 0; | 69 | int zlib_err, zlib_init = 0, k = 0; |
| 69 | int k = 0, page = 0; | 70 | z_stream *stream = strm; |
| 70 | z_stream *stream = msblk->stream; | ||
| 71 | |||
| 72 | mutex_lock(&msblk->read_data_mutex); | ||
| 73 | 71 | ||
| 74 | stream->avail_out = 0; | 72 | stream->avail_out = PAGE_CACHE_SIZE; |
| 73 | stream->next_out = squashfs_first_page(output); | ||
| 75 | stream->avail_in = 0; | 74 | stream->avail_in = 0; |
| 76 | 75 | ||
| 77 | do { | 76 | do { |
| 78 | if (stream->avail_in == 0 && k < b) { | 77 | if (stream->avail_in == 0 && k < b) { |
| 79 | int avail = min(length, msblk->devblksize - offset); | 78 | int avail = min(length, msblk->devblksize - offset); |
| 80 | length -= avail; | 79 | length -= avail; |
| 81 | wait_on_buffer(bh[k]); | ||
| 82 | if (!buffer_uptodate(bh[k])) | ||
| 83 | goto release_mutex; | ||
| 84 | |||
| 85 | stream->next_in = bh[k]->b_data + offset; | 80 | stream->next_in = bh[k]->b_data + offset; |
| 86 | stream->avail_in = avail; | 81 | stream->avail_in = avail; |
| 87 | offset = 0; | 82 | offset = 0; |
| 88 | } | 83 | } |
| 89 | 84 | ||
| 90 | if (stream->avail_out == 0 && page < pages) { | 85 | if (stream->avail_out == 0) { |
| 91 | stream->next_out = buffer[page++]; | 86 | stream->next_out = squashfs_next_page(output); |
| 92 | stream->avail_out = PAGE_CACHE_SIZE; | 87 | if (stream->next_out != NULL) |
| 88 | stream->avail_out = PAGE_CACHE_SIZE; | ||
| 93 | } | 89 | } |
| 94 | 90 | ||
| 95 | if (!zlib_init) { | 91 | if (!zlib_init) { |
| 96 | zlib_err = zlib_inflateInit(stream); | 92 | zlib_err = zlib_inflateInit(stream); |
| 97 | if (zlib_err != Z_OK) { | 93 | if (zlib_err != Z_OK) { |
| 98 | ERROR("zlib_inflateInit returned unexpected " | 94 | squashfs_finish_page(output); |
| 99 | "result 0x%x, srclength %d\n", | 95 | goto out; |
| 100 | zlib_err, srclength); | ||
| 101 | goto release_mutex; | ||
| 102 | } | 96 | } |
| 103 | zlib_init = 1; | 97 | zlib_init = 1; |
| 104 | } | 98 | } |
| @@ -109,29 +103,21 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void **buffer, | |||
| 109 | put_bh(bh[k++]); | 103 | put_bh(bh[k++]); |
| 110 | } while (zlib_err == Z_OK); | 104 | } while (zlib_err == Z_OK); |
| 111 | 105 | ||
| 112 | if (zlib_err != Z_STREAM_END) { | 106 | squashfs_finish_page(output); |
| 113 | ERROR("zlib_inflate error, data probably corrupt\n"); | ||
| 114 | goto release_mutex; | ||
| 115 | } | ||
| 116 | 107 | ||
| 117 | zlib_err = zlib_inflateEnd(stream); | 108 | if (zlib_err != Z_STREAM_END) |
| 118 | if (zlib_err != Z_OK) { | 109 | goto out; |
| 119 | ERROR("zlib_inflate error, data probably corrupt\n"); | ||
| 120 | goto release_mutex; | ||
| 121 | } | ||
| 122 | 110 | ||
| 123 | if (k < b) { | 111 | zlib_err = zlib_inflateEnd(stream); |
| 124 | ERROR("zlib_uncompress error, data remaining\n"); | 112 | if (zlib_err != Z_OK) |
| 125 | goto release_mutex; | 113 | goto out; |
| 126 | } | ||
| 127 | 114 | ||
| 128 | length = stream->total_out; | 115 | if (k < b) |
| 129 | mutex_unlock(&msblk->read_data_mutex); | 116 | goto out; |
| 130 | return length; | ||
| 131 | 117 | ||
| 132 | release_mutex: | 118 | return stream->total_out; |
| 133 | mutex_unlock(&msblk->read_data_mutex); | ||
| 134 | 119 | ||
| 120 | out: | ||
| 135 | for (; k < b; k++) | 121 | for (; k < b; k++) |
| 136 | put_bh(bh[k]); | 122 | put_bh(bh[k]); |
| 137 | 123 | ||
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 1c02da8bb7df..3ef11b22e750 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
| @@ -1137,6 +1137,7 @@ xfs_bmap_add_attrfork( | |||
| 1137 | int committed; /* xaction was committed */ | 1137 | int committed; /* xaction was committed */ |
| 1138 | int logflags; /* logging flags */ | 1138 | int logflags; /* logging flags */ |
| 1139 | int error; /* error return value */ | 1139 | int error; /* error return value */ |
| 1140 | int cancel_flags = 0; | ||
| 1140 | 1141 | ||
| 1141 | ASSERT(XFS_IFORK_Q(ip) == 0); | 1142 | ASSERT(XFS_IFORK_Q(ip) == 0); |
| 1142 | 1143 | ||
| @@ -1147,19 +1148,20 @@ xfs_bmap_add_attrfork( | |||
| 1147 | if (rsvd) | 1148 | if (rsvd) |
| 1148 | tp->t_flags |= XFS_TRANS_RESERVE; | 1149 | tp->t_flags |= XFS_TRANS_RESERVE; |
| 1149 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); | 1150 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); |
| 1150 | if (error) | 1151 | if (error) { |
| 1151 | goto error0; | 1152 | xfs_trans_cancel(tp, 0); |
| 1153 | return error; | ||
| 1154 | } | ||
| 1155 | cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
| 1152 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 1156 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
| 1153 | error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? | 1157 | error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ? |
| 1154 | XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : | 1158 | XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : |
| 1155 | XFS_QMOPT_RES_REGBLKS); | 1159 | XFS_QMOPT_RES_REGBLKS); |
| 1156 | if (error) { | 1160 | if (error) |
| 1157 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1161 | goto trans_cancel; |
| 1158 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); | 1162 | cancel_flags |= XFS_TRANS_ABORT; |
| 1159 | return error; | ||
| 1160 | } | ||
| 1161 | if (XFS_IFORK_Q(ip)) | 1163 | if (XFS_IFORK_Q(ip)) |
| 1162 | goto error1; | 1164 | goto trans_cancel; |
| 1163 | if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { | 1165 | if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) { |
| 1164 | /* | 1166 | /* |
| 1165 | * For inodes coming from pre-6.2 filesystems. | 1167 | * For inodes coming from pre-6.2 filesystems. |
| @@ -1169,7 +1171,7 @@ xfs_bmap_add_attrfork( | |||
| 1169 | } | 1171 | } |
| 1170 | ASSERT(ip->i_d.di_anextents == 0); | 1172 | ASSERT(ip->i_d.di_anextents == 0); |
| 1171 | 1173 | ||
| 1172 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | 1174 | xfs_trans_ijoin(tp, ip, 0); |
| 1173 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 1175 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
| 1174 | 1176 | ||
| 1175 | switch (ip->i_d.di_format) { | 1177 | switch (ip->i_d.di_format) { |
| @@ -1191,7 +1193,7 @@ xfs_bmap_add_attrfork( | |||
| 1191 | default: | 1193 | default: |
| 1192 | ASSERT(0); | 1194 | ASSERT(0); |
| 1193 | error = XFS_ERROR(EINVAL); | 1195 | error = XFS_ERROR(EINVAL); |
| 1194 | goto error1; | 1196 | goto trans_cancel; |
| 1195 | } | 1197 | } |
| 1196 | 1198 | ||
| 1197 | ASSERT(ip->i_afp == NULL); | 1199 | ASSERT(ip->i_afp == NULL); |
| @@ -1219,7 +1221,7 @@ xfs_bmap_add_attrfork( | |||
| 1219 | if (logflags) | 1221 | if (logflags) |
| 1220 | xfs_trans_log_inode(tp, ip, logflags); | 1222 | xfs_trans_log_inode(tp, ip, logflags); |
| 1221 | if (error) | 1223 | if (error) |
| 1222 | goto error2; | 1224 | goto bmap_cancel; |
| 1223 | if (!xfs_sb_version_hasattr(&mp->m_sb) || | 1225 | if (!xfs_sb_version_hasattr(&mp->m_sb) || |
| 1224 | (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { | 1226 | (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) { |
| 1225 | __int64_t sbfields = 0; | 1227 | __int64_t sbfields = 0; |
| @@ -1242,14 +1244,16 @@ xfs_bmap_add_attrfork( | |||
| 1242 | 1244 | ||
| 1243 | error = xfs_bmap_finish(&tp, &flist, &committed); | 1245 | error = xfs_bmap_finish(&tp, &flist, &committed); |
| 1244 | if (error) | 1246 | if (error) |
| 1245 | goto error2; | 1247 | goto bmap_cancel; |
| 1246 | return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 1248 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
| 1247 | error2: | 1249 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 1250 | return error; | ||
| 1251 | |||
| 1252 | bmap_cancel: | ||
| 1248 | xfs_bmap_cancel(&flist); | 1253 | xfs_bmap_cancel(&flist); |
| 1249 | error1: | 1254 | trans_cancel: |
| 1255 | xfs_trans_cancel(tp, cancel_flags); | ||
| 1250 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1256 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 1251 | error0: | ||
| 1252 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); | ||
| 1253 | return error; | 1257 | return error; |
| 1254 | } | 1258 | } |
| 1255 | 1259 | ||
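The xfs_bmap_add_attrfork() rework above replaces the error0/error1/error2 labels with a cancel_flags value that grows as the transaction makes progress, so every failure funnels into one cancel path that performs exactly the cleanup earned so far. A generic sketch of the idiom, using hypothetical stand-in names rather than the real XFS calls:

	/* Illustrative pattern only; the step functions and flags are placeholders. */
	int example_transaction(void)
	{
		int cancel_flags = 0;
		int error;

		error = reserve_log_space();		/* hypothetical step */
		if (error) {
			cancel_transaction(0);		/* nothing reserved yet */
			return error;
		}
		cancel_flags = RELEASE_LOG_RES;		/* reservation must now be released */

		error = reserve_quota();		/* hypothetical step */
		if (error)
			goto cancel;
		cancel_flags |= ABORT;			/* transaction is dirty: must abort */

		error = do_modifications();		/* hypothetical step */
		if (error)
			goto cancel;

		return commit_transaction();

	cancel:
		cancel_transaction(cancel_flags);
		return error;
	}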
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index da88f167af78..02df7b408a26 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | #include "xfs_fsops.h" | 41 | #include "xfs_fsops.h" |
| 42 | #include "xfs_trace.h" | 42 | #include "xfs_trace.h" |
| 43 | #include "xfs_icache.h" | 43 | #include "xfs_icache.h" |
| 44 | #include "xfs_dinode.h" | ||
| 44 | 45 | ||
| 45 | 46 | ||
| 46 | #ifdef HAVE_PERCPU_SB | 47 | #ifdef HAVE_PERCPU_SB |
| @@ -718,8 +719,22 @@ xfs_mountfs( | |||
| 718 | * Set the inode cluster size. | 719 | * Set the inode cluster size. |
| 719 | * This may still be overridden by the file system | 720 | * This may still be overridden by the file system |
| 720 | * block size if it is larger than the chosen cluster size. | 721 | * block size if it is larger than the chosen cluster size. |
| 722 | * | ||
| 723 | * For v5 filesystems, scale the cluster size with the inode size to | ||
| 724 | * keep a constant ratio of inode per cluster buffer, but only if mkfs | ||
| 725 | * has set the inode alignment value appropriately for larger cluster | ||
| 726 | * sizes. | ||
| 721 | */ | 727 | */ |
| 722 | mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; | 728 | mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; |
| 729 | if (xfs_sb_version_hascrc(&mp->m_sb)) { | ||
| 730 | int new_size = mp->m_inode_cluster_size; | ||
| 731 | |||
| 732 | new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; | ||
| 733 | if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) | ||
| 734 | mp->m_inode_cluster_size = new_size; | ||
| 735 | xfs_info(mp, "Using inode cluster size of %d bytes", | ||
| 736 | mp->m_inode_cluster_size); | ||
| 737 | } | ||
| 723 | 738 | ||
| 724 | /* | 739 | /* |
| 725 | * Set inode alignment fields | 740 | * Set inode alignment fields |
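To make the scaling above concrete (using the usual constants of that era, which are an editor's assumption rather than something stated in this hunk): XFS_DINODE_MIN_SIZE is 256 bytes and XFS_INODE_BIG_CLUSTER_SIZE is 8192 bytes, so a v5 filesystem with 512-byte inodes computes

	new_size = 8192 * (512 / 256) = 16384 bytes,

keeping 32 inodes per cluster buffer. The larger cluster is only adopted if mkfs set sb_inoalignmt to at least XFS_B_TO_FSBT(mp, 16384) filesystem blocks (4 blocks at a 4k block size); otherwise the historical 8192-byte cluster size is kept.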
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1d8101a10d8e..a466c5e5826e 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
| @@ -112,7 +112,7 @@ typedef struct xfs_mount { | |||
| 112 | __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ | 112 | __uint8_t m_blkbb_log; /* blocklog - BBSHIFT */ |
| 113 | __uint8_t m_agno_log; /* log #ag's */ | 113 | __uint8_t m_agno_log; /* log #ag's */ |
| 114 | __uint8_t m_agino_log; /* #bits for agino in inum */ | 114 | __uint8_t m_agino_log; /* #bits for agino in inum */ |
| 115 | __uint16_t m_inode_cluster_size;/* min inode buf size */ | 115 | uint m_inode_cluster_size;/* min inode buf size */ |
| 116 | uint m_blockmask; /* sb_blocksize-1 */ | 116 | uint m_blockmask; /* sb_blocksize-1 */ |
| 117 | uint m_blockwsize; /* sb_blocksize in words */ | 117 | uint m_blockwsize; /* sb_blocksize in words */ |
| 118 | uint m_blockwmask; /* blockwsize-1 */ | 118 | uint m_blockwmask; /* blockwsize-1 */ |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 1bba7f60d94c..50c3f5614288 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
| @@ -111,12 +111,14 @@ xfs_trans_log_inode( | |||
| 111 | 111 | ||
| 112 | /* | 112 | /* |
| 113 | * First time we log the inode in a transaction, bump the inode change | 113 | * First time we log the inode in a transaction, bump the inode change |
| 114 | * counter if it is configured for this to occur. | 114 | * counter if it is configured for this to occur. We don't use |
| 115 | * inode_inc_version() because there is no need for extra locking around | ||
| 116 | * i_version as we already hold the inode locked exclusively for | ||
| 117 | * metadata modification. | ||
| 115 | */ | 118 | */ |
| 116 | if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && | 119 | if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) && |
| 117 | IS_I_VERSION(VFS_I(ip))) { | 120 | IS_I_VERSION(VFS_I(ip))) { |
| 118 | inode_inc_iversion(VFS_I(ip)); | 121 | ip->i_d.di_changecount = ++VFS_I(ip)->i_version; |
| 119 | ip->i_d.di_changecount = VFS_I(ip)->i_version; | ||
| 120 | flags |= XFS_ILOG_CORE; | 122 | flags |= XFS_ILOG_CORE; |
| 121 | } | 123 | } |
| 122 | 124 | ||
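For context on the "extra locking" mentioned in the reworked comment: the generic helper being avoided bumped i_version under the inode spinlock, roughly as sketched below (an editor's paraphrase of the helper as it looked around this kernel version, not code from this patch). Since xfs_trans_log_inode() already holds the inode locked exclusively for metadata modification, the open-coded increment gives the same result without taking i_lock.

	/* Rough paraphrase of the generic helper this change avoids. */
	static inline void inode_inc_iversion(struct inode *inode)
	{
		spin_lock(&inode->i_lock);
		inode->i_version++;
		spin_unlock(&inode->i_lock);
	}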
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index d53d9f0627a7..2fd59c0dae66 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c | |||
| @@ -385,8 +385,7 @@ xfs_calc_ifree_reservation( | |||
| 385 | xfs_calc_inode_res(mp, 1) + | 385 | xfs_calc_inode_res(mp, 1) + |
| 386 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | 386 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
| 387 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + | 387 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + |
| 388 | MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), | 388 | max_t(uint, XFS_FSB_TO_B(mp, 1), XFS_INODE_CLUSTER_SIZE(mp)) + |
| 389 | XFS_INODE_CLUSTER_SIZE(mp)) + | ||
| 390 | xfs_calc_buf_res(1, 0) + | 389 | xfs_calc_buf_res(1, 0) + |
| 391 | xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + | 390 | xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + |
| 392 | mp->m_in_maxlevels, 0) + | 391 | mp->m_in_maxlevels, 0) + |
