85 files changed, 803 insertions, 585 deletions
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 5e376bb93419..8defc6b3f9a2 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -40,7 +40,7 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
        int block, off;
        inode = iget_locked(sb, ino);
-        if (IS_ERR(inode))
+        if (!inode)
                return ERR_PTR(-ENOMEM);
        if (!(inode->i_state & I_NEW))
                return inode;
diff --git a/fs/bio.c b/fs/bio.c
index 94bbc04dba77..c5eae7251490 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1045,12 +1045,22 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
 int bio_uncopy_user(struct bio *bio)
 {
        struct bio_map_data *bmd = bio->bi_private;
-        int ret = 0;
+        struct bio_vec *bvec;
+        int ret = 0, i;
-        if (!bio_flagged(bio, BIO_NULL_MAPPED))
+        if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
-                ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
+                /*
-                                     bmd->nr_sgvecs, bio_data_dir(bio) == READ,
+                 * if we're in a workqueue, the request is orphaned, so
-                                     0, bmd->is_our_pages);
+                 * don't copy into a random user address space, just free.
+                 */
+                if (current->mm)
+                        ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
+                                             bmd->nr_sgvecs, bio_data_dir(bio) == READ,
+                                             0, bmd->is_our_pages);
+                else if (bmd->is_our_pages)
+                        bio_for_each_segment_all(bvec, bio, i)
+                                __free_page(bvec->bv_page);
+        }
        bio_free_map_data(bmd);
        bio_put(bio);
        return ret;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index eaf133384a8f..8bc5e8ccb091 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -36,16 +36,23 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
                                u64 extent_item_pos,
                                struct extent_inode_elem **eie)
 {
-        u64 data_offset;
+        u64 offset = 0;
-        u64 data_len;
        struct extent_inode_elem *e;
-        data_offset = btrfs_file_extent_offset(eb, fi);
+        if (!btrfs_file_extent_compression(eb, fi) &&
-        data_len = btrfs_file_extent_num_bytes(eb, fi);
+            !btrfs_file_extent_encryption(eb, fi) &&
+            !btrfs_file_extent_other_encoding(eb, fi)) {
+                u64 data_offset;
+                u64 data_len;
-        if (extent_item_pos < data_offset ||
+                data_offset = btrfs_file_extent_offset(eb, fi);
-            extent_item_pos >= data_offset + data_len)
+                data_len = btrfs_file_extent_num_bytes(eb, fi);
-                return 1;
+                if (extent_item_pos < data_offset ||
+                    extent_item_pos >= data_offset + data_len)
+                        return 1;
+                offset = extent_item_pos - data_offset;
+        }
        e = kmalloc(sizeof(*e), GFP_NOFS);
        if (!e)
@@ -53,7 +60,7 @@ static int check_extent_in_eb(struct btrfs_key *key, struct extent_buffer *eb,
        e->next = *eie;
        e->inum = key->objectid;
-        e->offset = key->offset + (extent_item_pos - data_offset);
+        e->offset = key->offset + offset;
        *eie = e;
        return 0;
@@ -189,7 +196,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
        struct extent_buffer *eb;
        struct btrfs_key key;
        struct btrfs_file_extent_item *fi;
-        struct extent_inode_elem *eie = NULL;
+        struct extent_inode_elem *eie = NULL, *old = NULL;
        u64 disk_byte;
        if (level != 0) {
@@ -223,6 +230,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                if (disk_byte == wanted_disk_byte) {
                        eie = NULL;
+                        old = NULL;
                        if (extent_item_pos) {
                                ret = check_extent_in_eb(&key, eb, fi,
                                                *extent_item_pos,
@@ -230,18 +238,20 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
                                if (ret < 0)
                                        break;
                        }
-                        if (!ret) {
+                        if (ret > 0)
-                                ret = ulist_add(parents, eb->start,
+                                goto next;
-                                                (uintptr_t)eie, GFP_NOFS);
+                        ret = ulist_add_merge(parents, eb->start,
-                                if (ret < 0)
+                                              (uintptr_t)eie,
-                                        break;
+                                              (u64 *)&old, GFP_NOFS);
-                                if (!extent_item_pos) {
+                        if (ret < 0)
-                                        ret = btrfs_next_old_leaf(root, path,
+                                break;
-                                                        time_seq);
+                        if (!ret && extent_item_pos) {
-                                        continue;
+                                while (old->next)
-                                }
+                                        old = old->next;
+                                old->next = eie;
                        }
                }
+next:
                ret = btrfs_next_old_item(root, path, time_seq);
        }
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5bf4c39e2ad6..ed504607d8ec 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1271,7 +1271,6 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
                BUG_ON(!eb_rewin);
        }
-        extent_buffer_get(eb_rewin);
        btrfs_tree_read_unlock(eb);
        free_extent_buffer(eb);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0236de711989..1204c8ef6f32 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7466,6 +7466,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
        int err = 0;
        int ret;
        int level;
+        bool root_dropped = false;
        path = btrfs_alloc_path();
        if (!path) {
@@ -7523,6 +7524,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                while (1) {
                        btrfs_tree_lock(path->nodes[level]);
                        btrfs_set_lock_blocking(path->nodes[level]);
+                        path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
                        ret = btrfs_lookup_extent_info(trans, root,
                                                path->nodes[level]->start,
@@ -7538,6 +7540,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                                break;
                        btrfs_tree_unlock(path->nodes[level]);
+                        path->locks[level] = 0;
                        WARN_ON(wc->refs[level] != 1);
                        level--;
                }
@@ -7552,11 +7555,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
        wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
        while (1) {
-                if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
-                        pr_debug("btrfs: drop snapshot early exit\n");
-                        err = -EAGAIN;
-                        goto out_end_trans;
-                }
                ret = walk_down_tree(trans, root, path, wc);
                if (ret < 0) {
@@ -7584,7 +7582,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                }
                BUG_ON(wc->level == 0);
-                if (btrfs_should_end_transaction(trans, tree_root)) {
+                if (btrfs_should_end_transaction(trans, tree_root) ||
+                    (!for_reloc && btrfs_need_cleaner_sleep(root))) {
                        ret = btrfs_update_root(trans, tree_root,
                                                &root->root_key,
                                                root_item);
@@ -7595,6 +7594,12 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                        }
                        btrfs_end_transaction_throttle(trans, tree_root);
+                        if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
+                                pr_debug("btrfs: drop snapshot early exit\n");
+                                err = -EAGAIN;
+                                goto out_free;
+                        }
                        trans = btrfs_start_transaction(tree_root, 0);
                        if (IS_ERR(trans)) {
                                err = PTR_ERR(trans);
@@ -7639,12 +7644,22 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
                free_extent_buffer(root->commit_root);
                btrfs_put_fs_root(root);
        }
+        root_dropped = true;
 out_end_trans:
        btrfs_end_transaction_throttle(trans, tree_root);
 out_free:
        kfree(wc);
        btrfs_free_path(path);
 out:
+        /*
+         * So if we need to stop dropping the snapshot for whatever reason we
+         * need to make sure to add it back to the dead root list so that we
+         * keep trying to do the work later.  This also cleans up roots if we
+         * don't have it in the radix (like when we recover after a power fail
+         * or unmount) so we don't leak memory.
+         */
+        if (root_dropped == false)
+                btrfs_add_dead_root(root);
        if (err)
                btrfs_std_error(root->fs_info, err);
        return err;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 583d98bd065e..fe443fece851 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4048,7 +4048,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        }
        while (!end) {
-                u64 offset_in_extent;
+                u64 offset_in_extent = 0;
                /* break if the extent we found is outside the range */
                if (em->start >= max || extent_map_end(em) < off)
@@ -4064,9 +4064,12 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                /*
                 * record the offset from the start of the extent
-                 * for adjusting the disk offset below
+                 * for adjusting the disk offset below.  Only do this if the
+                 * extent isn't compressed since our in ram offset may be past
+                 * what we have actually allocated on disk.
                 */
-                offset_in_extent = em_start - em->start;
+                if (!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
+                        offset_in_extent = em_start - em->start;
                em_end = extent_map_end(em);
                em_len = em_end - em_start;
                emflags = em->flags;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index a005fe2c072a..8e686a427ce2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -596,20 +596,29 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                if (no_splits)
                        goto next;
-                if (em->block_start < EXTENT_MAP_LAST_BYTE &&
+                if (em->start < start) {
-                    em->start < start) {
                        split->start = em->start;
                        split->len = start - em->start;
-                        split->orig_start = em->orig_start;
-                        split->block_start = em->block_start;
-                        if (compressed)
+                        if (em->block_start < EXTENT_MAP_LAST_BYTE) {
-                                split->block_len = em->block_len;
+                                split->orig_start = em->orig_start;
-                        else
+                                split->block_start = em->block_start;
-                                split->block_len = split->len;
-                        split->ram_bytes = em->ram_bytes;
+                                if (compressed)
-                        split->orig_block_len = max(split->block_len,
+                                        split->block_len = em->block_len;
-                                                    em->orig_block_len);
+                                else
+                                        split->block_len = split->len;
+                                split->orig_block_len = max(split->block_len,
+                                                em->orig_block_len);
+                                split->ram_bytes = em->ram_bytes;
+                        } else {
+                                split->orig_start = split->start;
+                                split->block_len = 0;
+                                split->block_start = em->block_start;
+                                split->orig_block_len = 0;
+                                split->ram_bytes = split->len;
+                        }
                        split->generation = gen;
                        split->bdev = em->bdev;
                        split->flags = flags;
@@ -620,8 +629,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        split = split2;
                        split2 = NULL;
                }
-                if (em->block_start < EXTENT_MAP_LAST_BYTE &&
+                if (testend && em->start + em->len > start + len) {
-                    testend && em->start + em->len > start + len) {
                        u64 diff = start + len - em->start;
                        split->start = start + len;
@@ -630,18 +638,28 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
                        split->flags = flags;
                        split->compress_type = em->compress_type;
                        split->generation = gen;
-                        split->orig_block_len = max(em->block_len,
+                        if (em->block_start < EXTENT_MAP_LAST_BYTE) {
+                                split->orig_block_len = max(em->block_len,
                                                    em->orig_block_len);
-                        split->ram_bytes = em->ram_bytes;
-                        if (compressed) {
+                                split->ram_bytes = em->ram_bytes;
-                                split->block_len = em->block_len;
+                                if (compressed) {
-                                split->block_start = em->block_start;
+                                        split->block_len = em->block_len;
-                                split->orig_start = em->orig_start;
+                                        split->block_start = em->block_start;
+                                        split->orig_start = em->orig_start;
+                                } else {
+                                        split->block_len = split->len;
+                                        split->block_start = em->block_start
+                                                + diff;
+                                        split->orig_start = em->orig_start;
+                                }
                        } else {
-                                split->block_len = split->len;
+                                split->ram_bytes = split->len;
-                                split->block_start = em->block_start + diff;
+                                split->orig_start = split->start;
-                                split->orig_start = em->orig_start;
+                                split->block_len = 0;
+                                split->block_start = em->block_start;
+                                split->orig_block_len = 0;
                        }
                        ret = add_extent_mapping(em_tree, split, modified);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 6d1b93c8aafb..021694c08181 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2166,16 +2166,23 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
                if (btrfs_file_extent_disk_bytenr(leaf, extent) != old->bytenr)
                        continue;
-                extent_offset = btrfs_file_extent_offset(leaf, extent);
+                /*
-                if (key.offset - extent_offset != offset)
+                 * 'offset' refers to the exact key.offset,
+                 * NOT the 'offset' field in btrfs_extent_data_ref, ie.
+                 * (key.offset - extent_offset).
+                 */
+                if (key.offset != offset)
                        continue;
+                extent_offset = btrfs_file_extent_offset(leaf, extent);
                num_bytes = btrfs_file_extent_num_bytes(leaf, extent);
                if (extent_offset >= old->extent_offset + old->offset +
                    old->len || extent_offset + num_bytes <=
                    old->extent_offset + old->offset)
                        continue;
+                ret = 0;
                break;
        }
@@ -2187,7 +2194,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
        backref->root_id = root_id;
        backref->inum = inum;
-        backref->file_pos = offset + extent_offset;
+        backref->file_pos = offset;
        backref->num_bytes = num_bytes;
        backref->extent_offset = extent_offset;
        backref->generation = btrfs_file_extent_generation(leaf, extent);
@@ -2210,7 +2217,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
        new->path = path;
        list_for_each_entry_safe(old, tmp, &new->head, list) {
-                ret = iterate_inodes_from_logical(old->bytenr, fs_info,
+                ret = iterate_inodes_from_logical(old->bytenr +
+                                                  old->extent_offset, fs_info,
                                                  path, record_one_backref,
                                                  old);
                BUG_ON(ret < 0 && ret != -ENOENT);
@@ -4391,9 +4399,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
        int mask = attr->ia_valid;
        int ret;
-        if (newsize == oldsize)
-                return 0;
        /*
         * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
         * special case where we need to update the times despite not having
@@ -5165,14 +5170,31 @@ next:
        }
        /* Reached end of directory/root. Bump pos past the last item. */
-        if (key_type == BTRFS_DIR_INDEX_KEY)
+        ctx->pos++;
-                /*
-                 * 32-bit glibc will use getdents64, but then strtol -
+        /*
-                 * so the last number we can serve is this.
+         * Stop new entries from being returned after we return the last
-                 */
+         * entry.
-                ctx->pos = 0x7fffffff;
+         *
-        else
+         * New directory entries are assigned a strictly increasing
-                ctx->pos++;
+         * offset.  This means that new entries created during readdir
+         * are *guaranteed* to be seen in the future by that readdir.
+         * This has broken buggy programs which operate on names as
+         * they're returned by readdir.  Until we re-use freed offsets
+         * we have this hack to stop new entries from being returned
+         * under the assumption that they'll never reach this huge
+         * offset.
+         *
+         * This is being careful not to overflow 32bit loff_t unless the
+         * last entry requires it because doing so has broken 32bit apps
+         * in the past.
+         */
+        if (key_type == BTRFS_DIR_INDEX_KEY) {
+                if (ctx->pos >= INT_MAX)
+                        ctx->pos = LLONG_MAX;
+                else
+                        ctx->pos = INT_MAX;
+        }
 nopos:
        ret = 0;
 err:
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 4ba2a69a60ad..64a157becbe5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2495,7 +2495,7 @@ again:
                        ret = scrub_extent(sctx, extent_logical, extent_len,
                                           extent_physical, extent_dev, flags,
                                           generation, extent_mirror_num,
-                                           extent_physical);
+                                           extent_logical - logical + physical);
                        if (ret)
                                goto out;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index d58cce77fc6c..af1931a5960d 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -983,12 +983,12 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
 * a dirty root struct and adds it into the list of dead roots that need to
 * be deleted
 */
-int btrfs_add_dead_root(struct btrfs_root *root)
+void btrfs_add_dead_root(struct btrfs_root *root)
 {
        spin_lock(&root->fs_info->trans_lock);
-        list_add_tail(&root->root_list, &root->fs_info->dead_roots);
+        if (list_empty(&root->root_list))
+                list_add_tail(&root->root_list, &root->fs_info->dead_roots);
        spin_unlock(&root->fs_info->trans_lock);
-        return 0;
 }
 /*
@@ -1925,7 +1925,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
        }
        root = list_first_entry(&fs_info->dead_roots,
                        struct btrfs_root, root_list);
-        list_del(&root->root_list);
+        list_del_init(&root->root_list);
        spin_unlock(&fs_info->trans_lock);
        pr_debug("btrfs: cleaner removing %llu\n",
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 005b0375d18c..defbc4269897 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -143,7 +143,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root);
-int btrfs_add_dead_root(struct btrfs_root *root);
+void btrfs_add_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root);
 int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2c6791493637..ff60d8978ae2 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3746,8 +3746,9 @@ next_slot:
        }
 log_extents:
+        btrfs_release_path(path);
+        btrfs_release_path(dst_path);
        if (fast_search) {
-                btrfs_release_path(dst_path);
                ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
                if (ret) {
                        err = ret;
@@ -3764,8 +3765,6 @@ log_extents:
        }
        if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
-                btrfs_release_path(path);
-                btrfs_release_path(dst_path);
                ret = log_directory_changes(trans, root, inode, path, dst_path);
                if (ret) {
                        err = ret;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 45e57cc38200..fc6f4f3a1a9d 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,17 +43,18 @@ cifs_crypto_shash_md5_allocate(struct TCP_Server_Info *server)
        server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
        if (IS_ERR(server->secmech.md5)) {
                cifs_dbg(VFS, "could not allocate crypto md5\n");
-                return PTR_ERR(server->secmech.md5);
+                rc = PTR_ERR(server->secmech.md5);
+                server->secmech.md5 = NULL;
+                return rc;
        }
        size = sizeof(struct shash_desc) +
                        crypto_shash_descsize(server->secmech.md5);
        server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
        if (!server->secmech.sdescmd5) {
-                rc = -ENOMEM;
                crypto_free_shash(server->secmech.md5);
                server->secmech.md5 = NULL;
-                return rc;
+                return -ENOMEM;
        }
        server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
        server->secmech.sdescmd5->shash.flags = 0x0;
@@ -421,7 +422,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp)
                if (blobptr + attrsize > blobend)
                        break;
                if (type == NTLMSSP_AV_NB_DOMAIN_NAME) {
-                        if (!attrsize)
+                        if (!attrsize || attrsize >= CIFS_MAX_DOMAINNAME_LEN)
                                break;
                        if (!ses->domainName) {
                                ses->domainName =
@@ -591,6 +592,7 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
 static int crypto_hmacmd5_alloc(struct TCP_Server_Info *server)
 {
+        int rc;
        unsigned int size;
        /* check if already allocated */
@@ -600,7 +602,9 @@ static int crypto_hmacmd5_alloc(struct TCP_Server_Info *server)
        server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
        if (IS_ERR(server->secmech.hmacmd5)) {
                cifs_dbg(VFS, "could not allocate crypto hmacmd5\n");
-                return PTR_ERR(server->secmech.hmacmd5);
+                rc = PTR_ERR(server->secmech.hmacmd5);
+                server->secmech.hmacmd5 = NULL;
+                return rc;
        }
        size = sizeof(struct shash_desc) +
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 4bdd547dbf6f..85ea98d139fc 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -147,18 +147,17 @@ cifs_read_super(struct super_block *sb)
                goto out_no_root;
        }
+        if (cifs_sb_master_tcon(cifs_sb)->nocase)
+                sb->s_d_op = &cifs_ci_dentry_ops;
+        else
+                sb->s_d_op = &cifs_dentry_ops;
        sb->s_root = d_make_root(inode);
        if (!sb->s_root) {
                rc = -ENOMEM;
                goto out_no_root;
        }
-        /* do that *after* d_make_root() - we want NULL ->d_op for root here */
-        if (cifs_sb_master_tcon(cifs_sb)->nocase)
-                sb->s_d_op = &cifs_ci_dentry_ops;
-        else
-                sb->s_d_op = &cifs_dentry_ops;
 #ifdef CONFIG_CIFS_NFSD_EXPORT
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) {
                cifs_dbg(FYI, "export ops supported\n");
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1fdc37041057..52ca861ed35e 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -44,6 +44,7 @@
 #define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
 #define MAX_SERVER_SIZE 15
 #define MAX_SHARE_SIZE 80
+#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */
 #define MAX_USERNAME_SIZE 256   /* reasonable maximum for current servers */
 #define MAX_PASSWORD_SIZE 512   /* max for windows seems to be 256 wide chars */
@@ -369,6 +370,9 @@ struct smb_version_operations {
        void (*generate_signingkey)(struct TCP_Server_Info *server);
        int (*calc_signature)(struct smb_rqst *rqst,
                                   struct TCP_Server_Info *server);
+        int (*query_mf_symlink)(const unsigned char *path, char *pbuf,
+                        unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
+                        unsigned int xid);
 };
 struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f7e584d047e2..b29a012bed33 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -497,5 +497,7 @@ void cifs_writev_complete(struct work_struct *work);
 struct cifs_writedata *cifs_writedata_alloc(unsigned int nr_pages,
                                                work_func_t complete);
 void cifs_writedata_release(struct kref *refcount);
+int open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
+                        unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
+                        unsigned int xid);
 #endif                  /* _CIFSPROTO_H */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index fa68813396b5..d67c550c4980 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1675,7 +1675,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                        if (string == NULL)
                                goto out_nomem;
-                        if (strnlen(string, 256) == 256) {
+                        if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN)
+                                        == CIFS_MAX_DOMAINNAME_LEN) {
                                printk(KERN_WARNING "CIFS: domain name too"
                                                    " long\n");
                                goto cifs_parse_mount_err;
@@ -2276,8 +2277,8 @@ cifs_put_smb_ses(struct cifs_ses *ses)
 #ifdef CONFIG_KEYS
-/* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */
+/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */
-#define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1)
+#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1)
 /* Populate username and pw fields from keyring if possible */
 static int
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 1e57f36ea1b2..7e36ae34e947 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -647,6 +647,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
                                     oflags, &oplock, &cfile->fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
+                        oparms.reconnect = true;
                        goto reopen_success;
                }
                /*
diff --git a/fs/cifs/link.c b/fs/cifs/link.c
index b83c3f5646bd..562044f700e5 100644
--- a/fs/cifs/link.c
+++ b/fs/cifs/link.c
@@ -305,67 +305,89 @@ CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr)
 }
+/*
+ * Open @path read-only, request CIFS_MF_SYMLINK_FILE_SIZE bytes starting at
+ * offset 0 with @pbuf handed to CIFSSMBRead() as the buffer, then close the
+ * file again.
+ *
+ * The byte count reported by CIFSSMBRead() is stored through @pbytes_read.
+ * When the size returned by the open differs from CIFS_MF_SYMLINK_FILE_SIZE
+ * the file is closed without being read, @pbytes_read is not written, and
+ * the (zero) open status is returned.
+ *
+ * The tlink obtained from cifs_sb_tlink() is released with cifs_put_tlink()
+ * on every path that follows a successful lookup.
+ *
+ * Returns PTR_ERR() of a failed tlink lookup, the CIFSSMBOpen() status, or
+ * the CIFSSMBRead() status.
+ */
 int
-CIFSCheckMFSymlink(struct cifs_fattr *fattr,
+open_query_close_cifs_symlink(const unsigned char *path, char *pbuf,
-                   const unsigned char *path,
+                        unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
-                   struct cifs_sb_info *cifs_sb, unsigned int xid)
+                        unsigned int xid)
 {
        int rc;
        int oplock = 0;
        __u16 netfid = 0;
        struct tcon_link *tlink;
-        struct cifs_tcon *pTcon;
+        struct cifs_tcon *ptcon;
        struct cifs_io_parms io_parms;
-        u8 *buf;
-        char *pbuf;
-        unsigned int bytes_read = 0;
        int buf_type = CIFS_NO_BUFFER;
-        unsigned int link_len = 0;
        FILE_ALL_INFO file_info;
-        if (!CIFSCouldBeMFSymlink(fattr))
-                /* it's not a symlink */
-                return 0;
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink))
                return PTR_ERR(tlink);
-        pTcon = tlink_tcon(tlink);
+        ptcon = tlink_tcon(tlink);
-        rc = CIFSSMBOpen(xid, pTcon, path, FILE_OPEN, GENERIC_READ,
+        rc = CIFSSMBOpen(xid, ptcon, path, FILE_OPEN, GENERIC_READ,
                         CREATE_NOT_DIR, &netfid, &oplock, &file_info,
                         cifs_sb->local_nls,
                         cifs_sb->mnt_cifs_flags &
                                CIFS_MOUNT_MAP_SPECIAL_CHR);
-        if (rc != 0)
+        if (rc != 0) {
-                goto out;
+                cifs_put_tlink(tlink);
+                return rc;
+        }
        if (file_info.EndOfFile != cpu_to_le64(CIFS_MF_SYMLINK_FILE_SIZE)) {
-                CIFSSMBClose(xid, pTcon, netfid);
+                CIFSSMBClose(xid, ptcon, netfid);
+                cifs_put_tlink(tlink);
                /* it's not a symlink */
-                goto out;
+                /* rc is still 0 from the successful open; nothing was read */
+                return rc;
        }
-        buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
-        if (!buf) {
-                rc = -ENOMEM;
-                goto out;
-        }
-        pbuf = buf;
        io_parms.netfid = netfid;
        io_parms.pid = current->tgid;
-        io_parms.tcon = pTcon;
+        io_parms.tcon = ptcon;
        io_parms.offset = 0;
        io_parms.length = CIFS_MF_SYMLINK_FILE_SIZE;
-        rc = CIFSSMBRead(xid, &io_parms, &bytes_read, &pbuf, &buf_type);
+        rc = CIFSSMBRead(xid, &io_parms, pbytes_read, &pbuf, &buf_type);
-        CIFSSMBClose(xid, pTcon, netfid);
+        CIFSSMBClose(xid, ptcon, netfid);
-        if (rc != 0) {
+        cifs_put_tlink(tlink);
-                kfree(buf);
+        return rc;
+}
+/*
+ * Decide whether the file at @path is an MF symlink and, if so, mark @fattr
+ * accordingly.
+ *
+ * Returns 0 straight away when CIFSCouldBeMFSymlink() rejects @fattr.
+ * Otherwise a CIFS_MF_SYMLINK_FILE_SIZE buffer is allocated (-ENOMEM on
+ * failure) and the server's query_mf_symlink operation, when the session
+ * and the operation exist, fills it.  A failed query returns its error; a
+ * missing operation or a zero-byte read returns 0 with @fattr unchanged.
+ * The data is then checked by CIFSParseMFSymlink(); -EINVAL from the parser
+ * is mapped to 0 ("not a symlink").  On the path that reaches the end of
+ * the function, @fattr gets S_IFLNK with rwx for user/group/other and
+ * cf_dtype DT_LNK.
+ *
+ * The tlink is needed only to reach the operations table, so its reference
+ * is dropped as soon as the query has been issued.  The buffer is freed at
+ * the common exit.
+ */
+int
+CIFSCheckMFSymlink(struct cifs_fattr *fattr,
+                   const unsigned char *path,
+                   struct cifs_sb_info *cifs_sb, unsigned int xid)
+{
+        int rc = 0;
+        u8 *buf = NULL;
+        unsigned int link_len = 0;
+        unsigned int bytes_read = 0;
+        struct tcon_link *tlink;
+        struct cifs_tcon *ptcon;
+        if (!CIFSCouldBeMFSymlink(fattr))
+                /* it's not a symlink */
+                return 0;
+        buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL);
+        if (!buf) {
+                rc = -ENOMEM;
                goto out;
        }
+        /*
+         * cifs_sb_tlink() may hand back an ERR_PTR, and on success the
+         * tlink has to be released again with cifs_put_tlink().
+         */
+        tlink = cifs_sb_tlink(cifs_sb);
+        if (IS_ERR(tlink)) {
+                rc = PTR_ERR(tlink);
+                goto out;
+        }
+        ptcon = tlink_tcon(tlink);
+        if ((ptcon->ses) && (ptcon->ses->server->ops->query_mf_symlink))
+                rc = ptcon->ses->server->ops->query_mf_symlink(path, buf,
+                                                 &bytes_read, cifs_sb, xid);
+        else {
+                cifs_put_tlink(tlink);
+                goto out;
+        }
+        /* ptcon is not used past this point */
+        cifs_put_tlink(tlink);
+        if (rc != 0)
+                goto out;
+        if (bytes_read == 0) /* not a symlink */
+                goto out;
        rc = CIFSParseMFSymlink(buf, bytes_read, &link_len, NULL);
-        kfree(buf);
        if (rc == -EINVAL) {
                /* it's not a symlink */
                rc = 0;
@@ -381,7 +403,7 @@ CIFSCheckMFSymlink(struct cifs_fattr *fattr,
        fattr->cf_mode |= S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO;
        fattr->cf_dtype = DT_LNK;
 out:
-        cifs_put_tlink(tlink);
+        kfree(buf);
        return rc;
 }
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ab8778469394..69d2c826a23b 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -111,6 +111,14 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
                        return;
        }
+        /*
+         * If we know that the inode will need to be revalidated immediately,
+         * then don't create a new dentry for it. We'll end up doing an on
+         * the wire call either way and this spares us an invalidation.
+         */
+        if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
+                return;
        dentry = d_alloc(parent, name);
        if (!dentry)
                return;
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 79358e341fd2..08dd37bb23aa 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -197,7 +197,7 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses,
                bytes_ret = 0;
        } else
                bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName,
-                                            256, nls_cp);
+                                            CIFS_MAX_DOMAINNAME_LEN, nls_cp);
        bcc_ptr += 2 * bytes_ret;
        bcc_ptr += 2;  /* account for null terminator */
@@ -255,8 +255,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
        /* copy domain */
        if (ses->domainName != NULL) {
-                strncpy(bcc_ptr, ses->domainName, 256);
+                strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
-                bcc_ptr += strnlen(ses->domainName, 256);
+                bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
        } /* else we will send a null domain name
             so the server will default to its own domain */
        *bcc_ptr = 0;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 6457690731a2..60943978aec3 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -944,6 +944,7 @@ struct smb_version_operations smb1_operations = {
        .mand_lock = cifs_mand_lock,
        .mand_unlock_range = cifs_unlock_range,
        .push_mand_locks = cifs_push_mandatory_locks,
+        .query_mf_symlink = open_query_close_cifs_symlink,
 };
 struct smb_version_values smb1_values = {
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 301b191270b9..4f2300d020c7 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -42,6 +42,7 @@
 static int
 smb2_crypto_shash_allocate(struct TCP_Server_Info *server)
 {
+        int rc;
        unsigned int size;
        if (server->secmech.sdeschmacsha256 != NULL)
@@ -50,7 +51,9 @@ smb2_crypto_shash_allocate(struct TCP_Server_Info *server)
        server->secmech.hmacsha256 = crypto_alloc_shash("hmac(sha256)", 0, 0);
        if (IS_ERR(server->secmech.hmacsha256)) {
                cifs_dbg(VFS, "could not allocate crypto hmacsha256\n");
-                return PTR_ERR(server->secmech.hmacsha256);
+                rc = PTR_ERR(server->secmech.hmacsha256);
+                server->secmech.hmacsha256 = NULL;
+                return rc;
        }
        size = sizeof(struct shash_desc) +
@@ -87,7 +90,9 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
                server->secmech.sdeschmacsha256 = NULL;
                crypto_free_shash(server->secmech.hmacsha256);
                server->secmech.hmacsha256 = NULL;
-                return PTR_ERR(server->secmech.cmacaes);
+                rc = PTR_ERR(server->secmech.cmacaes);
+                server->secmech.cmacaes = NULL;
+                return rc;
        }
        size = sizeof(struct shash_desc) +
diff --git a/fs/dcache.c b/fs/dcache.c
index 87bdb5329c3c..83cfb834db03 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2724,6 +2724,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
        return memcpy(buffer, temp, sz);
 }
+/*
+ * Assemble a name for @dentry from three pieces: "/", the dentry's d_name
+ * and " (deleted)".
+ *
+ * 'end' starts at buffer + buflen and the pieces are handed to prepend() /
+ * prepend_name() in reverse order.  NOTE(review): this assumes the helpers
+ * write backwards from *end and shrink buflen; their bodies are not shown
+ * here.  The length 11 passed with " (deleted)" is one more than its 10
+ * characters, so the terminating NUL is counted as well.
+ *
+ * Returns the value left in 'end' after the three steps, or
+ * ERR_PTR(-ENAMETOOLONG) as soon as one of them reports failure.
+ */
+char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+        char *end = buffer + buflen;
+        /* these dentries are never renamed, so d_lock is not needed */
+        if (prepend(&end, &buflen, " (deleted)", 11) ||
+            prepend_name(&end, &buflen, &dentry->d_name) ||
+            prepend(&end, &buflen, "/", 1))  
+                end = ERR_PTR(-ENAMETOOLONG);
+        return end;  
+}
 /*
 * Write full pathname from the root of the filesystem into the buffer.
 */
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 4888cb3fdef7..c7c83ff0f752 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -533,8 +533,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
 */
 void debugfs_remove_recursive(struct dentry *dentry)
 {
-        struct dentry *child;
+        struct dentry *child, *next, *parent;
-        struct dentry *parent;
        if (IS_ERR_OR_NULL(dentry))
                return;
@@ -544,61 +543,37 @@ void debugfs_remove_recursive(struct dentry *dentry)
                return;
        parent = dentry;
+ down:
        mutex_lock(&parent->d_inode->i_mutex);
+        list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) {
+                if (!debugfs_positive(child))
+                        continue;
-        while (1) {
+                /* perhaps simple_empty(child) makes more sense */
-                /*
-                 * When all dentries under "parent" has been removed,
-                 * walk up the tree until we reach our starting point.
-                 */
-                if (list_empty(&parent->d_subdirs)) {
-                        mutex_unlock(&parent->d_inode->i_mutex);
-                        if (parent == dentry)
-                                break;
-                        parent = parent->d_parent;
-                        mutex_lock(&parent->d_inode->i_mutex);
-                }
-                child = list_entry(parent->d_subdirs.next, struct dentry,
-                                d_u.d_child);
- next_sibling:
-                /*
-                 * If "child" isn't empty, walk down the tree and
-                 * remove all its descendants first.
-                 */
                if (!list_empty(&child->d_subdirs)) {
                        mutex_unlock(&parent->d_inode->i_mutex);
                        parent = child;
-                        mutex_lock(&parent->d_inode->i_mutex);
+                        goto down;
-                        continue;
                }
-                __debugfs_remove(child, parent);
+ up:
-                if (parent->d_subdirs.next == &child->d_u.d_child) {
+                if (!__debugfs_remove(child, parent))
-                        /*
+                        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
-                         * Try the next sibling.
-                         */
-                        if (child->d_u.d_child.next != &parent->d_subdirs) {
-                                child = list_entry(child->d_u.d_child.next,
-                                                   struct dentry,
-                                                   d_u.d_child);
-                                goto next_sibling;
-                        }
-                        /*
-                         * Avoid infinite loop if we fail to remove
-                         * one dentry.
-                         */
-                        mutex_unlock(&parent->d_inode->i_mutex);
-                        break;
-                }
-                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        }
-        parent = dentry->d_parent;
+        mutex_unlock(&parent->d_inode->i_mutex);
+        child = parent;
+        parent = parent->d_parent;
        mutex_lock(&parent->d_inode->i_mutex);
-        __debugfs_remove(dentry, parent);
+        if (child != dentry) {
+                next = list_entry(child->d_u.d_child.next, struct dentry,
+                                        d_u.d_child);
+                goto up;
+        }
+        if (!__debugfs_remove(child, parent))
+                simple_release_fs(&debugfs_mount, &debugfs_mount_count);
        mutex_unlock(&parent->d_inode->i_mutex);
-        simple_release_fs(&debugfs_mount, &debugfs_mount_count);
 }
 EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 911649a47dd5..812149119fa3 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -686,7 +686,6 @@ static int device_close(struct inode *inode, struct file *file)
           device_remove_lockspace() */
        sigprocmask(SIG_SETMASK, &tmpsig, NULL);
-        recalc_sigpending();
        return 0;
 }
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index f3913eb2c474..d15ccf20f1b3 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -57,7 +57,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
        struct inode *inode;
        inode = iget_locked(super, ino);
-        if (IS_ERR(inode))
+        if (!inode)
                return ERR_PTR(-ENOMEM);
        if (!(inode->i_state & I_NEW))
                return inode;
diff --git a/fs/exec.c b/fs/exec.c
index 9c73def87642..fd774c7cb483 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -608,7 +608,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
                return -ENOMEM;
        lru_add_drain();
-        tlb_gather_mmu(&tlb, mm, 0);
+        tlb_gather_mmu(&tlb, mm, old_start, old_end);
        if (new_end > old_start) {
                /*
                 * when the old and new regions overlap clear from new_end.
@@ -625,7 +625,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
                free_pgd_range(&tlb, old_start, old_end, new_end,
                        vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
        }
-        tlb_finish_mmu(&tlb, new_end, old_end);
+        tlb_finish_mmu(&tlb, old_start, old_end);
        /*
         * Shrink the vma to just the new range.  Always succeeds.
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 998ea111e537..1194b1f0f839 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1780,11 +1780,11 @@ retry:
                inode->i_op = &ext3_file_inode_operations;
                inode->i_fop = &ext3_file_operations;
                ext3_set_aops(inode);
+                d_tmpfile(dentry, inode);
                err = ext3_orphan_add(handle, inode);
                if (err)
                        goto err_drop_inode;
                mark_inode_dirty(inode);
-                d_tmpfile(dentry, inode);
                unlock_new_inode(inode);
        }
        ext3_journal_stop(handle);
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 58339393fa6e..ddd715e42a5c 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -38,8 +38,8 @@ ext4_group_t ext4_get_group_number(struct super_block *sb,
        ext4_group_t group;
        if (test_opt2(sb, STD_GROUP_SIZE))
-                group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
+                group = (block -
-                         block) >>
+                         le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >>
                        (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
        else
                ext4_get_group_no_and_offset(sb, block, &group, NULL);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b577e45425b0..0ab26fbf3380 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2086,6 +2086,7 @@ extern int  ext4_sync_inode(handle_t *, struct inode *);
 extern void ext4_dirty_inode(struct inode *, int);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
 extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
+extern int ext4_inode_attach_jinode(struct inode *inode);
 extern int ext4_can_truncate(struct inode *inode);
 extern void ext4_truncate(struct inode *);
 extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 72a3600aedbd..17ac112ab101 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -255,10 +255,10 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
        set_buffer_prio(bh);
        if (ext4_handle_valid(handle)) {
                err = jbd2_journal_dirty_metadata(handle, bh);
-                if (err) {
+                /* Errors can only happen if there is a bug */
-                        /* Errors can only happen if there is a bug */
+                if (WARN_ON_ONCE(err)) {
-                        handle->h_err = err;
+                        ext4_journal_abort_handle(where, line, __func__, bh,
-                        __ext4_journal_stop(where, line, handle);
+                                                  handle, err);
                }
        } else {
                if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7097b0f680e6..72ba4705d4fa 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2835,6 +2835,9 @@ again:
                                err = -EIO;
                                break;
                        }
+                        /* Yield here to deal with large extent trees.
+                         * Should be a no-op if we did IO above. */
+                        cond_resched();
                        if (WARN_ON(i + 1 > depth)) {
                                err = -EIO;
                                break;
@@ -4261,8 +4264,8 @@ got_allocated_blocks:
                /* not a good idea to call discard here directly,
                 * but otherwise we'd need to call it every free() */
                ext4_discard_preallocations(inode);
-                ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
+                ext4_free_blocks(handle, inode, NULL, newblock,
-                                 ext4_ext_get_actual_len(&newex), fb_flags);
+                                 EXT4_C2B(sbi, allocated_clusters), fb_flags);
                goto out2;
        }
@@ -4382,8 +4385,9 @@ out2:
        }
 out3:
-        trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated);
+        trace_ext4_ext_map_blocks_exit(inode, flags, map,
+                                       err ? err : allocated);
+        ext4_es_lru_add(inode);
        return err ? err : allocated;
 }
@@ -4405,9 +4409,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode)
        last_block = (inode->i_size + sb->s_blocksize - 1)
                        >> EXT4_BLOCK_SIZE_BITS(sb);
+retry:
        err = ext4_es_remove_extent(inode, last_block,
                                    EXT_MAX_BLOCKS - last_block);
+        if (err == -ENOMEM) {
+                cond_resched();
+                congestion_wait(BLK_RW_ASYNC, HZ/50);
+                goto retry;
+        }
+        if (err) {
+                ext4_std_error(inode->i_sb, err);
+                return;
+        }
        err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
+        ext4_std_error(inode->i_sb, err);
 }
 static void ext4_falloc_update_inode(struct inode *inode,
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index ee018d5f397e..91cb110da1b4 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -148,6 +148,8 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                              ext4_lblk_t end);
 static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
                                       int nr_to_scan);
+static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+                            struct ext4_inode_info *locked_ei);
 int __init ext4_init_es(void)
 {
@@ -439,7 +441,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
                 */
                if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
                        if (in_range(es->es_lblk, ee_block, ee_len)) {
-                                pr_warn("ES insert assertation failed for "
+                                pr_warn("ES insert assertion failed for "
                                        "inode: %lu we can find an extent "
                                        "at block [%d/%d/%llu/%c], but we "
                                        "want to add an delayed/hole extent "
@@ -458,7 +460,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
                 */
                if (es->es_lblk < ee_block ||
                    ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
-                        pr_warn("ES insert assertation failed for inode: %lu "
+                        pr_warn("ES insert assertion failed for inode: %lu "
                                "ex_status [%d/%d/%llu/%c] != "
                                "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
                                ee_block, ee_len, ee_start,
@@ -468,7 +470,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
                }
                if (ee_status ^ es_status) {
-                        pr_warn("ES insert assertation failed for inode: %lu "
+                        pr_warn("ES insert assertion failed for inode: %lu "
                                "ex_status [%d/%d/%llu/%c] != "
                                "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
                                ee_block, ee_len, ee_start,
@@ -481,7 +483,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
                 * that we don't want to add an written/unwritten extent.
                 */
                if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
-                        pr_warn("ES insert assertation failed for inode: %lu "
+                        pr_warn("ES insert assertion failed for inode: %lu "
                                "can't find an extent at block %d but we want "
                                "to add an written/unwritten extent "
                                "[%d/%d/%llu/%llx]\n", inode->i_ino,
@@ -519,7 +521,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
                         * We want to add a delayed/hole extent but this
                         * block has been allocated.
                         */
-                        pr_warn("ES insert assertation failed for inode: %lu "
+                        pr_warn("ES insert assertion failed for inode: %lu "
                                "We can find blocks but we want to add a "
                                "delayed/hole extent [%d/%d/%llu/%llx]\n",
                                inode->i_ino, es->es_lblk, es->es_len,
@@ -527,13 +529,13 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
                        return;
                } else if (ext4_es_is_written(es)) {
                        if (retval != es->es_len) {
-                                pr_warn("ES insert assertation failed for "
+                                pr_warn("ES insert assertion failed for "
                                        "inode: %lu retval %d != es_len %d\n",
                                        inode->i_ino, retval, es->es_len);
                                return;
                        }
                        if (map.m_pblk != ext4_es_pblock(es)) {
-                                pr_warn("ES insert assertation failed for "
+                                pr_warn("ES insert assertion failed for "
                                        "inode: %lu m_pblk %llu != "
                                        "es_pblk %llu\n",
                                        inode->i_ino, map.m_pblk,
@@ -549,7 +551,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode,
                }
        } else if (retval == 0) {
                if (ext4_es_is_written(es)) {
-                        pr_warn("ES insert assertation failed for inode: %lu "
+                        pr_warn("ES insert assertion failed for inode: %lu "
                                "We can't find the block but we want to add "
                                "an written extent [%d/%d/%llu/%llx]\n",
                                inode->i_ino, es->es_lblk, es->es_len,
@@ -632,10 +634,8 @@ out:
 }
 /*
- * ext4_es_insert_extent() adds a space to a extent status tree.
+ * ext4_es_insert_extent() adds information to an inode's extent
- *
+ * status tree.
- * ext4_es_insert_extent is called by ext4_da_write_begin and
- * ext4_es_remove_extent.
 *
 * Return 0 on success, error code on failure.
 */
@@ -667,7 +667,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
        err = __es_remove_extent(inode, lblk, end);
        if (err != 0)
                goto error;
+retry:
        err = __es_insert_extent(inode, &newes);
+        if (err == -ENOMEM && __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
+                                               EXT4_I(inode)))
+                goto retry;
+        if (err == -ENOMEM && !ext4_es_is_delayed(&newes))
+                err = 0;
 error:
        write_unlock(&EXT4_I(inode)->i_es_lock);
@@ -746,8 +752,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
        struct extent_status orig_es;
        ext4_lblk_t len1, len2;
        ext4_fsblk_t block;
-        int err = 0;
+        int err;
+retry:
+        err = 0;
        es = __es_tree_search(&tree->root, lblk);
        if (!es)
                goto out;
@@ -782,6 +790,10 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
                        if (err) {
                                es->es_lblk = orig_es.es_lblk;
                                es->es_len = orig_es.es_len;
+                                if ((err == -ENOMEM) &&
+                                    __ext4_es_shrink(EXT4_SB(inode->i_sb), 1,
+                                                     EXT4_I(inode)))
+                                        goto retry;
                                goto out;
                        }
                } else {
@@ -891,22 +903,14 @@ static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a,
                return -1;
 }
+/*
+ * Core of the extent-status shrinker, callable both from the registered
+ * shrinker and from the tree code itself when an allocation fails.
+ *
+ * @sbi:        superblock info whose s_es_lru list is scanned
+ * @nr_to_scan: scan budget passed on to __es_try_to_reclaim_extents()
+ * @locked_ei:  inode supplied by the caller, or NULL; it is skipped during
+ *              the LRU scan and only used as a last resort afterwards
+ *
+ * Returns the count kept in nr_shrunk.
+ */
-static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
+                            struct ext4_inode_info *locked_ei)
 {
-        struct ext4_sb_info *sbi = container_of(shrink,
-                                        struct ext4_sb_info, s_es_shrinker);
        struct ext4_inode_info *ei;
        struct list_head *cur, *tmp;
        LIST_HEAD(skiped);
-        int nr_to_scan = sc->nr_to_scan;
        int ret, nr_shrunk = 0;
-        ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
-        trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
-        if (!nr_to_scan)
-                return ret;
        spin_lock(&sbi->s_es_lru_lock);
        /*
@@ -935,7 +939,7 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
                        continue;
                }
-                if (ei->i_es_lru_nr == 0)
+                if (ei->i_es_lru_nr == 0 || ei == locked_ei)
                        continue;
                write_lock(&ei->i_es_lock);
@@ -954,6 +958,27 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
        list_splice_tail(&skiped, &sbi->s_es_lru);
        spin_unlock(&sbi->s_es_lru_lock);
+        /*
+         * Nothing was reclaimed from the other inodes: fall back to the
+         * inode the caller passed in.  This has to be locked_ei, not ei;
+         * ei is only the scan cursor, declared without an initializer,
+         * and is not the inode the caller is working on.
+         */
+        if (locked_ei && nr_shrunk == 0)
+                nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
+        return nr_shrunk;
+}
+static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+{
+        struct ext4_sb_info *sbi = container_of(shrink,
+                                        struct ext4_sb_info, s_es_shrinker);
+        int nr_to_scan = sc->nr_to_scan;
+        int ret, nr_shrunk;
+        ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+        trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
+        if (!nr_to_scan)
+                return ret;
+        nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
        ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
        trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
        return ret;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 6f4cc567c382..319c9d26279a 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -219,7 +219,6 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 {
        struct super_block *sb = inode->i_sb;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-        struct ext4_inode_info *ei = EXT4_I(inode);
        struct vfsmount *mnt = filp->f_path.mnt;
        struct path path;
        char buf[64], *cp;
@@ -259,22 +258,10 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
         * Set up the jbd2_inode if we are opening the inode for
         * writing and the journal is present
         */
-        if (sbi->s_journal && !ei->jinode && (filp->f_mode & FMODE_WRITE)) {
+        if (filp->f_mode & FMODE_WRITE) {
-                struct jbd2_inode *jinode = jbd2_alloc_inode(GFP_KERNEL);
+                int ret = ext4_inode_attach_jinode(inode);
+                if (ret < 0)
-                spin_lock(&inode->i_lock);
+                        return ret;
-                if (!ei->jinode) {
-                        if (!jinode) {
-                                spin_unlock(&inode->i_lock);
-                                return -ENOMEM;
-                        }
-                        ei->jinode = jinode;
-                        jbd2_journal_init_jbd_inode(ei->jinode, inode);
-                        jinode = NULL;
-                }
-                spin_unlock(&inode->i_lock);
-                if (unlikely(jinode != NULL))
-                        jbd2_free_inode(jinode);
        }
        return dquot_file_open(inode, filp);
 }
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index f03598c6ffd3..8bf5999875ee 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -734,11 +734,8 @@ repeat_in_this_group:
                ino = ext4_find_next_zero_bit((unsigned long *)
                                              inode_bitmap_bh->b_data,
                                              EXT4_INODES_PER_GROUP(sb), ino);
-                if (ino >= EXT4_INODES_PER_GROUP(sb)) {
+                if (ino >= EXT4_INODES_PER_GROUP(sb))
-                        if (++group == ngroups)
+                        goto next_group;
-                                group = 0;
-                        continue;
-                }
                if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
                        ext4_error(sb, "reserved inode found cleared - "
                                   "inode=%lu", ino + 1);
@@ -769,6 +766,9 @@ repeat_in_this_group:
                        goto got; /* we grabbed the inode! */
                if (ino < EXT4_INODES_PER_GROUP(sb))
                        goto repeat_in_this_group;
+next_group:
+                if (++group == ngroups)
+                        group = 0;
        }
        err = -ENOSPC;
        goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 0188e65e1f58..c2ca04e67a4f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -465,7 +465,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
        if (es_map->m_lblk != map->m_lblk ||
            es_map->m_flags != map->m_flags ||
            es_map->m_pblk != map->m_pblk) {
-                printk("ES cache assertation failed for inode: %lu "
+                printk("ES cache assertion failed for inode: %lu "
                       "es_cached ex [%d/%d/%llu/%x] != "
                       "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
                       inode->i_ino, es_map->m_lblk, es_map->m_len,
@@ -514,10 +514,9 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                  "logical block %lu\n", inode->i_ino, flags, map->m_len,
                  (unsigned long) map->m_lblk);
-        ext4_es_lru_add(inode);
        /* Lookup extent status tree firstly */
        if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
+                ext4_es_lru_add(inode);
                if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) {
                        map->m_pblk = ext4_es_pblock(&es) +
                                        map->m_lblk - es.es_lblk;
@@ -556,14 +555,13 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
                int ret;
                unsigned long long status;
-#ifdef ES_AGGRESSIVE_TEST
+                if (unlikely(retval != map->m_len)) {
-                if (retval != map->m_len) {
+                        ext4_warning(inode->i_sb,
-                        printk("ES len assertation failed for inode: %lu "
+                                     "ES len assertion failed for inode "
-                               "retval %d != map->m_len %d "
+                                     "%lu: retval %d != map->m_len %d",
-                               "in %s (lookup)\n", inode->i_ino, retval,
+                                     inode->i_ino, retval, map->m_len);
-                               map->m_len, __func__);
+                        WARN_ON(1);
                }
-#endif
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -657,14 +655,13 @@ found:
                int ret;
                unsigned long long status;
-#ifdef ES_AGGRESSIVE_TEST
+                if (unlikely(retval != map->m_len)) {
-                if (retval != map->m_len) {
+                        ext4_warning(inode->i_sb,
-                        printk("ES len assertation failed for inode: %lu "
+                                     "ES len assertion failed for inode "
-                               "retval %d != map->m_len %d "
+                                     "%lu: retval %d != map->m_len %d",
-                               "in %s (allocation)\n", inode->i_ino, retval,
+                                     inode->i_ino, retval, map->m_len);
-                               map->m_len, __func__);
+                        WARN_ON(1);
                }
-#endif
                /*
                 * If the extent has been zeroed out, we don't need to update
@@ -1529,11 +1526,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
                  "logical block %lu\n", inode->i_ino, map->m_len,
                  (unsigned long) map->m_lblk);
-        ext4_es_lru_add(inode);
        /* Lookup extent status tree firstly */
        if (ext4_es_lookup_extent(inode, iblock, &es)) {
+                ext4_es_lru_add(inode);
                if (ext4_es_is_hole(&es)) {
                        retval = 0;
                        down_read((&EXT4_I(inode)->i_data_sem));
@@ -1640,14 +1635,13 @@ add_delayed:
                int ret;
                unsigned long long status;
-#ifdef ES_AGGRESSIVE_TEST
+                if (unlikely(retval != map->m_len)) {
-                if (retval != map->m_len) {
+                        ext4_warning(inode->i_sb,
-                        printk("ES len assertation failed for inode: %lu "
+                                     "ES len assertion failed for inode "
-                               "retval %d != map->m_len %d "
+                                     "%lu: retval %d != map->m_len %d",
-                               "in %s (lookup)\n", inode->i_ino, retval,
+                                     inode->i_ino, retval, map->m_len);
-                               map->m_len, __func__);
+                        WARN_ON(1);
                }
-#endif
                status = map->m_flags & EXT4_MAP_UNWRITTEN ?
                                EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
@@ -2163,7 +2157,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
        mpd->io_submit.io_end->offset =
                                ((loff_t)map->m_lblk) << inode->i_blkbits;
-        while (map->m_len) {
+        do {
                err = mpage_map_one_extent(handle, mpd);
                if (err < 0) {
                        struct super_block *sb = inode->i_sb;
@@ -2201,7 +2195,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
                err = mpage_map_and_submit_buffers(mpd);
                if (err < 0)
                        return err;
-        }
+        } while (map->m_len);
        /* Update on-disk size after IO is submitted */
        disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
@@ -3539,6 +3533,18 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                   offset;
        }
+        if (offset & (sb->s_blocksize - 1) ||
+            (offset + length) & (sb->s_blocksize - 1)) {
+                /*
+                 * Attach jinode to inode for jbd2 if we do any zeroing of
+                 * partial block
+                 */
+                ret = ext4_inode_attach_jinode(inode);
+                if (ret < 0)
+                        goto out_mutex;
+        }
        first_block_offset = round_up(offset, sb->s_blocksize);
        last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
@@ -3607,6 +3613,31 @@ out_mutex:
        return ret;
 }
+int ext4_inode_attach_jinode(struct inode *inode) /* attach a jbd2_inode to inode if needed; returns 0 or -ENOMEM */
+{
+        struct ext4_inode_info *ei = EXT4_I(inode);
+        struct jbd2_inode *jinode;
+        if (ei->jinode || !EXT4_SB(inode->i_sb)->s_journal) /* already attached, or no journal: nothing to do */
+                return 0;
+        jinode = jbd2_alloc_inode(GFP_KERNEL); /* allocated before i_lock is taken; result checked under the lock */
+        spin_lock(&inode->i_lock);
+        if (!ei->jinode) { /* recheck under i_lock: still unattached */
+                if (!jinode) {
+                        spin_unlock(&inode->i_lock);
+                        return -ENOMEM;
+                }
+                ei->jinode = jinode;
+                jbd2_journal_init_jbd_inode(ei->jinode, inode);
+                jinode = NULL; /* now owned by ei; must not be freed below */
+        }
+        spin_unlock(&inode->i_lock);
+        if (unlikely(jinode != NULL)) /* ei->jinode was set by someone else meanwhile; drop our unused copy */
+                jbd2_free_inode(jinode);
+        return 0;
+}
 /*
 * ext4_truncate()
 *
@@ -3667,6 +3698,12 @@ void ext4_truncate(struct inode *inode)
                        return;
        }
+        /* If we zero-out tail of the page, we have to create jinode for jbd2 */
+        if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {
+                if (ext4_inode_attach_jinode(inode) < 0)
+                        return;
+        }
        if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
                credits = ext4_writepage_trans_blocks(inode);
        else
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 9491ac0590f7..c0427e2f6648 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -77,8 +77,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
        memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
        memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
        memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
-        memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree));
+        ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
-        memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr));
+        ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
+        ext4_es_lru_del(inode1);
+        ext4_es_lru_del(inode2);
        isize = i_size_read(inode1);
        i_size_write(inode1, i_size_read(inode2));
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a9ff5e5137ca..4bbbf13bd743 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4740,11 +4740,16 @@ do_more:
                 * blocks being freed are metadata. these blocks shouldn't
                 * be used until this transaction is committed
                 */
+        retry:
                new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
                if (!new_entry) {
-                        ext4_mb_unload_buddy(&e4b);
+                        /*
-                        err = -ENOMEM;
+                         * We use a retry loop because
-                        goto error_return;
+                         * ext4_free_blocks() is not allowed to fail.
+                         */
+                        cond_resched();
+                        congestion_wait(BLK_RW_ASYNC, HZ/50);
+                        goto retry;
                }
                new_entry->efd_start_cluster = bit;
                new_entry->efd_group = block_group;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 234b834d5a97..35f55a0dbc4b 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2316,11 +2316,11 @@ retry:
                inode->i_op = &ext4_file_inode_operations;
                inode->i_fop = &ext4_file_operations;
                ext4_set_aops(inode);
+                d_tmpfile(dentry, inode);
                err = ext4_orphan_add(handle, inode);
                if (err)
                        goto err_drop_inode;
                mark_inode_dirty(inode);
-                d_tmpfile(dentry, inode);
                unlock_new_inode(inode);
        }
        if (handle)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 48786cdb5e6c..6625d210fb45 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -25,6 +25,7 @@
 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/ratelimit.h>
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -55,7 +56,7 @@ void ext4_exit_pageio(void)
 static void buffer_io_error(struct buffer_head *bh)
 {
        char b[BDEVNAME_SIZE];
-        printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
+        printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
                        bdevname(bh->b_bdev, b),
                        (unsigned long long)bh->b_blocknr);
 }
@@ -308,6 +309,7 @@ ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
        return io_end;
 }
+/* BIO completion function for page writeback */
 static void ext4_end_bio(struct bio *bio, int error)
 {
        ext4_io_end_t *io_end = bio->bi_private;
@@ -318,18 +320,6 @@ static void ext4_end_bio(struct bio *bio, int error)
        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                error = 0;
-        if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
-                /*
-                 * Link bio into list hanging from io_end. We have to do it
-                 * atomically as bio completions can be racing against each
-                 * other.
-                 */
-                bio->bi_private = xchg(&io_end->bio, bio);
-        } else {
-                ext4_finish_bio(bio);
-                bio_put(bio);
-        }
        if (error) {
                struct inode *inode = io_end->inode;
@@ -341,7 +331,24 @@ static void ext4_end_bio(struct bio *bio, int error)
                             (unsigned long long)
                             bi_sector >> (inode->i_blkbits - 9));
        }
-        ext4_put_io_end_defer(io_end);
+        if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+                /*
+                 * Link bio into list hanging from io_end. We have to do it
+                 * atomically as bio completions can be racing against each
+                 * other.
+                 */
+                bio->bi_private = xchg(&io_end->bio, bio);
+                ext4_put_io_end_defer(io_end);
+        } else {
+                /*
+                 * Drop io_end reference early. Inode can get freed once
+                 * we finish the bio.
+                 */
+                ext4_put_io_end_defer(io_end);
+                ext4_finish_bio(bio);
+                bio_put(bio);
+        }
 }
 void ext4_io_submit(struct ext4_io_submit *io)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 85b3dd60169b..b59373b625e9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1359,7 +1359,7 @@ static const struct mount_opts {
        {Opt_delalloc, EXT4_MOUNT_DELALLOC,
         MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
        {Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
-         MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT},
+         MOPT_EXT4_ONLY | MOPT_CLEAR},
        {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
         MOPT_EXT4_ONLY | MOPT_SET},
        {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
@@ -1702,12 +1702,6 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
        if (sbi->s_qf_names[GRPQUOTA])
                seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
-        if (test_opt(sb, USRQUOTA))
-                seq_puts(seq, ",usrquota");
-        if (test_opt(sb, GRPQUOTA))
-                seq_puts(seq, ",grpquota");
 #endif
 }
@@ -3489,7 +3483,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                }
                if (test_opt(sb, DIOREAD_NOLOCK)) {
                        ext4_msg(sb, KERN_ERR, "can't mount with "
-                                 "both data=journal and delalloc");
+                                 "both data=journal and dioread_nolock");
                        goto failed_mount;
                }
                if (test_opt(sb, DELALLOC))
@@ -3624,10 +3618,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
        sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
-        /* Do we have standard group size of blocksize * 8 blocks ? */
-        if (sbi->s_blocks_per_group == blocksize << 3)
-                set_opt2(sb, STD_GROUP_SIZE);
        for (i = 0; i < 4; i++)
                sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
        sbi->s_def_hash_version = es->s_def_hash_version;
@@ -3697,6 +3687,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                goto failed_mount;
        }
+        /* Do we have standard group size of clustersize * 8 blocks ? */
+        if (sbi->s_blocks_per_group == clustersize << 3)
+                set_opt2(sb, STD_GROUP_SIZE);
        /*
         * Test whether we have more sectors than will fit in sector_t,
         * and whether the max offset is addressable by the page cache.
@@ -4733,6 +4727,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
                goto restore_opts;
        }
+        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+                if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+                        ext4_msg(sb, KERN_ERR, "can't mount with "
+                                 "both data=journal and delalloc");
+                        err = -EINVAL;
+                        goto restore_opts;
+                }
+                if (test_opt(sb, DIOREAD_NOLOCK)) {
+                        ext4_msg(sb, KERN_ERR, "can't mount with "
+                                 "both data=journal and dioread_nolock");
+                        err = -EINVAL;
+                        goto restore_opts;
+                }
+        }
        if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
                ext4_abort(sb, "Abort forced by user");
@@ -5487,6 +5496,7 @@ static void __exit ext4_exit_fs(void)
        kset_unregister(ext4_kset);
        ext4_exit_system_zone();
        ext4_exit_pageio();
+        ext4_exit_es();
 }
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 6599222536eb..65343c3741ff 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -730,14 +730,14 @@ static int __init fcntl_init(void)
         * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
         * is defined as O_NONBLOCK on some platforms and not on others.
         */
-        BUILD_BUG_ON(19 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
+        BUILD_BUG_ON(20 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
                O_RDONLY        | O_WRONLY      | O_RDWR        |
                O_CREAT         | O_EXCL        | O_NOCTTY      |
                O_TRUNC         | O_APPEND      | /* O_NONBLOCK | */
                __O_SYNC        | O_DSYNC       | FASYNC        |
                O_DIRECT        | O_LARGEFILE   | O_DIRECTORY   |
                O_NOFOLLOW      | O_NOATIME     | O_CLOEXEC     |
-                __FMODE_EXEC    | O_PATH
+                __FMODE_EXEC    | O_PATH        | __O_TMPFILE
                ));
        fasync_cache = kmem_cache_create("fasync_cache",
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 0eda52738ec4..72a5d5b04494 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1223,30 +1223,46 @@ static int fuse_direntplus_link(struct file *file,
                if (name.name[1] == '.' && name.len == 2)
                        return 0;
        }
+        if (invalid_nodeid(o->nodeid))
+                return -EIO;
+        if (!fuse_valid_type(o->attr.mode))
+                return -EIO;
        fc = get_fuse_conn(dir);
        name.hash = full_name_hash(name.name, name.len);
        dentry = d_lookup(parent, &name);
-        if (dentry && dentry->d_inode) {
+        if (dentry) {
                inode = dentry->d_inode;
-                if (get_node_id(inode) == o->nodeid) {
+                if (!inode) {
+                        d_drop(dentry);
+                } else if (get_node_id(inode) != o->nodeid ||
+                           ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
+                        err = d_invalidate(dentry);
+                        if (err)
+                                goto out;
+                } else if (is_bad_inode(inode)) {
+                        err = -EIO;
+                        goto out;
+                } else {
                        struct fuse_inode *fi;
                        fi = get_fuse_inode(inode);
                        spin_lock(&fc->lock);
                        fi->nlookup++;
                        spin_unlock(&fc->lock);
+                        fuse_change_attributes(inode, &o->attr,
+                                               entry_attr_timeout(o),
+                                               attr_version);
                        /*
                         * The other branch to 'found' comes via fuse_iget()
                         * which bumps nlookup inside
                         */
                        goto found;
                }
-                err = d_invalidate(dentry);
-                if (err)
-                        goto out;
                dput(dentry);
-                dentry = NULL;
        }
        dentry = d_alloc(parent, &name);
@@ -1259,25 +1275,30 @@ static int fuse_direntplus_link(struct file *file,
        if (!inode)
                goto out;
-        alias = d_materialise_unique(dentry, inode);
+        if (S_ISDIR(inode->i_mode)) {
-        err = PTR_ERR(alias);
+                mutex_lock(&fc->inst_mutex);
-        if (IS_ERR(alias))
+                alias = fuse_d_add_directory(dentry, inode);
-                goto out;
+                mutex_unlock(&fc->inst_mutex);
+                err = PTR_ERR(alias);
+                if (IS_ERR(alias)) {
+                        iput(inode);
+                        goto out;
+                }
+        } else {
+                alias = d_splice_alias(inode, dentry);
+        }
        if (alias) {
                dput(dentry);
                dentry = alias;
        }
 found:
-        fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o),
-                               attr_version);
        fuse_change_entry_timeout(dentry, o);
        err = 0;
 out:
-        if (dentry)
+        dput(dentry);
-                dput(dentry);
        return err;
 }
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 9435384562a2..544a809819c3 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1838,14 +1838,14 @@ int __init gfs2_glock_init(void)
        glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
                                          WQ_HIGHPRI | WQ_FREEZABLE, 0);
-        if (IS_ERR(glock_workqueue))
+        if (!glock_workqueue)
-                return PTR_ERR(glock_workqueue);
+                return -ENOMEM;
        gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
                                                WQ_MEM_RECLAIM | WQ_FREEZABLE,
                                                0);
-        if (IS_ERR(gfs2_delete_workqueue)) {
+        if (!gfs2_delete_workqueue) {
                destroy_workqueue(glock_workqueue);
-                return PTR_ERR(gfs2_delete_workqueue);
+                return -ENOMEM;
        }
        register_shrinker(&glock_shrinker);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 5f2e5224c51c..e2e0a90396e7 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -47,7 +47,8 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
 * None of the buffers should be dirty, locked, or pinned.
 */
-static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
+static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
+                             unsigned int nr_revokes)
 {
        struct gfs2_sbd *sdp = gl->gl_sbd;
        struct list_head *head = &gl->gl_ail_list;
@@ -57,7 +58,9 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
        gfs2_log_lock(sdp);
        spin_lock(&sdp->sd_ail_lock);
-        list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
+        list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
+                if (nr_revokes == 0)
+                        break;
                bh = bd->bd_bh;
                if (bh->b_state & b_state) {
                        if (fsync)
@@ -65,6 +68,7 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
                        gfs2_ail_error(gl, bh);
                }
                gfs2_trans_add_revoke(sdp, bd);
+                nr_revokes--;
        }
        GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
        spin_unlock(&sdp->sd_ail_lock);
@@ -91,7 +95,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
        WARN_ON_ONCE(current->journal_info);
        current->journal_info = &tr;
-        __gfs2_ail_flush(gl, 0);
+        __gfs2_ail_flush(gl, 0, tr.tr_revokes);
        gfs2_trans_end(sdp);
        gfs2_log_flush(sdp, NULL);
@@ -101,15 +105,19 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
 {
        struct gfs2_sbd *sdp = gl->gl_sbd;
        unsigned int revokes = atomic_read(&gl->gl_ail_count);
+        unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
        int ret;
        if (!revokes)
                return;
-        ret = gfs2_trans_begin(sdp, 0, revokes);
+        while (revokes > max_revokes)
+                max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
+        ret = gfs2_trans_begin(sdp, 0, max_revokes);
        if (ret)
                return;
-        __gfs2_ail_flush(gl, fsync);
+        __gfs2_ail_flush(gl, fsync, max_revokes);
        gfs2_trans_end(sdp);
        gfs2_log_flush(sdp, NULL);
 }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index bbb2715171cd..64915eeae5a7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -594,7 +594,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
                }
                gfs2_glock_dq_uninit(ghs);
                if (IS_ERR(d))
-                        return PTR_RET(d);
+                        return PTR_ERR(d);
                return error;
        } else if (error != -ENOENT) {
                goto fail_gunlock;
@@ -1750,6 +1750,10 @@ static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name,
        struct gfs2_holder gh;
        int ret;
+        /* For selinux during lookup */
+        if (gfs2_glock_is_locked_by_me(ip->i_gl))
+                return generic_getxattr(dentry, name, data, size);
        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh);
        ret = gfs2_glock_nq(&gh);
        if (ret == 0) {
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index e04d0e09ee7b..7b0f5043cf24 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -155,7 +155,7 @@ static int __init init_gfs2_fs(void)
                goto fail_wq;
        gfs2_control_wq = alloc_workqueue("gfs2_control",
-                               WQ_NON_REENTRANT | WQ_UNBOUND | WQ_FREEZABLE, 0);
+                                          WQ_UNBOUND | WQ_FREEZABLE, 0);
        if (!gfs2_control_wq)
                goto fail_recovery;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a3f868ae3fd4..d19b30ababf1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -463,6 +463,14 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
        return inode;
 }
+/*
+ * Hugetlbfs is not reclaimable; therefore its i_mmap_mutex will never
+ * be taken from reclaim -- unlike regular filesystems. This needs an
+ * annotation because huge_pmd_share() does an allocation under
+ * i_mmap_mutex.
+ */
+struct lock_class_key hugetlbfs_i_mmap_mutex_key;
 static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                                        struct inode *dir,
                                        umode_t mode, dev_t dev)
@@ -474,6 +482,8 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                struct hugetlbfs_inode_info *info;
                inode->i_ino = get_next_ino();
                inode_init_owner(inode, dir, mode);
+                lockdep_set_class(&inode->i_mapping->i_mmap_mutex,
+                                &hugetlbfs_i_mmap_mutex_key);
                inode->i_mapping->a_ops = &hugetlbfs_aops;
                inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info;
                inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
@@ -916,14 +926,8 @@ static int get_hstate_idx(int page_size_log)
        return h - hstates;
 }
-static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen)
-{
-        return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
-                                dentry->d_name.name);
-}
 static struct dentry_operations anon_ops = {
-        .d_dname = hugetlb_dname
+        .d_dname = simple_dname
 };
 /*
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 01bfe7662751..41e491b8e5d7 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -64,12 +64,17 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
                                   nlm_init->protocol, nlm_version,
                                   nlm_init->hostname, nlm_init->noresvport,
                                   nlm_init->net);
-        if (host == NULL) {
+        if (host == NULL)
-                lockd_down(nlm_init->net);
+                goto out_nohost;
-                return ERR_PTR(-ENOLCK);
+        if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
-        }
+                goto out_nobind;
        return host;
+out_nobind:
+        nlmclnt_release_host(host);
+out_nohost:
+        lockd_down(nlm_init->net);
+        return ERR_PTR(-ENOLCK);
 }
 EXPORT_SYMBOL_GPL(nlmclnt_init);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 9760ecb9b60f..acd394716349 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -125,14 +125,15 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
 {
        struct nlm_args *argp = &req->a_args;
        struct nlm_lock *lock = &argp->lock;
+        char *nodename = req->a_host->h_rpcclnt->cl_nodename;
        nlmclnt_next_cookie(&argp->cookie);
        memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
-        lock->caller  = utsname()->nodename;
+        lock->caller  = nodename;
        lock->oh.data = req->a_owner;
        lock->oh.len  = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
                                (unsigned int)fl->fl_u.nfs_fl.owner->pid,
-                                utsname()->nodename);
+                                nodename);
        lock->svid = fl->fl_u.nfs_fl.owner->pid;
        lock->fl.fl_start = fl->fl_start;
        lock->fl.fl_end = fl->fl_end;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 067778b0ccc9..e066a3902973 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -951,6 +951,7 @@ nlmsvc_retry_blocked(void)
        unsigned long   timeout = MAX_SCHEDULE_TIMEOUT;
        struct nlm_block *block;
+        spin_lock(&nlm_blocked_lock);
        while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
                block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
@@ -960,6 +961,7 @@ nlmsvc_retry_blocked(void)
                        timeout = block->b_when - jiffies;
                        break;
                }
+                spin_unlock(&nlm_blocked_lock);
                dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
                        block, block->b_when);
@@ -969,7 +971,9 @@ nlmsvc_retry_blocked(void)
                        retry_deferred_block(block);
                } else
                        nlmsvc_grant_blocked(block);
+                spin_lock(&nlm_blocked_lock);
        }
+        spin_unlock(&nlm_blocked_lock);
        return timeout;
 }
diff --git a/fs/namei.c b/fs/namei.c
index 8b61d103a8a7..89a612e392eb 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -3671,15 +3671,11 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
        if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
                return -EINVAL;
        /*
-         * To use null names we require CAP_DAC_READ_SEARCH
+         * Using empty names is equivalent to using AT_SYMLINK_FOLLOW
-         * This ensures that not everyone will be able to create
+         * on /proc/self/fd/<fd>.
-         * handlink using the passed filedescriptor.
         */
-        if (flags & AT_EMPTY_PATH) {
+        if (flags & AT_EMPTY_PATH)
-                if (!capable(CAP_DAC_READ_SEARCH))
-                        return -ENOENT;
                how = LOOKUP_EMPTY;
-        }
        if (flags & AT_SYMLINK_FOLLOW)
                how |= LOOKUP_FOLLOW;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b1ca9ba0b0a..a45ba4f267fe 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1429,7 +1429,7 @@ struct vfsmount *collect_mounts(struct path *path)
                         CL_COPY_ALL | CL_PRIVATE);
        namespace_unlock();
        if (IS_ERR(tree))
-                return NULL;
+                return ERR_CAST(tree);
        return &tree->mnt;
 }
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index af6e806044d7..941246f2b43d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -463,7 +463,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
                unlock_new_inode(inode);
        } else
                nfs_refresh_inode(inode, fattr);
-                nfs_setsecurity(inode, fattr, label);
        dprintk("NFS: nfs_fhget(%s/%Ld fh_crc=0x%08x ct=%d)\n",
                inode->i_sb->s_id,
                (long long)NFS_FILEID(inode),
@@ -963,9 +962,15 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
 static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
-        
+        int ret;
        if (mapping->nrpages != 0) {
-                int ret = invalidate_inode_pages2(mapping);
+                if (S_ISREG(inode->i_mode)) {
+                        ret = nfs_sync_mapping(mapping);
+                        if (ret < 0)
+                                return ret;
+                }
+                ret = invalidate_inode_pages2(mapping);
                if (ret < 0)
                        return ret;
        }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index cf11799297c4..108a774095f7 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3071,15 +3071,13 @@ struct rpc_clnt *
 nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name,
                            struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
+        struct rpc_clnt *client = NFS_CLIENT(dir);
        int status;
-        struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir));
        status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr, NULL);
-        if (status < 0) {
+        if (status < 0)
-                rpc_shutdown_client(client);
                return ERR_PTR(status);
-        }
+        return (client == NFS_CLIENT(dir)) ? rpc_clone_client(client) : client;
-        return client;
 }
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 0abfb8466e79..3850b018815f 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -999,6 +999,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
        __be32 *p;
        __be32 *q;
        int len;
+        uint32_t bmval_len = 2;
        uint32_t bmval0 = 0;
        uint32_t bmval1 = 0;
        uint32_t bmval2 = 0;
@@ -1010,7 +1011,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
         * = 40 bytes, plus any contribution from variable-length fields
         *            such as owner/group.
         */
-        len = 20;
+        len = 8;
        /* Sigh */
        if (iap->ia_valid & ATTR_SIZE)
@@ -1040,8 +1041,6 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
                }
                len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
        }
-        if (label)
-                len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
        if (iap->ia_valid & ATTR_ATIME_SET)
                len += 16;
        else if (iap->ia_valid & ATTR_ATIME)
@@ -1050,15 +1049,22 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
                len += 16;
        else if (iap->ia_valid & ATTR_MTIME)
                len += 4;
+        if (label) {
+                len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
+                bmval_len = 3;
+        }
+        len += bmval_len << 2;
        p = reserve_space(xdr, len);
        /*
         * We write the bitmap length now, but leave the bitmap and the attribute
         * buffer length to be backfilled at the end of this routine.
         */
-        *p++ = cpu_to_be32(3);
+        *p++ = cpu_to_be32(bmval_len);
        q = p;
-        p += 4;
+        /* Skip bitmap entries + attrlen */
+        p += bmval_len + 1;
        if (iap->ia_valid & ATTR_SIZE) {
                bmval0 |= FATTR4_WORD0_SIZE;
@@ -1112,10 +1118,11 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
                                len, ((char *)p - (char *)q) + 4);
                BUG();
        }
-        len = (char *)p - (char *)q - 16;
        *q++ = htonl(bmval0);
        *q++ = htonl(bmval1);
-        *q++ = htonl(bmval2);
+        if (bmval_len == 3)
+                *q++ = htonl(bmval2);
+        len = (char *)p - (char *)(q + 1);
        *q = htonl(len);
 /* out: */
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 71fdc0dfa0d2..f6db66d8f647 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2478,6 +2478,10 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server,
        if (server->flags & NFS_MOUNT_NOAC)
                sb_mntdata.mntflags |= MS_SYNCHRONOUS;
+        if (mount_info->cloned != NULL && mount_info->cloned->sb != NULL)
+                if (mount_info->cloned->sb->s_flags & MS_SYNCHRONOUS)
+                        sb_mntdata.mntflags |= MS_SYNCHRONOUS;
        /* Get a superblock - note that we may end up sharing one that already exists */
        s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
        if (IS_ERR(s)) {
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a7cee864e7b2..419572f33b72 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1293,7 +1293,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
         * According to RFC3010, this takes precedence over all other errors.
         */
        status = nfserr_minor_vers_mismatch;
-        if (args->minorversion > nfsd_supported_minorversion)
+        if (nfsd_minorversion(args->minorversion, NFSD_TEST) <= 0)
                goto out;
        status = nfs41_check_op_ordering(args);
@@ -1524,7 +1524,7 @@ static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
 {
        return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
-                1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\
+                1 + 1 + 2 + /* eir_flags, spr_how, spo_must_enforce & _allow */\
                2 + /*eir_server_owner.so_minor_id */\
                /* eir_server_owner.so_major_id<> */\
                XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 280acef6f0dc..43f42290e5df 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1264,6 +1264,8 @@ static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp)
        struct svc_cred *cr = &rqstp->rq_cred;
        u32 service;
+        if (!cr->cr_gss_mech)
+                return false;
        service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor);
        return service == RPC_GSS_SVC_INTEGRITY ||
               service == RPC_GSS_SVC_PRIVACY;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0c0f3ea90de5..c2a4701d7286 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3360,7 +3360,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
                8 /* eir_clientid */ +
                4 /* eir_sequenceid */ +
                4 /* eir_flags */ +
-                4 /* spr_how (SP4_NONE) */ +
+                4 /* spr_how */ +
+                8 /* spo_must_enforce, spo_must_allow */ +
                8 /* so_minor_id */ +
                4 /* so_major_id.len */ +
                (XDR_QUADLEN(major_id_sz) * 4) +
@@ -3372,8 +3373,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
        WRITE32(exid->seqid);
        WRITE32(exid->flags);
-        /* state_protect4_r. Currently only support SP4_NONE */
-        BUG_ON(exid->spa_how != SP4_NONE);
        WRITE32(exid->spa_how);
        switch (exid->spa_how) {
        case SP4_NONE:
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 2bbd94e51efc..30f34ab02137 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -53,7 +53,6 @@ struct readdir_cd {
 extern struct svc_program       nfsd_program;
 extern struct svc_version       nfsd_version2, nfsd_version3,
                                nfsd_version4;
-extern u32                      nfsd_supported_minorversion;
 extern struct mutex             nfsd_mutex;
 extern spinlock_t               nfsd_drc_lock;
 extern unsigned long            nfsd_drc_max_mem;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 6b9f48ca4c25..760c85a6f534 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -116,7 +116,10 @@ struct svc_program		nfsd_program = {
 };
-u32 nfsd_supported_minorversion = 1;
+static bool nfsd_supported_minorversions[NFSD_SUPPORTED_MINOR_VERSION + 1] = {
+        [0] = 1,
+        [1] = 1,
+};
 int nfsd_vers(int vers, enum vers_op change)
 {
@@ -151,15 +154,13 @@ int nfsd_minorversion(u32 minorversion, enum vers_op change)
                return -1;
        switch(change) {
        case NFSD_SET:
-                nfsd_supported_minorversion = minorversion;
+                nfsd_supported_minorversions[minorversion] = true;
                break;
        case NFSD_CLEAR:
-                if (minorversion == 0)
+                nfsd_supported_minorversions[minorversion] = false;
-                        return -1;
-                nfsd_supported_minorversion = minorversion - 1;
                break;
        case NFSD_TEST:
-                return minorversion <= nfsd_supported_minorversion;
+                return nfsd_supported_minorversions[minorversion];
        case NFSD_AVAIL:
                return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;
        }
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8ff6a0019b0b..c827acb0e943 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -830,9 +830,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
                        flags = O_WRONLY|O_LARGEFILE;
        }
        *filp = dentry_open(&path, flags, current_cred());
-        if (IS_ERR(*filp))
+        if (IS_ERR(*filp)) {
                host_err = PTR_ERR(*filp);
-        else {
+                *filp = NULL;
+        } else {
                host_err = ima_file_check(*filp, may_flags);
                if (may_flags & NFSD_MAY_64BIT_COOKIE)
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index dc9a913784ab..2d8be51f90dc 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -345,8 +345,7 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
        if (err == -EOPNOTSUPP) {
                set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-                bio_put(bio);
+                /* to be detected by nilfs_segbuf_submit_bio() */
-                /* to be detected by submit_seg_bio() */
        }
        if (!uptodate)
@@ -377,12 +376,12 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
        bio->bi_private = segbuf;
        bio_get(bio);
        submit_bio(mode, bio);
+        segbuf->sb_nbio++;
        if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
                bio_put(bio);
                err = -EOPNOTSUPP;
                goto failed;
        }
-        segbuf->sb_nbio++;
        bio_put(bio);
        wi->bio = NULL;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 79736a28d84f..2abf97b2a592 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1757,7 +1757,7 @@ try_again:
                goto out;
        } else if (ret == 1) {
                clusters_need = wc->w_clen;
-                ret = ocfs2_refcount_cow(inode, filp, di_bh,
+                ret = ocfs2_refcount_cow(inode, di_bh,
                                         wc->w_cpos, wc->w_clen, UINT_MAX);
                if (ret) {
                        mlog_errno(ret);
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index eb760d8acd50..30544ce8e9f7 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2153,11 +2153,9 @@ int ocfs2_empty_dir(struct inode *inode)
 {
        int ret;
        struct ocfs2_empty_dir_priv priv = {
-                .ctx.actor = ocfs2_empty_dir_filldir
+                .ctx.actor = ocfs2_empty_dir_filldir,
        };
-        memset(&priv, 0, sizeof(priv));
        if (ocfs2_dir_indexed(inode)) {
                ret = ocfs2_empty_dir_dx(inode, &priv);
                if (ret)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 41000f223ca4..3261d71319ee 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -370,7 +370,7 @@ static int ocfs2_cow_file_pos(struct inode *inode,
        if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
                goto out;
-        return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1);
+        return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
 out:
        return status;
@@ -899,7 +899,7 @@ static int ocfs2_zero_extend_get_range(struct inode *inode,
                zero_clusters = last_cpos - zero_cpos;
        if (needs_cow) {
-                rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos,
+                rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos,
                                        zero_clusters, UINT_MAX);
                if (rc) {
                        mlog_errno(rc);
@@ -2078,7 +2078,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
        *meta_level = 1;
-        ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX);
+        ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
        if (ret)
                mlog_errno(ret);
 out:
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 96f9ac237e86..0a992737dcaf 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -537,7 +537,7 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
        extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
        return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks +
-               ocfs2_quota_trans_credits(sb) + bits_wanted;
+               ocfs2_quota_trans_credits(sb);
 }
 static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index f1fc172175b6..452068b45749 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -69,7 +69,7 @@ static int __ocfs2_move_extent(handle_t *handle,
        u64 ino = ocfs2_metadata_cache_owner(context->et.et_ci);
        u64 old_blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cpos);
-        ret = ocfs2_duplicate_clusters_by_page(handle, context->file, cpos,
+        ret = ocfs2_duplicate_clusters_by_page(handle, inode, cpos,
                                               p_cpos, new_p_cpos, len);
        if (ret) {
                mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 998b17eda09d..a70d604593b6 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -49,7 +49,6 @@
 struct ocfs2_cow_context {
        struct inode *inode;
-        struct file *file;
        u32 cow_start;
        u32 cow_len;
        struct ocfs2_extent_tree data_et;
@@ -66,7 +65,7 @@ struct ocfs2_cow_context {
                            u32 *num_clusters,
                            unsigned int *extent_flags);
        int (*cow_duplicate_clusters)(handle_t *handle,
-                                      struct file *file,
+                                      struct inode *inode,
                                      u32 cpos, u32 old_cluster,
                                      u32 new_cluster, u32 new_len);
 };
@@ -2922,14 +2921,12 @@ static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
 }
 int ocfs2_duplicate_clusters_by_page(handle_t *handle,
-                                     struct file *file,
+                                     struct inode *inode,
                                     u32 cpos, u32 old_cluster,
                                     u32 new_cluster, u32 new_len)
 {
        int ret = 0, partial;
-        struct inode *inode = file_inode(file);
+        struct super_block *sb = inode->i_sb;
-        struct ocfs2_caching_info *ci = INODE_CACHE(inode);
-        struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
        u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
        struct page *page;
        pgoff_t page_index;
@@ -2965,6 +2962,11 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
                        to = map_end & (PAGE_CACHE_SIZE - 1);
                page = find_or_create_page(mapping, page_index, GFP_NOFS);
+                if (!page) {
+                        ret = -ENOMEM;
+                        mlog_errno(ret);
+                        break;
+                }
                /*
                 * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
@@ -2973,13 +2975,6 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
                if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
                        BUG_ON(PageDirty(page));
-                if (PageReadahead(page)) {
-                        page_cache_async_readahead(mapping,
-                                                   &file->f_ra, file,
-                                                   page, page_index,
-                                                   readahead_pages);
-                }
                if (!PageUptodate(page)) {
                        ret = block_read_full_page(page, ocfs2_get_block);
                        if (ret) {
@@ -2999,7 +2994,8 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
                        }
                }
-                ocfs2_map_and_dirty_page(inode, handle, from, to,
+                ocfs2_map_and_dirty_page(inode,
+                                         handle, from, to,
                                         page, 0, &new_block);
                mark_page_accessed(page);
 unlock:
@@ -3015,12 +3011,11 @@ unlock:
 }
 int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
-                                    struct file *file,
+                                    struct inode *inode,
                                    u32 cpos, u32 old_cluster,
                                    u32 new_cluster, u32 new_len)
 {
        int ret = 0;
-        struct inode *inode = file_inode(file);
        struct super_block *sb = inode->i_sb;
        struct ocfs2_caching_info *ci = INODE_CACHE(inode);
        int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
@@ -3145,7 +3140,7 @@ static int ocfs2_replace_clusters(handle_t *handle,
        /*If the old clusters is unwritten, no need to duplicate. */
        if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
-                ret = context->cow_duplicate_clusters(handle, context->file,
+                ret = context->cow_duplicate_clusters(handle, context->inode,
                                                      cpos, old, new, len);
                if (ret) {
                        mlog_errno(ret);
@@ -3423,35 +3418,12 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
        return ret;
 }
-static void ocfs2_readahead_for_cow(struct inode *inode,
-                                    struct file *file,
-                                    u32 start, u32 len)
-{
-        struct address_space *mapping;
-        pgoff_t index;
-        unsigned long num_pages;
-        int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits;
-        if (!file)
-                return;
-        mapping = file->f_mapping;
-        num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT;
-        if (!num_pages)
-                num_pages = 1;
-        index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT;
-        page_cache_sync_readahead(mapping, &file->f_ra, file,
-                                  index, num_pages);
-}
 /*
 * Starting at cpos, try to CoW write_len clusters.  Don't CoW
 * past max_cpos.  This will stop when it runs into a hole or an
 * unrefcounted extent.
 */
 static int ocfs2_refcount_cow_hunk(struct inode *inode,
-                                   struct file *file,
                                   struct buffer_head *di_bh,
                                   u32 cpos, u32 write_len, u32 max_cpos)
 {
@@ -3480,8 +3452,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
        BUG_ON(cow_len == 0);
-        ocfs2_readahead_for_cow(inode, file, cow_start, cow_len);
        context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
        if (!context) {
                ret = -ENOMEM;
@@ -3503,7 +3473,6 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode,
        context->ref_root_bh = ref_root_bh;
        context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
        context->get_clusters = ocfs2_di_get_clusters;
-        context->file = file;
        ocfs2_init_dinode_extent_tree(&context->data_et,
                                      INODE_CACHE(inode), di_bh);
@@ -3532,7 +3501,6 @@ out:
 * clusters between cpos and cpos+write_len are safe to modify.
 */
 int ocfs2_refcount_cow(struct inode *inode,
-                       struct file *file,
                       struct buffer_head *di_bh,
                       u32 cpos, u32 write_len, u32 max_cpos)
 {
@@ -3552,7 +3520,7 @@ int ocfs2_refcount_cow(struct inode *inode,
                        num_clusters = write_len;
                if (ext_flags & OCFS2_EXT_REFCOUNTED) {
-                        ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos,
+                        ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
                                                      num_clusters, max_cpos);
                        if (ret) {
                                mlog_errno(ret);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 7754608c83a4..6422bbcdb525 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -53,7 +53,7 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
                                          int *credits,
                                          int *ref_blocks);
 int ocfs2_refcount_cow(struct inode *inode,
-                       struct file *filep, struct buffer_head *di_bh,
+                       struct buffer_head *di_bh,
                       u32 cpos, u32 write_len, u32 max_cpos);
 typedef int (ocfs2_post_refcount_func)(struct inode *inode,
@@ -85,11 +85,11 @@ int ocfs2_refcount_cow_xattr(struct inode *inode,
                             u32 cpos, u32 write_len,
                             struct ocfs2_post_refcount *post);
 int ocfs2_duplicate_clusters_by_page(handle_t *handle,
-                                     struct file *file,
+                                     struct inode *inode,
                                     u32 cpos, u32 old_cluster,
                                     u32 new_cluster, u32 new_len);
 int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
-                                    struct file *file,
+                                    struct inode *inode,
                                    u32 cpos, u32 old_cluster,
                                    u32 new_cluster, u32 new_len);
 int ocfs2_cow_sync_writeback(struct super_block *sb,
diff --git a/fs/open.c b/fs/open.c
index 9156cb050d08..7931f76acc2b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -823,7 +823,7 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
        int lookup_flags = 0;
        int acc_mode;
-        if (flags & O_CREAT)
+        if (flags & (O_CREAT | __O_TMPFILE))
                op->mode = (mode & S_IALLUGO) | S_IFREG;
        else
                op->mode = 0;
@@ -844,6 +844,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
                if ((flags & O_TMPFILE_MASK) != O_TMPFILE)
                        return -EINVAL;
                acc_mode = MAY_OPEN | ACC_MODE(flags);
+                if (!(acc_mode & MAY_WRITE))
+                        return -EINVAL;
        } else if (flags & O_PATH) {
                /*
                 * If we have O_PATH in the open flag. Then we
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 75f2890abbd8..0ff80f9b930f 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -230,8 +230,6 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx,
        if (!dir_emit_dots(file, ctx))
                goto out;
-        if (!dir_emit_dots(file, ctx))
-                goto out;
        files = get_files_struct(p);
        if (!files)
                goto out;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 94441a407337..737e15615b04 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -271,7 +271,7 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
                de = next;
        } while (de);
        spin_unlock(&proc_subdir_lock);
-        return 0;
+        return 1;
 }
 int proc_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 229e366598da..e0a790da726d 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -205,7 +205,9 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
 static int proc_root_readdir(struct file *file, struct dir_context *ctx)
 {
        if (ctx->pos < FIRST_PROCESS_ENTRY) {
-                proc_readdir(file, ctx);
+                int error = proc_readdir(file, ctx);
+                if (unlikely(error <= 0))
+                        return error;
                ctx->pos = FIRST_PROCESS_ENTRY;
        }
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index dbf61f6174f0..107d026f5d6e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -730,8 +730,16 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
         * of how soft-dirty works.
         */
        pte_t ptent = *pte;
-        ptent = pte_wrprotect(ptent);
-        ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+        if (pte_present(ptent)) {
+                ptent = pte_wrprotect(ptent);
+                ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+        } else if (is_swap_pte(ptent)) {
+                ptent = pte_swp_clear_soft_dirty(ptent);
+        } else if (pte_file(ptent)) {
+                ptent = pte_file_clear_soft_dirty(ptent);
+        }
        set_pte_at(vma->vm_mm, addr, pte, ptent);
 #endif
 }
@@ -752,14 +760,15 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE) {
                ptent = *pte;
-                if (!pte_present(ptent))
-                        continue;
                if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
                        clear_soft_dirty(vma, addr, pte);
                        continue;
                }
+                if (!pte_present(ptent))
+                        continue;
                page = vm_normal_page(vma, addr, ptent);
                if (!page)
                        continue;
@@ -859,7 +868,7 @@ typedef struct {
 } pagemap_entry_t;
 struct pagemapread {
-        int pos, len;
+        int pos, len;           /* units: PM_ENTRY_BYTES, not bytes */
        pagemap_entry_t *buffer;
        bool v2;
 };
@@ -867,7 +876,7 @@ struct pagemapread {
 #define PAGEMAP_WALK_SIZE       (PMD_SIZE)
 #define PAGEMAP_WALK_MASK       (PMD_MASK)
-#define PM_ENTRY_BYTES      sizeof(u64)
+#define PM_ENTRY_BYTES      sizeof(pagemap_entry_t)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
 #define PM_STATUS_MASK      (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@@ -930,8 +939,10 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
                flags = PM_PRESENT;
                page = vm_normal_page(vma, addr, pte);
        } else if (is_swap_pte(pte)) {
-                swp_entry_t entry = pte_to_swp_entry(pte);
+                swp_entry_t entry;
+                if (pte_swp_soft_dirty(pte))
+                        flags2 |= __PM_SOFT_DIRTY;
+                entry = pte_to_swp_entry(pte);
                frame = swp_type(entry) |
                        (swp_offset(entry) << MAX_SWAPFILES_SHIFT);
                flags = PM_SWAP;
@@ -1116,8 +1127,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
                goto out_task;
        pm.v2 = soft_dirty_cleared;
-        pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
+        pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
-        pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
+        pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
        ret = -ENOMEM;
        if (!pm.buffer)
                goto out_task;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 28503172f2e4..a1a16eb97c7b 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -223,7 +223,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
 * regions in the 1st kernel pointed to by PT_LOAD entries) into
 * virtually contiguous user-space in ELF layout.
 */
-#ifdef CONFIG_MMU
+#if defined(CONFIG_MMU) && !defined(CONFIG_S390)
 static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 {
        size_t size = vma->vm_end - vma->vm_start;
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 33532f79b4f7..a958444a75fc 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -19,12 +19,13 @@
 /*
 * LOCKING:
 *
- * We rely on new Alexander Viro's super-block locking.
+ * These guys are evicted from procfs as the very first step in ->kill_sb().
 *
 */
-static int show_version(struct seq_file *m, struct super_block *sb)
+static int show_version(struct seq_file *m, void *unused)
 {
+        struct super_block *sb = m->private;
        char *format;
        if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) {
@@ -66,8 +67,9 @@ static int show_version(struct seq_file *m, struct super_block *sb)
 #define DJP( x ) le32_to_cpu( jp -> x )
 #define JF( x ) ( r -> s_journal -> x )
-static int show_super(struct seq_file *m, struct super_block *sb)
+static int show_super(struct seq_file *m, void *unused)
 {
+        struct super_block *sb = m->private;
        struct reiserfs_sb_info *r = REISERFS_SB(sb);
        seq_printf(m, "state: \t%s\n"
@@ -128,8 +130,9 @@ static int show_super(struct seq_file *m, struct super_block *sb)
        return 0;
 }
-static int show_per_level(struct seq_file *m, struct super_block *sb)
+static int show_per_level(struct seq_file *m, void *unused)
 {
+        struct super_block *sb = m->private;
        struct reiserfs_sb_info *r = REISERFS_SB(sb);
        int level;
@@ -186,8 +189,9 @@ static int show_per_level(struct seq_file *m, struct super_block *sb)
        return 0;
 }
-static int show_bitmap(struct seq_file *m, struct super_block *sb)
+static int show_bitmap(struct seq_file *m, void *unused)
 {
+        struct super_block *sb = m->private;
        struct reiserfs_sb_info *r = REISERFS_SB(sb);
        seq_printf(m, "free_block: %lu\n"
@@ -218,8 +222,9 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb)
        return 0;
 }
-static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
+static int show_on_disk_super(struct seq_file *m, void *unused)
 {
+        struct super_block *sb = m->private;
        struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
        struct reiserfs_super_block *rs = sb_info->s_rs;
        int hash_code = DFL(s_hash_function_code);
@@ -261,8 +266,9 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
        return 0;
 }
-static int show_oidmap(struct seq_file *m, struct super_block *sb)
+static int show_oidmap(struct seq_file *m, void *unused)
 {
+        struct super_block *sb = m->private;
        struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
        struct reiserfs_super_block *rs = sb_info->s_rs;
        unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize);
@@ -291,8 +297,9 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb)
        return 0;
 }
-static int show_journal(struct seq_file *m, struct super_block *sb)
+static int show_journal(struct seq_file *m, void *unused)
 {
+        struct super_block *sb = m->private;
        struct reiserfs_sb_info *r = REISERFS_SB(sb);
        struct reiserfs_super_block *rs = r->s_rs;
        struct journal_params *jp = &rs->s_v1.s_journal;
@@ -383,92 +390,24 @@ static int show_journal(struct seq_file *m, struct super_block *sb)
        return 0;
 }
-/* iterator */
-static int test_sb(struct super_block *sb, void *data)
-{
-        return data == sb;
-}
-static int set_sb(struct super_block *sb, void *data)
-{
-        return -ENOENT;
-}
-struct reiserfs_seq_private {
-        struct super_block *sb;
-        int (*show) (struct seq_file *, struct super_block *);
-};
-static void *r_start(struct seq_file *m, loff_t * pos)
-{
-        struct reiserfs_seq_private *priv = m->private;
-        loff_t l = *pos;
-        if (l)
-                return NULL;
-        if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb)))
-                return NULL;
-        up_write(&priv->sb->s_umount);
-        return priv->sb;
-}
-static void *r_next(struct seq_file *m, void *v, loff_t * pos)
-{
-        ++*pos;
-        if (v)
-                deactivate_super(v);
-        return NULL;
-}
-static void r_stop(struct seq_file *m, void *v)
-{
-        if (v)
-                deactivate_super(v);
-}
-static int r_show(struct seq_file *m, void *v)
-{
-        struct reiserfs_seq_private *priv = m->private;
-        return priv->show(m, v);
-}
-static const struct seq_operations r_ops = {
-        .start = r_start,
-        .next = r_next,
-        .stop = r_stop,
-        .show = r_show,
-};
 static int r_open(struct inode *inode, struct file *file)
 {
-        struct reiserfs_seq_private *priv;
+        return single_open(file, PDE_DATA(inode), 
-        int ret = seq_open_private(file, &r_ops,
+                                proc_get_parent_data(inode));
-                                   sizeof(struct reiserfs_seq_private));
-        if (!ret) {
-                struct seq_file *m = file->private_data;
-                priv = m->private;
-                priv->sb = proc_get_parent_data(inode);
-                priv->show = PDE_DATA(inode);
-        }
-        return ret;
 }
 static const struct file_operations r_file_operations = {
        .open = r_open,
        .read = seq_read,
        .llseek = seq_lseek,
-        .release = seq_release_private,
+        .release = single_release,
-        .owner = THIS_MODULE,
 };
 static struct proc_dir_entry *proc_info_root = NULL;
 static const char proc_info_root_name[] = "fs/reiserfs";
 static void add_file(struct super_block *sb, char *name,
-                     int (*func) (struct seq_file *, struct super_block *))
+                     int (*func) (struct seq_file *, void *))
 {
        proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
                         &r_file_operations, func);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f8a23c3078f8..e2e202a07b31 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -499,6 +499,7 @@ int remove_save_link(struct inode *inode, int truncate)
 static void reiserfs_kill_sb(struct super_block *s)
 {
        if (REISERFS_SB(s)) {
+                reiserfs_proc_info_done(s);
                /*
                 * Force any pending inode evictions to occur now. Any
                 * inodes to be removed that have extended attributes
@@ -554,8 +555,6 @@ static void reiserfs_put_super(struct super_block *s)
                                 REISERFS_SB(s)->reserved_blocks);
        }
-        reiserfs_proc_info_done(s);
        reiserfs_write_unlock(s);
        mutex_destroy(&REISERFS_SB(s)->lock);
        kfree(s->s_fs_info);
diff --git a/fs/super.c b/fs/super.c
index 7465d4364208..68307c029228 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super);
 *      and want to turn it into a full-blown active reference.  grab_super()
 *      is called with sb_lock held and drops it.  Returns 1 in case of
 *      success, 0 if we had failed (superblock contents was already dead or
- *      dying when grab_super() had been called).
+ *      dying when grab_super() had been called).  Note that this is only
+ *      called for superblocks not in rundown mode (== ones still on ->fs_supers
+ *      of their type), so increment of ->s_count is OK here.
 */
 static int grab_super(struct super_block *s) __releases(sb_lock)
 {
-        if (atomic_inc_not_zero(&s->s_active)) {
-                spin_unlock(&sb_lock);
-                return 1;
-        }
-        /* it's going away */
        s->s_count++;
        spin_unlock(&sb_lock);
-        /* wait for it to die */
        down_write(&s->s_umount);
+        if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) {
+                put_super(s);
+                return 1;
+        }
        up_write(&s->s_umount);
        put_super(s);
        return 0;
@@ -463,11 +463,6 @@ retry:
                                destroy_super(s);
                                s = NULL;
                        }
-                        down_write(&old->s_umount);
-                        if (unlikely(!(old->s_flags & MS_BORN))) {
-                                deactivate_locked_super(old);
-                                goto retry;
-                        }
                        return old;
                }
        }
@@ -660,10 +655,10 @@ restart:
                if (hlist_unhashed(&sb->s_instances))
                        continue;
                if (sb->s_bdev == bdev) {
-                        if (grab_super(sb)) /* drops sb_lock */
+                        if (!grab_super(sb))
-                                return sb;
-                        else
                                goto restart;
+                        up_write(&sb->s_umount);
+                        return sb;
                }
        }
        spin_unlock(&sb_lock);
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index aec3d5c98c94..09a1a25cd145 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -20,38 +20,64 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
                         const struct attribute_group *grp)
 {
        struct attribute *const* attr;
-        int i;
+        struct bin_attribute *const* bin_attr;
-        for (i = 0, attr = grp->attrs; *attr; i++, attr++)
+        if (grp->attrs)
-                sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
+                for (attr = grp->attrs; *attr; attr++)
+                        sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
+        if (grp->bin_attrs)
+                for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++)
+                        sysfs_remove_bin_file(kobj, *bin_attr);
 }
 static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
                        const struct attribute_group *grp, int update)
 {
        struct attribute *const* attr;
+        struct bin_attribute *const* bin_attr;
        int error = 0, i;
-        for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
+        if (grp->attrs) {
-                umode_t mode = 0;
+                for (i = 0, attr = grp->attrs; *attr && !error; i++, attr++) {
+                        umode_t mode = 0;
+                        /*
+                         * In update mode, we're changing the permissions or
+                         * visibility.  Do this by first removing then
+                         * re-adding (if required) the file.
+                         */
+                        if (update)
+                                sysfs_hash_and_remove(dir_sd, NULL,
+                                                      (*attr)->name);
+                        if (grp->is_visible) {
+                                mode = grp->is_visible(kobj, *attr, i);
+                                if (!mode)
+                                        continue;
+                        }
+                        error = sysfs_add_file_mode(dir_sd, *attr,
+                                                    SYSFS_KOBJ_ATTR,
+                                                    (*attr)->mode | mode);
+                        if (unlikely(error))
+                                break;
+                }
+                if (error) {
+                        remove_files(dir_sd, kobj, grp);
+                        goto exit;
+                }
+        }
-                /* in update mode, we're changing the permissions or
+        if (grp->bin_attrs) {
-                 * visibility.  Do this by first removing then
+                for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) {
-                 * re-adding (if required) the file */
+                        if (update)
-                if (update)
+                                sysfs_remove_bin_file(kobj, *bin_attr);
-                        sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
+                        error = sysfs_create_bin_file(kobj, *bin_attr);
-                if (grp->is_visible) {
+                        if (error)
-                        mode = grp->is_visible(kobj, *attr, i);
+                                break;
-                        if (!mode)
-                                continue;
                }
-                error = sysfs_add_file_mode(dir_sd, *attr, SYSFS_KOBJ_ATTR,
+                if (error)
-                                            (*attr)->mode | mode);
+                        remove_files(dir_sd, kobj, grp);
-                if (unlikely(error))
-                        break;
        }
-        if (error)
+exit:
-                remove_files(dir_sd, kobj, grp);
        return error;
 }
@@ -67,8 +93,8 @@ static int internal_create_group(struct kobject *kobj, int update,
        /* Updates may happen before the object has been instantiated */
        if (unlikely(update && !kobj->sd))
                return -EINVAL;
-        if (!grp->attrs) {
+        if (!grp->attrs && !grp->bin_attrs) {
-                WARN(1, "sysfs: attrs not set by subsystem for group: %s/%s\n",
+                WARN(1, "sysfs: (bin_)attrs not set by subsystem for group: %s/%s\n",
                        kobj->name, grp->name ? "" : grp->name);
                return -EINVAL;
        }
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index 07d735a80a0f..e5869b50dc41 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -39,6 +39,9 @@ typedef struct xfs_timestamp {
 * There is a very similar struct icdinode in xfs_inode which matches the
 * layout of the first 96 bytes of this structure, but is kept in native
 * format instead of big endian.
+ *
+ * Note: di_flushiter is only used by v1/2 inodes - it's effectively a zeroed
+ * padding field for v3 inodes.
 */
 typedef struct xfs_dinode {
        __be16          di_magic;       /* inode magic # = XFS_DINODE_MAGIC */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index b78481f99d9d..bb262c25c8de 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -896,7 +896,6 @@ xfs_dinode_to_disk(
        to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
        to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
        memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
-        to->di_flushiter = cpu_to_be16(from->di_flushiter);
        to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
        to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
        to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
@@ -924,6 +923,9 @@ xfs_dinode_to_disk(
                to->di_lsn = cpu_to_be64(from->di_lsn);
                memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
                uuid_copy(&to->di_uuid, &from->di_uuid);
+                to->di_flushiter = 0;
+        } else {
+                to->di_flushiter = cpu_to_be16(from->di_flushiter);
        }
 }
@@ -1029,10 +1031,14 @@ xfs_dinode_calc_crc(
 /*
 * Read the disk inode attributes into the in-core inode structure.
 *
- * If we are initialising a new inode and we are not utilising the
+ * For version 5 superblocks, if we are initialising a new inode and we are not
- * XFS_MOUNT_IKEEP inode cluster mode, we can simple build the new inode core
+ * utilising the XFS_MOUNT_IKEEP inode cluster mode, we can simply build the new
- * with a random generation number. If we are keeping inodes around, we need to
+ * inode core with a random generation number. If we are keeping inodes around,
- * read the inode cluster to get the existing generation number off disk.
+ * we need to read the inode cluster to get the existing generation number off
+ * disk. Further, if we are using version 4 superblocks (i.e. v1/v2 inode
+ * format) then log recovery is dependent on the di_flushiter field being
+ * initialised from the current on-disk value and hence we must also read the
+ * inode off disk.
 */
 int
 xfs_iread(
@@ -1054,6 +1060,7 @@ xfs_iread(
        /* shortcut IO on inode allocation if possible */
        if ((iget_flags & XFS_IGET_CREATE) &&
+            xfs_sb_version_hascrc(&mp->m_sb) &&
            !(mp->m_flags & XFS_MOUNT_IKEEP)) {
                /* initialise the on-disk inode core */
                memset(&ip->i_d, 0, sizeof(ip->i_d));
@@ -2882,12 +2889,18 @@ xfs_iflush_int(
                        __func__, ip->i_ino, ip->i_d.di_forkoff, ip);
                goto corrupt_out;
        }
        /*
-         * bump the flush iteration count, used to detect flushes which
+         * Inode item log recovery for v1/v2 inodes is dependent on the
-         * postdate a log record during recovery. This is redundant as we now
+         * di_flushiter count for correct sequencing. We bump the flush
-         * log every change and hence this can't happen. Still, it doesn't hurt.
+         * iteration count so we can detect flushes which postdate a log record
+         * during recovery. This is redundant as we now log every change and
+         * hence this can't happen but we need to still do it to ensure
+         * backwards compatibility with old kernels that predate logging all
+         * inode changes.
         */
-        ip->i_d.di_flushiter++;
+        if (ip->i_d.di_version < 3)
+                ip->i_d.di_flushiter++;
        /*
         * Copy the dirty parts of the inode into the on-disk
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 6fcc910a50b9..7681b19aa5dc 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2592,8 +2592,16 @@ xlog_recover_inode_pass2(
                goto error;
        }
-        /* Skip replay when the on disk inode is newer than the log one */
+        /*
-        if (dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+         * di_flushiter is only valid for v1/2 inodes. All changes for v3 inodes
+         * are transactional and if ordering is necessary we can determine that
+         * more accurately by the LSN field in the V3 inode core. Don't trust
+         * the inode versions as we might be changing them here - use the
+         * superblock flag to determine whether we need to look at di_flushiter
+         * to skip replay when the on disk inode is newer than the log one.
+         */
+        if (!xfs_sb_version_hascrc(&mp->m_sb) &&
+            dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
                /*
                 * Deal with the wrap case, DI_MAX_FLUSH is less
                 * than smaller numbers
@@ -2608,6 +2616,7 @@ xlog_recover_inode_pass2(
                        goto error;
                }
        }
        /* Take the opportunity to reset the flush iteration count */
        dicp->di_flushiter = 0;