Merge tag 'v4.5-rc7' into x86/asm, to pick up SMAP fix

Signed-off-by: Ingo Molnar <mingo@kernel.org>
author: Ingo Molnar <mingo@kernel.org> 2016-03-07 03:27:30 -0500
committer: Ingo Molnar <mingo@kernel.org> 2016-03-07 03:27:30 -0500
commit: ec87e1cf7d8399d81d8965c6d852f8057a8dd687 (patch)
tree: 472a168fa4861090edf110c8a9712a5c15ea259f /fs
parent: 869ae76147ffdf21ad24f0e599303cd58a2bb39f (diff)
parent: f6cede5b49e822ebc41a099fe41ab4989f64e2cb (diff)
59 files changed, 686 insertions, 314 deletions
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 0548c53f41d5..22fc7c802d69 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -511,8 +511,6 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
        pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
                 page->index, to);
        BUG_ON(to > PAGE_CACHE_SIZE);
-        kmap(page);
-        data = page_address(page);
        bsize = AFFS_SB(sb)->s_data_blksize;
        tmp = page->index << PAGE_CACHE_SHIFT;
        bidx = tmp / bsize;
@@ -524,14 +522,15 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
                        return PTR_ERR(bh);
                tmp = min(bsize - boff, to - pos);
                BUG_ON(pos + tmp > to || tmp > bsize);
+                data = kmap_atomic(page);
                memcpy(data + pos, AFFS_DATA(bh) + boff, tmp);
+                kunmap_atomic(data);
                affs_brelse(bh);
                bidx++;
                pos += tmp;
                boff = 0;
        }
        flush_dcache_page(page);
-        kunmap(page);
        return 0;
 }
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 051ea4809c14..7d914c67a9d0 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -653,7 +653,7 @@ static unsigned long randomize_stack_top(unsigned long stack_top)
        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
-                random_variable = (unsigned long) get_random_int();
+                random_variable = get_random_long();
                random_variable &= STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 39b3a174a425..826b164a4b5b 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1201,7 +1201,11 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                bdev->bd_disk = disk;
                bdev->bd_queue = disk->queue;
                bdev->bd_contains = bdev;
-                bdev->bd_inode->i_flags = disk->fops->direct_access ? S_DAX : 0;
+                if (IS_ENABLED(CONFIG_BLK_DEV_DAX) && disk->fops->direct_access)
+                        bdev->bd_inode->i_flags = S_DAX;
+                else
+                        bdev->bd_inode->i_flags = 0;
                if (!partno) {
                        ret = -ENXIO;
                        bdev->bd_part = disk_get_part(disk, partno);
@@ -1693,13 +1697,24 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
        return try_to_free_buffers(page);
 }
+static int blkdev_writepages(struct address_space *mapping,
+                             struct writeback_control *wbc)
+{
+        if (dax_mapping(mapping)) {
+                struct block_device *bdev = I_BDEV(mapping->host);
+                return dax_writeback_mapping_range(mapping, bdev, wbc);
+        }
+        return generic_writepages(mapping, wbc);
+}
 static const struct address_space_operations def_blk_aops = {
        .readpage       = blkdev_readpage,
        .readpages      = blkdev_readpages,
        .writepage      = blkdev_writepage,
        .write_begin    = blkdev_write_begin,
        .write_end      = blkdev_write_end,
-        .writepages     = generic_writepages,
+        .writepages     = blkdev_writepages,
        .releasepage    = blkdev_releasepage,
        .direct_IO      = blkdev_direct_IO,
        .is_dirty_writeback = buffer_check_dirty_writeback,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 151b7c71b868..d96f5cf38a2d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7986,6 +7986,7 @@ static void btrfs_endio_direct_read(struct bio *bio)
        kfree(dip);
+        dio_bio->bi_error = bio->bi_error;
        dio_end_io(dio_bio, bio->bi_error);
        if (io_bio->end_io)
@@ -8040,6 +8041,7 @@ static void btrfs_endio_direct_write(struct bio *bio)
        kfree(dip);
+        dio_bio->bi_error = bio->bi_error;
        dio_end_io(dio_bio, bio->bi_error);
        bio_put(bio);
 }
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 7cf8509deda7..2c849b08a91b 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -310,8 +310,16 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
                set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
                err = btrfs_insert_fs_root(root->fs_info, root);
+                /*
+                 * The root might have been inserted already, as before we look
+                 * for orphan roots, log replay might have happened, which
+                 * triggers a transaction commit and qgroup accounting, which
+                 * in turn reads and inserts fs roots while doing backref
+                 * walking.
+                 */
+                if (err == -EEXIST)
+                        err = 0;
                if (err) {
-                        BUG_ON(err == -EEXIST);
                        btrfs_free_fs_root(root);
                        break;
                }
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c22213789090..19adeb0ef82a 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1756,6 +1756,10 @@ int ceph_pool_perm_check(struct ceph_inode_info *ci, int need)
        u32 pool;
        int ret, flags;
+        /* does not support pool namespace yet */
+        if (ci->i_pool_ns_len)
+                return -EIO;
        if (ceph_test_mount_opt(ceph_inode_to_client(&ci->vfs_inode),
                                NOPOOLPERM))
                return 0;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cdbf8cf3d52c..6fe0ad26a7df 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2753,7 +2753,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
                             void *inline_data, int inline_len,
                             struct ceph_buffer *xattr_buf,
                             struct ceph_mds_session *session,
-                             struct ceph_cap *cap, int issued)
+                             struct ceph_cap *cap, int issued,
+                             u32 pool_ns_len)
        __releases(ci->i_ceph_lock)
        __releases(mdsc->snap_rwsem)
 {
@@ -2873,6 +2874,8 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
        if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
                /* file layout may have changed */
                ci->i_layout = grant->layout;
+                ci->i_pool_ns_len = pool_ns_len;
                /* size/truncate_seq? */
                queue_trunc = ceph_fill_file_size(inode, issued,
                                        le32_to_cpu(grant->truncate_seq),
@@ -3411,6 +3414,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
        u32  inline_len = 0;
        void *snaptrace;
        size_t snaptrace_len;
+        u32 pool_ns_len = 0;
        void *p, *end;
        dout("handle_caps from mds%d\n", mds);
@@ -3463,6 +3467,21 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                p += inline_len;
        }
+        if (le16_to_cpu(msg->hdr.version) >= 8) {
+                u64 flush_tid;
+                u32 caller_uid, caller_gid;
+                u32 osd_epoch_barrier;
+                /* version >= 5 */
+                ceph_decode_32_safe(&p, end, osd_epoch_barrier, bad);
+                /* version >= 6 */
+                ceph_decode_64_safe(&p, end, flush_tid, bad);
+                /* version >= 7 */
+                ceph_decode_32_safe(&p, end, caller_uid, bad);
+                ceph_decode_32_safe(&p, end, caller_gid, bad);
+                /* version >= 8 */
+                ceph_decode_32_safe(&p, end, pool_ns_len, bad);
+        }
        /* lookup ino */
        inode = ceph_find_inode(sb, vino);
        ci = ceph_inode(inode);
@@ -3518,7 +3537,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                                  &cap, &issued);
                handle_cap_grant(mdsc, inode, h,
                                 inline_version, inline_data, inline_len,
-                                 msg->middle, session, cap, issued);
+                                 msg->middle, session, cap, issued,
+                                 pool_ns_len);
                if (realm)
                        ceph_put_snap_realm(mdsc, realm);
                goto done_unlocked;
@@ -3542,7 +3562,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
                issued |= __ceph_caps_dirty(ci);
                handle_cap_grant(mdsc, inode, h,
                                 inline_version, inline_data, inline_len,
-                                 msg->middle, session, cap, issued);
+                                 msg->middle, session, cap, issued,
+                                 pool_ns_len);
                goto done_unlocked;
        case CEPH_CAP_OP_FLUSH_ACK:
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index fb4ba2e4e2a5..5849b88bbed3 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -396,6 +396,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
        ci->i_symlink = NULL;
        memset(&ci->i_dir_layout, 0, sizeof(ci->i_dir_layout));
+        ci->i_pool_ns_len = 0;
        ci->i_fragtree = RB_ROOT;
        mutex_init(&ci->i_fragtree_mutex);
@@ -756,6 +757,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
                if (ci->i_layout.fl_pg_pool != info->layout.fl_pg_pool)
                        ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
                ci->i_layout = info->layout;
+                ci->i_pool_ns_len = iinfo->pool_ns_len;
                queue_trunc = ceph_fill_file_size(inode, issued,
                                        le32_to_cpu(info->truncate_seq),
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e7b130a637f9..911d64d865f1 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -100,6 +100,14 @@ static int parse_reply_info_in(void **p, void *end,
        } else
                info->inline_version = CEPH_INLINE_NONE;
+        if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
+                ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
+                ceph_decode_need(p, end, info->pool_ns_len, bad);
+                *p += info->pool_ns_len;
+        } else {
+                info->pool_ns_len = 0;
+        }
        return 0;
 bad:
        return err;
@@ -2298,6 +2306,14 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
                ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
                                  CEPH_CAP_PIN);
+        /* deny access to directories with pool_ns layouts */
+        if (req->r_inode && S_ISDIR(req->r_inode->i_mode) &&
+            ceph_inode(req->r_inode)->i_pool_ns_len)
+                return -EIO;
+        if (req->r_locked_dir &&
+            ceph_inode(req->r_locked_dir)->i_pool_ns_len)
+                return -EIO;
        /* issue */
        mutex_lock(&mdsc->mutex);
        __register_request(mdsc, req, dir);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index ccf11ef0ca87..37712ccffcc6 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -44,6 +44,7 @@ struct ceph_mds_reply_info_in {
        u64 inline_version;
        u32 inline_len;
        char *inline_data;
+        u32 pool_ns_len;
 };
 /*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75b7d125ce66..9c458eb52245 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -287,6 +287,7 @@ struct ceph_inode_info {
        struct ceph_dir_layout i_dir_layout;
        struct ceph_file_layout i_layout;
+        size_t i_pool_ns_len;
        char *i_symlink;
        /* for dirs */
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 7dc886c9a78f..e956cba94338 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -175,7 +175,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
         * string to the length of the original string to allow for worst case.
         */
        md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN;
-        mountdata = kzalloc(md_len + 1, GFP_KERNEL);
+        mountdata = kzalloc(md_len + sizeof("ip=") + 1, GFP_KERNEL);
        if (mountdata == NULL) {
                rc = -ENOMEM;
                goto compose_mount_options_err;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index afa09fce8151..e682b36a210f 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -714,7 +714,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
        ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
        if (!ses->auth_key.response) {
-                rc = ENOMEM;
+                rc = -ENOMEM;
                ses->auth_key.len = 0;
                goto setup_ntlmv2_rsp_ret;
        }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index c48ca13673e3..2eea40353e60 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1013,7 +1013,6 @@ const struct file_operations cifs_file_strict_ops = {
        .llseek = cifs_llseek,
        .unlocked_ioctl = cifs_ioctl,
        .clone_file_range = cifs_clone_file_range,
-        .clone_file_range = cifs_clone_file_range,
        .setlease = cifs_setlease,
        .fallocate = cifs_fallocate,
 };
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 68c4547528c4..83aac8ba50b0 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -31,19 +31,15 @@
 * so that it will fit. We use hash_64 to convert the value to 31 bits, and
 * then add 1, to ensure that we don't end up with a 0 as the value.
 */
-#if BITS_PER_LONG == 64
 static inline ino_t
 cifs_uniqueid_to_ino_t(u64 fileid)
 {
+        if ((sizeof(ino_t)) < (sizeof(u64)))
+                return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
        return (ino_t)fileid;
 }
-#else
-static inline ino_t
-cifs_uniqueid_to_ino_t(u64 fileid)
-{
-        return (ino_t)hash_64(fileid, (sizeof(ino_t) * 8) - 1) + 1;
-}
-#endif
 extern struct file_system_type cifs_fs_type;
 extern const struct address_space_operations cifs_addr_ops;
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 90b4f9f7de66..76fcb50295a3 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1396,11 +1396,10 @@ openRetry:
 * current bigbuf.
 */
 static int
-cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+discard_remaining_data(struct TCP_Server_Info *server)
 {
        unsigned int rfclen = get_rfc1002_length(server->smallbuf);
        int remaining = rfclen + 4 - server->total_read;
-        struct cifs_readdata *rdata = mid->callback_data;
        while (remaining > 0) {
                int length;
@@ -1414,10 +1413,20 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
                remaining -= length;
        }
-        dequeue_mid(mid, rdata->result);
        return 0;
 }
+static int
+cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
+{
+        int length;
+        struct cifs_readdata *rdata = mid->callback_data;
+        length = discard_remaining_data(server);
+        dequeue_mid(mid, rdata->result);
+        return length;
+}
 int
 cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
 {
@@ -1446,6 +1455,12 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
                return length;
        server->total_read += length;
+        if (server->ops->is_status_pending &&
+            server->ops->is_status_pending(buf, server, 0)) {
+                discard_remaining_data(server);
+                return -1;
+        }
        /* Was the SMB read successful? */
        rdata->result = server->ops->map_error(buf, false);
        if (rdata->result != 0) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4fbd92d2e113..a763cd3d9e7c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2999,8 +2999,7 @@ ip_rfc1001_connect(struct TCP_Server_Info *server)
        if (ses_init_buf) {
                ses_init_buf->trailer.session_req.called_len = 32;
-                if (server->server_RFC1001_name &&
+                if (server->server_RFC1001_name[0] != 0)
-                    server->server_RFC1001_name[0] != 0)
                        rfc1002mangle(ses_init_buf->trailer.
                                      session_req.called_name,
                                      server->server_RFC1001_name,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 10f8d5cf5681..42e1f440eb1e 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1106,21 +1106,25 @@ parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
 {
        char *data_offset;
        struct create_context *cc;
-        unsigned int next = 0;
+        unsigned int next;
+        unsigned int remaining;
        char *name;
        data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset);
+        remaining = le32_to_cpu(rsp->CreateContextsLength);
        cc = (struct create_context *)data_offset;
-        do {
+        while (remaining >= sizeof(struct create_context)) {
-                cc = (struct create_context *)((char *)cc + next);
                name = le16_to_cpu(cc->NameOffset) + (char *)cc;
-                if (le16_to_cpu(cc->NameLength) != 4 ||
+                if (le16_to_cpu(cc->NameLength) == 4 &&
-                    strncmp(name, "RqLs", 4)) {
+                    strncmp(name, "RqLs", 4) == 0)
-                        next = le32_to_cpu(cc->Next);
+                        return server->ops->parse_lease_buf(cc, epoch);
-                        continue;
-                }
+                next = le32_to_cpu(cc->Next);
-                return server->ops->parse_lease_buf(cc, epoch);
+                if (!next)
-        } while (next != 0);
+                        break;
+                remaining -= next;
+                cc = (struct create_context *)((char *)cc + next);
+        }
        return 0;
 }
diff --git a/fs/dax.c b/fs/dax.c
index fc2e3141138b..711172450da6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -79,15 +79,14 @@ struct page *read_dax_sector(struct block_device *bdev, sector_t n)
 }
 /*
- * dax_clear_blocks() is called from within transaction context from XFS,
+ * dax_clear_sectors() is called from within transaction context from XFS,
 * and hence this means the stack from this point must follow GFP_NOFS
 * semantics for all operations.
 */
-int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
+int dax_clear_sectors(struct block_device *bdev, sector_t _sector, long _size)
 {
-        struct block_device *bdev = inode->i_sb->s_bdev;
        struct blk_dax_ctl dax = {
-                .sector = block << (inode->i_blkbits - 9),
+                .sector = _sector,
                .size = _size,
        };
@@ -109,7 +108,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long _size)
        wmb_pmem();
        return 0;
 }
-EXPORT_SYMBOL_GPL(dax_clear_blocks);
+EXPORT_SYMBOL_GPL(dax_clear_sectors);
 /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
 static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first,
@@ -485,11 +484,10 @@ static int dax_writeback_one(struct block_device *bdev,
 * end]. This is required by data integrity operations to ensure file data is
 * on persistent storage prior to completion of the operation.
 */
-int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
+int dax_writeback_mapping_range(struct address_space *mapping,
-                loff_t end)
+                struct block_device *bdev, struct writeback_control *wbc)
 {
        struct inode *inode = mapping->host;
-        struct block_device *bdev = inode->i_sb->s_bdev;
        pgoff_t start_index, end_index, pmd_index;
        pgoff_t indices[PAGEVEC_SIZE];
        struct pagevec pvec;
@@ -500,8 +498,11 @@ int dax_writeback_mapping_range(struct address_space *mapping, loff_t start,
        if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
                return -EIO;
-        start_index = start >> PAGE_CACHE_SHIFT;
+        if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
-        end_index = end >> PAGE_CACHE_SHIFT;
+                return 0;
+        start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
+        end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
        pmd_index = DAX_PMD_INDEX(start_index);
        rcu_read_lock();
diff --git a/fs/dcache.c b/fs/dcache.c
index 92d5140de851..2398f9f94337 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,9 +269,6 @@ static inline int dname_external(const struct dentry *dentry)
        return dentry->d_name.name != dentry->d_iname;
 }
-/*
- * Make sure other CPUs see the inode attached before the type is set.
- */
 static inline void __d_set_inode_and_type(struct dentry *dentry,
                                          struct inode *inode,
                                          unsigned type_flags)
@@ -279,28 +276,18 @@ static inline void __d_set_inode_and_type(struct dentry *dentry,
        unsigned flags;
        dentry->d_inode = inode;
-        smp_wmb();
        flags = READ_ONCE(dentry->d_flags);
        flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
        flags |= type_flags;
        WRITE_ONCE(dentry->d_flags, flags);
 }
-/*
- * Ideally, we want to make sure that other CPUs see the flags cleared before
- * the inode is detached, but this is really a violation of RCU principles
- * since the ordering suggests we should always set inode before flags.
- *
- * We should instead replace or discard the entire dentry - but that sucks
- * performancewise on mass deletion/rename.
- */
 static inline void __d_clear_type_and_inode(struct dentry *dentry)
 {
        unsigned flags = READ_ONCE(dentry->d_flags);
        flags &= ~(DCACHE_ENTRY_TYPE | DCACHE_FALLTHRU);
        WRITE_ONCE(dentry->d_flags, flags);
-        smp_wmb();
        dentry->d_inode = NULL;
 }
@@ -370,9 +357,11 @@ static void dentry_unlink_inode(struct dentry * dentry)
        __releases(dentry->d_inode->i_lock)
 {
        struct inode *inode = dentry->d_inode;
+        raw_write_seqcount_begin(&dentry->d_seq);
        __d_clear_type_and_inode(dentry);
        hlist_del_init(&dentry->d_u.d_alias);
-        dentry_rcuwalk_invalidate(dentry);
+        raw_write_seqcount_end(&dentry->d_seq);
        spin_unlock(&dentry->d_lock);
        spin_unlock(&inode->i_lock);
        if (!inode->i_nlink)
@@ -1758,8 +1747,9 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
        spin_lock(&dentry->d_lock);
        if (inode)
                hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+        raw_write_seqcount_begin(&dentry->d_seq);
        __d_set_inode_and_type(dentry, inode, add_flags);
-        dentry_rcuwalk_invalidate(dentry);
+        raw_write_seqcount_end(&dentry->d_seq);
        spin_unlock(&dentry->d_lock);
        fsnotify_d_instantiate(dentry, inode);
 }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 1b2f7ffc8b84..d6a9012d42ad 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -472,8 +472,8 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
                dio->io_error = -EIO;
        if (dio->is_async && dio->rw == READ && dio->should_dirty) {
-                bio_check_pages_dirty(bio);     /* transfers ownership */
                err = bio->bi_error;
+                bio_check_pages_dirty(bio);     /* transfers ownership */
        } else {
                bio_for_each_segment_all(bvec, bio, i) {
                        struct page *page = bvec->bv_page;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 2c88d683cd91..c1400b109805 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -80,23 +80,6 @@ static int ext2_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
        return ret;
 }
-static int ext2_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-        struct inode *inode = file_inode(vma->vm_file);
-        struct ext2_inode_info *ei = EXT2_I(inode);
-        int ret;
-        sb_start_pagefault(inode->i_sb);
-        file_update_time(vma->vm_file);
-        down_read(&ei->dax_sem);
-        ret = __dax_mkwrite(vma, vmf, ext2_get_block, NULL);
-        up_read(&ei->dax_sem);
-        sb_end_pagefault(inode->i_sb);
-        return ret;
-}
 static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
                struct vm_fault *vmf)
 {
@@ -124,7 +107,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
 static const struct vm_operations_struct ext2_dax_vm_ops = {
        .fault          = ext2_dax_fault,
        .pmd_fault      = ext2_dax_pmd_fault,
-        .page_mkwrite   = ext2_dax_mkwrite,
+        .page_mkwrite   = ext2_dax_fault,
        .pfn_mkwrite    = ext2_dax_pfn_mkwrite,
 };
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 338eefda70c6..6bd58e6ff038 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -737,8 +737,10 @@ static int ext2_get_blocks(struct inode *inode,
                 * so that it's not found by another thread before it's
                 * initialised
                 */
-                err = dax_clear_blocks(inode, le32_to_cpu(chain[depth-1].key),
+                err = dax_clear_sectors(inode->i_sb->s_bdev,
-                                                1 << inode->i_blkbits);
+                                le32_to_cpu(chain[depth-1].key) <<
+                                (inode->i_blkbits - 9),
+                                1 << inode->i_blkbits);
                if (err) {
                        mutex_unlock(&ei->truncate_mutex);
                        goto cleanup;
@@ -874,6 +876,14 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
+#ifdef CONFIG_FS_DAX
+        if (dax_mapping(mapping)) {
+                return dax_writeback_mapping_range(mapping,
+                                                   mapping->host->i_sb->s_bdev,
+                                                   wbc);
+        }
+#endif
        return mpage_writepages(mapping, wbc, ext2_get_block);
 }
@@ -1296,7 +1306,7 @@ void ext2_set_inode_flags(struct inode *inode)
                inode->i_flags |= S_NOATIME;
        if (flags & EXT2_DIRSYNC_FL)
                inode->i_flags |= S_DIRSYNC;
-        if (test_opt(inode->i_sb, DAX))
+        if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
                inode->i_flags |= S_DAX;
 }
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index ec0668a60678..fe1f50fe764f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -191,7 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
        /* If checksum is bad mark all blocks used to prevent allocation
         * essentially implementing a per-group read-only flag. */
        if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-                ext4_error(sb, "Checksum bad for group %u", block_group);
                grp = ext4_get_group_info(sb, block_group);
                if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
                        percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -442,14 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
        }
        ext4_lock_group(sb, block_group);
        if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
                err = ext4_init_block_bitmap(sb, bh, block_group, desc);
                set_bitmap_uptodate(bh);
                set_buffer_uptodate(bh);
                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
-                if (err)
+                if (err) {
+                        ext4_error(sb, "Failed to init block bitmap for group "
+                                   "%u: %d", block_group, err);
                        goto out;
+                }
                goto verify;
        }
        ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index c8021208a7eb..38f7562489bb 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -467,3 +467,59 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
                return size;
        return 0;
 }
+/*
+ * Validate dentries for encrypted directories to make sure we aren't
+ * potentially caching stale data after a key has been added or
+ * removed.
+ */
+static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+        struct inode *dir = d_inode(dentry->d_parent);
+        struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info;
+        int dir_has_key, cached_with_key;
+        if (!ext4_encrypted_inode(dir))
+                return 0;
+        if (ci && ci->ci_keyring_key &&
+            (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+                                          (1 << KEY_FLAG_REVOKED) |
+                                          (1 << KEY_FLAG_DEAD))))
+                ci = NULL;
+        /* this should eventually be a flag in d_flags */
+        cached_with_key = dentry->d_fsdata != NULL;
+        dir_has_key = (ci != NULL);
+        /*
+         * If the dentry was cached without the key, and it is a
+         * negative dentry, it might be a valid name.  We can't check
+         * if the key has since been made available due to locking
+         * reasons, so we fail the validation so ext4_lookup() can do
+         * this check.
+         *
+         * We also fail the validation if the dentry was created with
+         * the key present, but we no longer have the key, or vice versa.
+         */
+        if ((!cached_with_key && d_is_negative(dentry)) ||
+            (!cached_with_key && dir_has_key) ||
+            (cached_with_key && !dir_has_key)) {
+#if 0                           /* Revalidation debug */
+                char buf[80];
+                char *cp = simple_dname(dentry, buf, sizeof(buf));
+                if (IS_ERR(cp))
+                        cp = (char *) "???";
+                pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata,
+                       cached_with_key, d_is_negative(dentry),
+                       dir_has_key);
+#endif
+                return 0;
+        }
+        return 1;
+}
+const struct dentry_operations ext4_encrypted_d_ops = {
+        .d_revalidate = ext4_d_revalidate,
+};
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d1bca74f844..33f5e2a50cf8 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
        int dir_has_error = 0;
        struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
+        if (ext4_encrypted_inode(inode)) {
+                err = ext4_get_encryption_info(inode);
+                if (err && err != -ENOKEY)
+                        return err;
+        }
        if (is_dx_dir(inode)) {
                err = ext4_dx_readdir(file, ctx);
                if (err != ERR_BAD_DX_DIR) {
@@ -157,8 +163,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
                                        index, 1);
                        file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
                        bh = ext4_bread(NULL, inode, map.m_lblk, 0);
-                        if (IS_ERR(bh))
+                        if (IS_ERR(bh)) {
-                                return PTR_ERR(bh);
+                                err = PTR_ERR(bh);
+                                bh = NULL;
+                                goto errout;
+                        }
                }
                if (!bh) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0662b285dc8a..157b458a69d4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2302,6 +2302,7 @@ struct page *ext4_encrypt(struct inode *inode,
 int ext4_decrypt(struct page *page);
 int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
                           ext4_fsblk_t pblk, ext4_lblk_t len);
+extern const struct dentry_operations ext4_encrypted_d_ops;
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 int ext4_init_crypto(void);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0ffabaf90aa5..3753ceb0b0dd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3928,7 +3928,7 @@ static int
 convert_initialized_extent(handle_t *handle, struct inode *inode,
                           struct ext4_map_blocks *map,
                           struct ext4_ext_path **ppath, int flags,
-                           unsigned int allocated, ext4_fsblk_t newblock)
+                           unsigned int allocated)
 {
        struct ext4_ext_path *path = *ppath;
        struct ext4_extent *ex;
@@ -4347,7 +4347,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
                            (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
                                allocated = convert_initialized_extent(
                                                handle, inode, map, &path,
-                                                flags, allocated, newblock);
+                                                flags, allocated);
                                goto out2;
                        } else if (!ext4_ext_is_unwritten(ex))
                                goto out;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 1126436dada1..4cd318f31cbe 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -262,23 +262,8 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
        return result;
 }
-static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-        int err;
-        struct inode *inode = file_inode(vma->vm_file);
-        sb_start_pagefault(inode->i_sb);
-        file_update_time(vma->vm_file);
-        down_read(&EXT4_I(inode)->i_mmap_sem);
-        err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
-        up_read(&EXT4_I(inode)->i_mmap_sem);
-        sb_end_pagefault(inode->i_sb);
-        return err;
-}
 /*
- * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
 * handler we check for races agaist truncate. Note that since we cycle through
 * i_mmap_sem, we are sure that also any hole punching that began before we
 * were called is finished by now and so if it included part of the file we
@@ -311,7 +296,7 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
 static const struct vm_operations_struct ext4_dax_vm_ops = {
        .fault          = ext4_dax_fault,
        .pmd_fault      = ext4_dax_pmd_fault,
-        .page_mkwrite   = ext4_dax_mkwrite,
+        .page_mkwrite   = ext4_dax_fault,
        .pfn_mkwrite    = ext4_dax_pfn_mkwrite,
 };
 #else
@@ -350,6 +335,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
        struct super_block *sb = inode->i_sb;
        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
        struct vfsmount *mnt = filp->f_path.mnt;
+        struct inode *dir = filp->f_path.dentry->d_parent->d_inode;
        struct path path;
        char buf[64], *cp;
        int ret;
@@ -393,6 +379,14 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
                if (ext4_encryption_info(inode) == NULL)
                        return -ENOKEY;
        }
+        if (ext4_encrypted_inode(dir) &&
+            !ext4_is_child_context_consistent_with_parent(dir, inode)) {
+                ext4_warning(inode->i_sb,
+                             "Inconsistent encryption contexts: %lu/%lu\n",
+                             (unsigned long) dir->i_ino,
+                             (unsigned long) inode->i_ino);
+                return -EPERM;
+        }
        /*
         * Set up the jbd2_inode if we are opening the inode for
         * writing and the journal is present
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3fcfd50a2e8a..acc0ad56bf2f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -76,7 +76,6 @@ static int ext4_init_inode_bitmap(struct super_block *sb,
        /* If checksum is bad mark all blocks and inodes use to prevent
         * allocation, essentially implementing a per-group read-only flag. */
        if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
-                ext4_error(sb, "Checksum bad for group %u", block_group);
                grp = ext4_get_group_info(sb, block_group);
                if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
                        percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -191,8 +190,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
                set_buffer_verified(bh);
                ext4_unlock_group(sb, block_group);
                unlock_buffer(bh);
-                if (err)
+                if (err) {
+                        ext4_error(sb, "Failed to init inode bitmap for group "
+                                   "%u: %d", block_group, err);
                        goto out;
+                }
                return bh;
        }
        ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 83bc8bfb3bea..aee960b1af34 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -686,6 +686,34 @@ out_sem:
        return retval;
 }
+/*
+ * Update EXT4_MAP_FLAGS in bh->b_state from @flags; all other b_state bits are
+ * preserved. Buffer heads attached to pages may be updated concurrently.
+ */
+static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
+{
+        unsigned long old_state;
+        unsigned long new_state;
+        flags &= EXT4_MAP_FLAGS;
+        /* No b_page: dummy buffer_head? Set non-atomically. */
+        if (!bh->b_page) {
+                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags;
+                return;
+        }
+        /*
+         * Someone else may be modifying b_state, so retry the cmpxchg() until
+         * it observes the value we read. This is ugly, but it can go away once
+         * bh is no longer the container for get_block mapping information.
+         */
+        do {
+                old_state = READ_ONCE(bh->b_state);
+                new_state = (old_state & ~EXT4_MAP_FLAGS) | flags;
+        } while (unlikely(
+                 cmpxchg(&bh->b_state, old_state, new_state) != old_state));
+}
 /* Maximum number of blocks we map for direct IO at once. */
 #define DIO_MAX_BLOCKS 4096
@@ -722,7 +750,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
                ext4_io_end_t *io_end = ext4_inode_aio(inode);
                map_bh(bh, inode->i_sb, map.m_pblk);
-                bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+                ext4_update_bh_state(bh, map.m_flags);
                if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
                        set_buffer_defer_completion(bh);
                bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -1685,7 +1713,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                return ret;
        map_bh(bh, inode->i_sb, map.m_pblk);
-        bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+        ext4_update_bh_state(bh, map.m_flags);
        if (buffer_unwritten(bh)) {
                /* A delayed write to unwritten bh should be marked
@@ -2450,6 +2478,10 @@ static int ext4_writepages(struct address_space *mapping,
        trace_ext4_writepages(inode, wbc);
+        if (dax_mapping(mapping))
+                return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
+                                                   wbc);
        /*
         * No pages to write? This is mainly a kludge to avoid starting
         * a transaction for special inodes like journal inode on last iput()
@@ -3253,29 +3285,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
         * case, we allocate an io_end structure to hook to the iocb.
         */
        iocb->private = NULL;
-        ext4_inode_aio_set(inode, NULL);
-        if (!is_sync_kiocb(iocb)) {
-                io_end = ext4_init_io_end(inode, GFP_NOFS);
-                if (!io_end) {
-                        ret = -ENOMEM;
-                        goto retake_lock;
-                }
-                /*
-                 * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
-                 */
-                iocb->private = ext4_get_io_end(io_end);
-                /*
-                 * we save the io structure for current async direct
-                 * IO, so that later ext4_map_blocks() could flag the
-                 * io structure whether there is a unwritten extents
-                 * needs to be converted when IO is completed.
-                 */
-                ext4_inode_aio_set(inode, io_end);
-        }
        if (overwrite) {
                get_block_func = ext4_get_block_overwrite;
        } else {
+                ext4_inode_aio_set(inode, NULL);
+                if (!is_sync_kiocb(iocb)) {
+                        io_end = ext4_init_io_end(inode, GFP_NOFS);
+                        if (!io_end) {
+                                ret = -ENOMEM;
+                                goto retake_lock;
+                        }
+                        /*
+                         * Grab reference for DIO. Will be dropped in
+                         * ext4_end_io_dio()
+                         */
+                        iocb->private = ext4_get_io_end(io_end);
+                        /*
+                         * we save the io structure for current async direct
+                         * IO, so that later ext4_map_blocks() could flag the
+                         * io structure whether there is a unwritten extents
+                         * needs to be converted when IO is completed.
+                         */
+                        ext4_inode_aio_set(inode, io_end);
+                }
                get_block_func = ext4_get_block_write;
                dio_flags = DIO_LOCKING;
        }
@@ -4127,7 +4159,7 @@ void ext4_set_inode_flags(struct inode *inode)
                new_fl |= S_NOATIME;
        if (flags & EXT4_DIRSYNC_FL)
                new_fl |= S_DIRSYNC;
-        if (test_opt(inode->i_sb, DAX))
+        if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
                new_fl |= S_DAX;
        inode_set_flags(inode, new_fl,
                        S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0f6c36922c24..eae5917c534e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -208,7 +208,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
 {
        struct ext4_inode_info *ei = EXT4_I(inode);
        handle_t *handle = NULL;
-        int err = EPERM, migrate = 0;
+        int err = -EPERM, migrate = 0;
        struct ext4_iloc iloc;
        unsigned int oldflags, mask, i;
        unsigned int jflag;
@@ -583,6 +583,11 @@ group_extend_out:
                                 "Online defrag not supported with bigalloc");
                        err = -EOPNOTSUPP;
                        goto mext_out;
+                } else if (IS_DAX(inode)) {
+                        ext4_msg(sb, KERN_ERR,
+                                 "Online defrag not supported with DAX");
+                        err = -EOPNOTSUPP;
+                        goto mext_out;
                }
                err = mnt_want_write_file(filp);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 61eaf74dca37..4424b7bf8ac6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2285,7 +2285,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
        if (group == 0)
                seq_puts(seq, "#group: free  frags first ["
                              " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
-                              " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]");
+                              " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
        i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
                sizeof(struct ext4_group_info);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index fb6f11709ae6..e032a0423e35 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -265,11 +265,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
        ext4_lblk_t orig_blk_offset, donor_blk_offset;
        unsigned long blocksize = orig_inode->i_sb->s_blocksize;
        unsigned int tmp_data_size, data_size, replaced_size;
-        int err2, jblocks, retries = 0;
+        int i, err2, jblocks, retries = 0;
        int replaced_count = 0;
        int from = data_offset_in_page << orig_inode->i_blkbits;
        int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
        struct super_block *sb = orig_inode->i_sb;
+        struct buffer_head *bh = NULL;
        /*
         * It needs twice the amount of ordinary journal buffers because
@@ -380,8 +381,16 @@ data_copy:
        }
        /* Perform all necessary steps similar write_begin()/write_end()
         * but keeping in mind that i_size will not change */
-        *err = __block_write_begin(pagep[0], from, replaced_size,
+        if (!page_has_buffers(pagep[0]))
-                                   ext4_get_block);
+                create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
+        bh = page_buffers(pagep[0]);
+        for (i = 0; i < data_offset_in_page; i++)
+                bh = bh->b_this_page;
+        for (i = 0; i < block_len_in_page; i++) {
+                *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
+                if (*err < 0)
+                        break;
+        }
        if (!*err)
                *err = block_commit_write(pagep[0], from, from + replaced_size);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 06574dd77614..48e4b8907826 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1558,6 +1558,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
        struct ext4_dir_entry_2 *de;
        struct buffer_head *bh;
+       if (ext4_encrypted_inode(dir)) {
+               int res = ext4_get_encryption_info(dir);
+                /*
+                 * This should be a properly defined flag for
+                 * dentry->d_flags when we uplift this to the VFS.
+                 * d_fsdata is set to (void *) 1 if the dentry is
+                 * created while the directory was encrypted and we
+                 * do have access to the key (encryption info loaded).
+                 */
+               dentry->d_fsdata = NULL;
+               if (ext4_encryption_info(dir))
+                       dentry->d_fsdata = (void *) 1;
+               d_set_d_op(dentry, &ext4_encrypted_d_ops);
+               if (res && res != -ENOKEY)
+                       return ERR_PTR(res);
+       }
        if (dentry->d_name.len > EXT4_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
@@ -1585,11 +1603,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
                        return ERR_PTR(-EFSCORRUPTED);
                }
                if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
-                    (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+                    (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
-                     S_ISLNK(inode->i_mode)) &&
                    !ext4_is_child_context_consistent_with_parent(dir,
                                                                  inode)) {
+                        int nokey = ext4_encrypted_inode(inode) &&
+                                !ext4_encryption_info(inode);
                        iput(inode);
+                        if (nokey)
+                                return ERR_PTR(-ENOKEY);
                        ext4_warning(inode->i_sb,
                                     "Inconsistent encryption contexts: %lu/%lu\n",
                                     (unsigned long) dir->i_ino,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ad62d7acc315..34038e3598d5 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
        if (flex_gd == NULL)
                goto out3;
-        if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data))
+        if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
                goto out2;
        flex_gd->count = flexbg_size;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6915c950e6e8..5c46ed9f3e14 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -223,6 +223,9 @@ static void wb_wait_for_completion(struct backing_dev_info *bdi,
 #define WB_FRN_HIST_MAX_SLOTS   (WB_FRN_HIST_THR_SLOTS / 2 + 1)
                                        /* one round can affect upto 5 slots */
+static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
+static struct workqueue_struct *isw_wq;
 void __inode_attach_wb(struct inode *inode, struct page *page)
 {
        struct backing_dev_info *bdi = inode_to_bdi(inode);
@@ -424,6 +427,8 @@ skip_switch:
        iput(inode);
        kfree(isw);
+        atomic_dec(&isw_nr_in_flight);
 }
 static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
@@ -433,7 +438,7 @@ static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
        /* needs to grab bh-unsafe locks, bounce to work item */
        INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
-        schedule_work(&isw->work);
+        queue_work(isw_wq, &isw->work);
 }
 /**
@@ -469,7 +474,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
        /* while holding I_WB_SWITCH, no one else can update the association */
        spin_lock(&inode->i_lock);
-        if (inode->i_state & (I_WB_SWITCH | I_FREEING) ||
+        if (!(inode->i_sb->s_flags & MS_ACTIVE) ||
+            inode->i_state & (I_WB_SWITCH | I_FREEING) ||
            inode_to_wb(inode) == isw->new_wb) {
                spin_unlock(&inode->i_lock);
                goto out_free;
@@ -480,6 +486,8 @@ static void inode_switch_wbs(struct inode *inode, int new_wb_id)
        ihold(inode);
        isw->inode = inode;
+        atomic_inc(&isw_nr_in_flight);
        /*
         * In addition to synchronizing among switchers, I_WB_SWITCH tells
         * the RCU protected stat update paths to grab the mapping's
@@ -840,6 +848,33 @@ restart:
                wb_put(last_wb);
 }
+/**
+ * cgroup_writeback_umount - flush inode wb switches for umount
+ *
+ * Called when a super_block is about to be destroyed; flushes in-flight
+ * inode wb switches.  A switch goes through an RCU callback and then a
+ * work item, so wait with rcu_barrier() for the pending callbacks to queue
+ * their work (a bare grace period does not guarantee they have run) and
+ * then flush the workqueue.  As wb switches are rare and rcu_barrier() can
+ * take a while, perform flushing iff wb switches are in flight.
+ */
+void cgroup_writeback_umount(void)
+{
+        if (atomic_read(&isw_nr_in_flight)) {
+                rcu_barrier();
+                flush_workqueue(isw_wq);
+        }
+}
+static int __init cgroup_writeback_init(void)
+{
+        isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0); /* wq for wb switch work */
+        if (!isw_wq)
+                return -ENOMEM;
+        return 0;
+}
+fs_initcall(cgroup_writeback_init);
 #else   /* CONFIG_CGROUP_WRITEBACK */
 static struct bdi_writeback *
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 506765afa1a3..bb8d67e2740a 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -376,12 +376,11 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry)
        struct inode *inode = d_inode(dentry);
        dnode_secno dno;
        int r;
-        int rep = 0;
        int err;
        hpfs_lock(dir->i_sb);
        hpfs_adjust_length(name, &len);
-again:
        err = -ENOENT;
        de = map_dirent(dir, hpfs_i(dir)->i_dno, name, len, &dno, &qbh);
        if (!de)
@@ -401,33 +400,9 @@ again:
                hpfs_error(dir->i_sb, "there was error when removing dirent");
                err = -EFSERROR;
                break;
-        case 2:         /* no space for deleting, try to truncate file */
+        case 2:         /* no space for deleting */
                err = -ENOSPC;
-                if (rep++)
+                break;
-                        break;
-                dentry_unhash(dentry);
-                if (!d_unhashed(dentry)) {
-                        hpfs_unlock(dir->i_sb);
-                        return -ENOSPC;
-                }
-                if (generic_permission(inode, MAY_WRITE) ||
-                    !S_ISREG(inode->i_mode) ||
-                    get_write_access(inode)) {
-                        d_rehash(dentry);
-                } else {
-                        struct iattr newattrs;
-                        /*pr_info("truncating file before delete.\n");*/
-                        newattrs.ia_size = 0;
-                        newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
-                        err = notify_change(dentry, &newattrs, NULL);
-                        put_write_access(inode);
-                        if (!err)
-                                goto again;
-                }
-                hpfs_unlock(dir->i_sb);
-                return -ENOSPC;
        default:
                drop_nlink(inode);
                err = 0;
diff --git a/fs/inode.c b/fs/inode.c
index 9f62db3bcc3e..69b8b526c194 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -154,6 +154,12 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        inode->i_rdev = 0;
        inode->dirtied_when = 0;
+#ifdef CONFIG_CGROUP_WRITEBACK
+        inode->i_wb_frn_winner = 0;
+        inode->i_wb_frn_avg_time = 0;
+        inode->i_wb_frn_history = 0;
+#endif
        if (security_inode_alloc(inode))
                goto out;
        spin_lock_init(&inode->i_lock);
diff --git a/fs/jffs2/README.Locking b/fs/jffs2/README.Locking
index 3ea36554107f..8918ac905a3b 100644
--- a/fs/jffs2/README.Locking
+++ b/fs/jffs2/README.Locking
@@ -2,10 +2,6 @@
        JFFS2 LOCKING DOCUMENTATION
        ---------------------------
-At least theoretically, JFFS2 does not require the Big Kernel Lock
-(BKL), which was always helpfully obtained for it by Linux 2.4 VFS
-code. It has its own locking, as described below.
 This document attempts to describe the existing locking rules for
 JFFS2. It is not expected to remain perfectly up to date, but ought to
 be fairly close.
@@ -69,6 +65,7 @@ Ordering constraints:
           any f->sem held.
        2. Never attempt to lock two file mutexes in one thread.
           No ordering rules have been made for doing so.
+        3. Never lock a page cache page with f->sem held.
        erase_completion_lock spinlock
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index 0ae91ad6df2d..b288c8ae1236 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -50,7 +50,8 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
 static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
-                                    struct jffs2_inode_cache *ic)
+                                    struct jffs2_inode_cache *ic,
+                                    int *dir_hardlinks)
 {
        struct jffs2_full_dirent *fd;
@@ -69,19 +70,21 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
                        dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
                                  fd->name, fd->ino, ic->ino);
                        jffs2_mark_node_obsolete(c, fd->raw);
+                        /* Clear the ic/raw union so it doesn't cause problems later. */
+                        fd->ic = NULL;
                        continue;
                }
+                /* From this point, fd->raw is no longer used so we can set fd->ic */
+                fd->ic = child_ic;
+                child_ic->pino_nlink++;
+                /* If we appear (at this stage) to have hard-linked directories,
+                 * set a flag to trigger a scan later */
                if (fd->type == DT_DIR) {
-                        if (child_ic->pino_nlink) {
+                        child_ic->flags |= INO_FLAGS_IS_DIR;
-                                JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
+                        if (child_ic->pino_nlink > 1)
-                                            fd->name, fd->ino, ic->ino);
+                                *dir_hardlinks = 1;
-                                /* TODO: What do we do about it? */
+                }
-                        } else {
-                                child_ic->pino_nlink = ic->ino;
-                        }
-                } else
-                        child_ic->pino_nlink++;
                dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
                /* Can't free scan_dents so far. We might need them in pass 2 */
@@ -95,8 +98,7 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
 */
 static int jffs2_build_filesystem(struct jffs2_sb_info *c)
 {
-        int ret;
+        int ret, i, dir_hardlinks = 0;
-        int i;
        struct jffs2_inode_cache *ic;
        struct jffs2_full_dirent *fd;
        struct jffs2_full_dirent *dead_fds = NULL;
@@ -120,7 +122,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
        /* Now scan the directory tree, increasing nlink according to every dirent found. */
        for_each_inode(i, c, ic) {
                if (ic->scan_dents) {
-                        jffs2_build_inode_pass1(c, ic);
+                        jffs2_build_inode_pass1(c, ic, &dir_hardlinks);
                        cond_resched();
                }
        }
@@ -156,6 +158,20 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
        }
        dbg_fsbuild("pass 2a complete\n");
+        if (dir_hardlinks) {
+                /* If we detected directory hardlinks earlier, *hopefully*
+                 * they are gone now because some of the links were from
+                 * dead directories which still had some old dirents lying
+                 * around and not yet garbage-collected, but which have
+                 * been discarded above. So clear the pino_nlink field
+                 * in each directory, so that the final scan below can
+                 * print appropriate warnings. */
+                for_each_inode(i, c, ic) {
+                        if (ic->flags & INO_FLAGS_IS_DIR)
+                                ic->pino_nlink = 0;
+                }
+        }
        dbg_fsbuild("freeing temporary data structures\n");
        /* Finally, we can scan again and free the dirent structs */
@@ -163,6 +179,33 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
                while(ic->scan_dents) {
                        fd = ic->scan_dents;
                        ic->scan_dents = fd->next;
+                        /* We do use the pino_nlink field to count nlink of
+                         * directories during fs build, so set it to the
+                         * parent ino# now. Now that there's hopefully only
+                         * one. */
+                        if (fd->type == DT_DIR) {
+                                if (!fd->ic) {
+                                        /* We'll have complained about it and marked the corresponding
+                                           raw node obsolete already. Just skip it. */
+                                        continue;
+                                }
+                                /* We *have* to have set this in jffs2_build_inode_pass1() */
+                                BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR));
+                                /* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks
+                                 * is set. Otherwise, we know this should never trigger anyway, so
+                                 * we don't do the check. And ic->pino_nlink still contains the nlink
+                                 * value (which is 1). */
+                                if (dir_hardlinks && fd->ic->pino_nlink) {
+                                        JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n",
+                                                    fd->name, fd->ino, ic->ino, fd->ic->pino_nlink);
+                                        /* Should we unlink it from its previous parent? */
+                                }
+                                /* For directories, ic->pino_nlink holds that parent inode # */
+                                fd->ic->pino_nlink = ic->ino;
+                        }
                        jffs2_free_full_dirent(fd);
                }
                ic->scan_dents = NULL;
@@ -241,11 +284,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
                        /* Reduce nlink of the child. If it's now zero, stick it on the
                           dead_fds list to be cleaned up later. Else just free the fd */
+                        child_ic->pino_nlink--;
-                        if (fd->type == DT_DIR)
-                                child_ic->pino_nlink = 0;
-                        else
-                                child_ic->pino_nlink--;
                        if (!child_ic->pino_nlink) {
                                dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index c5ac5944bc1b..cad86bac3453 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -137,39 +137,33 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
        struct page *pg;
        struct inode *inode = mapping->host;
        struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
-        struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
-        struct jffs2_raw_inode ri;
-        uint32_t alloc_len = 0;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        uint32_t pageofs = index << PAGE_CACHE_SHIFT;
        int ret = 0;
-        jffs2_dbg(1, "%s()\n", __func__);
-        if (pageofs > inode->i_size) {
-                ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
-                                          ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
-                if (ret)
-                        return ret;
-        }
-        mutex_lock(&f->sem);
        pg = grab_cache_page_write_begin(mapping, index, flags);
-        if (!pg) {
+        if (!pg)
-                if (alloc_len)
-                        jffs2_complete_reservation(c);
-                mutex_unlock(&f->sem);
                return -ENOMEM;
-        }
        *pagep = pg;
-        if (alloc_len) {
+        jffs2_dbg(1, "%s()\n", __func__);
+        if (pageofs > inode->i_size) {
                /* Make new hole frag from old EOF to new page */
+                struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+                struct jffs2_raw_inode ri;
                struct jffs2_full_dnode *fn;
+                uint32_t alloc_len;
                jffs2_dbg(1, "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
                          (unsigned int)inode->i_size, pageofs);
+                ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+                                          ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+                if (ret)
+                        goto out_page;
+                mutex_lock(&f->sem);
                memset(&ri, 0, sizeof(ri));
                ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -196,6 +190,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
                if (IS_ERR(fn)) {
                        ret = PTR_ERR(fn);
                        jffs2_complete_reservation(c);
+                        mutex_unlock(&f->sem);
                        goto out_page;
                }
                ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -210,10 +205,12 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
                        jffs2_mark_node_obsolete(c, fn->raw);
                        jffs2_free_full_dnode(fn);
                        jffs2_complete_reservation(c);
+                        mutex_unlock(&f->sem);
                        goto out_page;
                }
                jffs2_complete_reservation(c);
                inode->i_size = pageofs;
+                mutex_unlock(&f->sem);
        }
        /*
@@ -222,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
         * case of a short-copy.
         */
        if (!PageUptodate(pg)) {
+                mutex_lock(&f->sem);
                ret = jffs2_do_readpage_nolock(inode, pg);
+                mutex_unlock(&f->sem);
                if (ret)
                        goto out_page;
        }
-        mutex_unlock(&f->sem);
        jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags);
        return ret;
 out_page:
        unlock_page(pg);
        page_cache_release(pg);
-        mutex_unlock(&f->sem);
        return ret;
 }
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 5a2dec2b064c..95d5880a63ee 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -1296,14 +1296,17 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
                BUG_ON(start > orig_start);
        }
-        /* First, use readpage() to read the appropriate page into the page cache */
+        /* The rules state that we must obtain the page lock *before* f->sem, so
-        /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
+         * drop f->sem temporarily. Since we also hold c->alloc_sem, nothing's
-         *    triggered garbage collection in the first place?
+         * actually going to *change* so we're safe; we only allow reading.
-         * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
+         *
-         *    page OK. We'll actually write it out again in commit_write, which is a little
+         * It is important to note that jffs2_write_begin() will ensure that its
-         *    suboptimal, but at least we're correct.
+         * page is marked Uptodate before allocating space. That means that if we
-         */
+         * end up here trying to GC the *same* page that jffs2_write_begin() is
+         * trying to write out, read_cache_page() will not deadlock. */
+        mutex_unlock(&f->sem);
        pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
+        mutex_lock(&f->sem);
        if (IS_ERR(pg_ptr)) {
                pr_warn("read_cache_page() returned error: %ld\n",
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index fa35ff79ab35..0637271f3770 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -194,6 +194,7 @@ struct jffs2_inode_cache {
 #define INO_STATE_CLEARING      6       /* In clear_inode() */
 #define INO_FLAGS_XATTR_CHECKED 0x01    /* has no duplicate xattr_ref */
+#define INO_FLAGS_IS_DIR        0x02    /* is a directory */
 #define RAWNODE_CLASS_INODE_CACHE       0
 #define RAWNODE_CLASS_XATTR_DATUM       1
@@ -249,7 +250,10 @@ struct jffs2_readinode_info
 struct jffs2_full_dirent
 {
-        struct jffs2_raw_node_ref *raw;
+        union {
+                struct jffs2_raw_node_ref *raw;
+                struct jffs2_inode_cache *ic; /* Just during part of build */
+        };
        struct jffs2_full_dirent *next;
        uint32_t version;
        uint32_t ino; /* == zero for unlink */
diff --git a/fs/namei.c b/fs/namei.c
index f624d132e01e..9c590e0f66e9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1712,6 +1712,11 @@ static inline int should_follow_link(struct nameidata *nd, struct path *link,
                return 0;
        if (!follow)
                return 0;
+        /* make sure that d_is_symlink above matches inode */
+        if (nd->flags & LOOKUP_RCU) {
+                if (read_seqcount_retry(&link->dentry->d_seq, seq))
+                        return -ECHILD;
+        }
        return pick_link(nd, link, inode, seq);
 }
@@ -1743,11 +1748,11 @@ static int walk_component(struct nameidata *nd, int flags)
                if (err < 0)
                        return err;
-                inode = d_backing_inode(path.dentry);
                seq = 0;        /* we are already out of RCU mode */
                err = -ENOENT;
                if (d_is_negative(path.dentry))
                        goto out_path_put;
+                inode = d_backing_inode(path.dentry);
        }
        if (flags & WALK_PUT)
@@ -3192,12 +3197,12 @@ retry_lookup:
                return error;
        BUG_ON(nd->flags & LOOKUP_RCU);
-        inode = d_backing_inode(path.dentry);
        seq = 0;        /* out of RCU mode, so the value doesn't matter */
        if (unlikely(d_is_negative(path.dentry))) {
                path_to_nameidata(&path, nd);
                return -ENOENT;
        }
+        inode = d_backing_inode(path.dentry);
 finish_lookup:
        if (nd->depth)
                put_link(nd);
@@ -3206,11 +3211,6 @@ finish_lookup:
        if (unlikely(error))
                return error;
-        if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) {
-                path_to_nameidata(&path, nd);
-                return -ELOOP;
-        }
        if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) {
                path_to_nameidata(&path, nd);
        } else {
@@ -3229,6 +3229,10 @@ finish_open:
                return error;
        }
        audit_inode(nd->name, nd->path.dentry, 0);
+        if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) {
+                error = -ELOOP;
+                goto out;
+        }
        error = -EISDIR;
        if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
                goto out;
@@ -3273,6 +3277,10 @@ opened:
                        goto exit_fput;
        }
 out:
+        if (unlikely(error > 0)) {
+                WARN_ON(1);
+                error = -EINVAL;
+        }
        if (got_write)
                mnt_drop_write(nd->path.mnt);
        path_put(&save_parent);
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index c59a59c37f3d..35ab51c04814 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -476,6 +476,7 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
                for (i = 0; i < nr_pages; i++)
                        put_page(arg->layoutupdate_pages[i]);
+                vfree(arg->start_p);
                kfree(arg->layoutupdate_pages);
        } else {
                put_page(arg->layoutupdate_page);
@@ -559,10 +560,15 @@ retry:
        if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
                void *p = start_p, *end = p + arg->layoutupdate_len;
+                struct page *page = NULL;
                int i = 0;
-                for ( ; p < end; p += PAGE_SIZE)
+                arg->start_p = start_p;
-                        arg->layoutupdate_pages[i++] = vmalloc_to_page(p);
+                for ( ; p < end; p += PAGE_SIZE) {
+                        page = vmalloc_to_page(p);
+                        arg->layoutupdate_pages[i++] = page;
+                        get_page(page);
+                }
        }
        dprintk("%s found %zu ranges\n", __func__, count);
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index bd25dc7077f7..dff83460e5a6 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -16,29 +16,8 @@
 #define NFSDBG_FACILITY NFSDBG_PROC
-static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file,
-                                fmode_t fmode)
-{
-        struct nfs_open_context *open;
-        struct nfs_lock_context *lock;
-        int ret;
-        open = get_nfs_open_context(nfs_file_open_context(file));
-        lock = nfs_get_lock_context(open);
-        if (IS_ERR(lock)) {
-                put_nfs_open_context(open);
-                return PTR_ERR(lock);
-        }
-        ret = nfs4_set_rw_stateid(dst, open, lock, fmode);
-        nfs_put_lock_context(lock);
-        put_nfs_open_context(open);
-        return ret;
-}
 static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
-                                 loff_t offset, loff_t len)
+                struct nfs_lock_context *lock, loff_t offset, loff_t len)
 {
        struct inode *inode = file_inode(filep);
        struct nfs_server *server = NFS_SERVER(inode);
@@ -56,7 +35,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
        msg->rpc_argp = &args;
        msg->rpc_resp = &res;
-        status = nfs42_set_rw_stateid(&args.falloc_stateid, filep, FMODE_WRITE);
+        status = nfs4_set_rw_stateid(&args.falloc_stateid, lock->open_context,
+                        lock, FMODE_WRITE);
        if (status)
                return status;
@@ -78,15 +58,26 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 {
        struct nfs_server *server = NFS_SERVER(file_inode(filep));
        struct nfs4_exception exception = { };
+        struct nfs_lock_context *lock;
        int err;
+        lock = nfs_get_lock_context(nfs_file_open_context(filep));
+        if (IS_ERR(lock))
+                return PTR_ERR(lock);
+        exception.inode = file_inode(filep);
+        exception.state = lock->open_context->state;
        do {
-                err = _nfs42_proc_fallocate(msg, filep, offset, len);
+                err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
-                if (err == -ENOTSUPP)
+                if (err == -ENOTSUPP) {
-                        return -EOPNOTSUPP;
+                        err = -EOPNOTSUPP;
+                        break;
+                }
                err = nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
+        nfs_put_lock_context(lock);
        return err;
 }
@@ -135,7 +126,8 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
        return err;
 }
-static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
+static loff_t _nfs42_proc_llseek(struct file *filep,
+                struct nfs_lock_context *lock, loff_t offset, int whence)
 {
        struct inode *inode = file_inode(filep);
        struct nfs42_seek_args args = {
@@ -156,7 +148,8 @@ static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
        if (!nfs_server_capable(inode, NFS_CAP_SEEK))
                return -ENOTSUPP;
-        status = nfs42_set_rw_stateid(&args.sa_stateid, filep, FMODE_READ);
+        status = nfs4_set_rw_stateid(&args.sa_stateid, lock->open_context,
+                        lock, FMODE_READ);
        if (status)
                return status;
@@ -175,17 +168,28 @@ loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
 {
        struct nfs_server *server = NFS_SERVER(file_inode(filep));
        struct nfs4_exception exception = { };
+        struct nfs_lock_context *lock;
        loff_t err;
+        lock = nfs_get_lock_context(nfs_file_open_context(filep));
+        if (IS_ERR(lock))
+                return PTR_ERR(lock);
+        exception.inode = file_inode(filep);
+        exception.state = lock->open_context->state;
        do {
-                err = _nfs42_proc_llseek(filep, offset, whence);
+                err = _nfs42_proc_llseek(filep, lock, offset, whence);
                if (err >= 0)
                        break;
-                if (err == -ENOTSUPP)
+                if (err == -ENOTSUPP) {
-                        return -EOPNOTSUPP;
+                        err = -EOPNOTSUPP;
+                        break;
+                }
                err = nfs4_handle_exception(server, err, &exception);
        } while (exception.retry);
+        nfs_put_lock_context(lock);
        return err;
 }
@@ -298,8 +302,9 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
 }
 static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
-                             struct file *dst_f, loff_t src_offset,
+                struct file *dst_f, struct nfs_lock_context *src_lock,
-                             loff_t dst_offset, loff_t count)
+                struct nfs_lock_context *dst_lock, loff_t src_offset,
+                loff_t dst_offset, loff_t count)
 {
        struct inode *src_inode = file_inode(src_f);
        struct inode *dst_inode = file_inode(dst_f);
@@ -320,11 +325,13 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
        msg->rpc_argp = &args;
        msg->rpc_resp = &res;
-        status = nfs42_set_rw_stateid(&args.src_stateid, src_f, FMODE_READ);
+        status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+                        src_lock, FMODE_READ);
        if (status)
                return status;
-        status = nfs42_set_rw_stateid(&args.dst_stateid, dst_f, FMODE_WRITE);
+        status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+                        dst_lock, FMODE_WRITE);
        if (status)
                return status;
@@ -349,22 +356,48 @@ int nfs42_proc_clone(struct file *src_f, struct file *dst_f,
        };
        struct inode *inode = file_inode(src_f);
        struct nfs_server *server = NFS_SERVER(file_inode(src_f));
-        struct nfs4_exception exception = { };
+        struct nfs_lock_context *src_lock;
-        int err;
+        struct nfs_lock_context *dst_lock;
+        struct nfs4_exception src_exception = { };
+        struct nfs4_exception dst_exception = { };
+        int err, err2;
        if (!nfs_server_capable(inode, NFS_CAP_CLONE))
                return -EOPNOTSUPP;
+        src_lock = nfs_get_lock_context(nfs_file_open_context(src_f));
+        if (IS_ERR(src_lock))
+                return PTR_ERR(src_lock);
+        src_exception.inode = file_inode(src_f);
+        src_exception.state = src_lock->open_context->state;
+        dst_lock = nfs_get_lock_context(nfs_file_open_context(dst_f));
+        if (IS_ERR(dst_lock)) {
+                err = PTR_ERR(dst_lock);
+                goto out_put_src_lock;
+        }
+        dst_exception.inode = file_inode(dst_f);
+        dst_exception.state = dst_lock->open_context->state;
        do {
-                err = _nfs42_proc_clone(&msg, src_f, dst_f, src_offset,
+                err = _nfs42_proc_clone(&msg, src_f, dst_f, src_lock, dst_lock,
-                                        dst_offset, count);
+                                        src_offset, dst_offset, count);
                if (err == -ENOTSUPP || err == -EOPNOTSUPP) {
                        NFS_SERVER(inode)->caps &= ~NFS_CAP_CLONE;
-                        return -EOPNOTSUPP;
+                        err = -EOPNOTSUPP;
+                        break;
                }
-                err = nfs4_handle_exception(server, err, &exception);
-        } while (exception.retry);
-        return err;
+                err2 = nfs4_handle_exception(server, err, &src_exception);
+                err = nfs4_handle_exception(server, err, &dst_exception);
+                if (!err)
+                        err = err2;
+        } while (src_exception.retry || dst_exception.retry);
+        nfs_put_lock_context(dst_lock);
+out_put_src_lock:
+        nfs_put_lock_context(src_lock);
+        return err;
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 4bfc33ad0563..14881594dd07 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2466,9 +2466,9 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
                dentry = d_add_unique(dentry, igrab(state->inode));
                if (dentry == NULL) {
                        dentry = opendata->dentry;
-                } else if (dentry != ctx->dentry) {
+                } else {
                        dput(ctx->dentry);
-                        ctx->dentry = dget(dentry);
+                        ctx->dentry = dentry;
                }
                nfs_set_verifier(dentry,
                                nfs_save_change_attribute(d_inode(opendata->dir)));
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 482b6e94bb37..2fa483e6dbe2 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -252,6 +252,27 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
        }
 }
+/*
+ * Mark a pnfs_layout_hdr and all associated layout segments as invalid
+ *
+ * In order to continue using the pnfs_layout_hdr, a full recovery
+ * is required.
+ * Note that caller must hold inode->i_lock.
+ */
+static int
+pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
+                struct list_head *lseg_list)
+{
+        struct pnfs_layout_range range = {
+                .iomode = IOMODE_ANY,
+                .offset = 0,
+                .length = NFS4_MAX_UINT64,
+        };
+        set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
+        return pnfs_mark_matching_lsegs_invalid(lo, lseg_list, &range);
+}
 static int
 pnfs_iomode_to_fail_bit(u32 iomode)
 {
@@ -554,9 +575,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
        spin_lock(&nfsi->vfs_inode.i_lock);
        lo = nfsi->layout;
        if (lo) {
-                lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
-                pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
                pnfs_get_layout_hdr(lo);
+                pnfs_mark_layout_stateid_invalid(lo, &tmp_list);
                pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
                pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
                spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -617,11 +637,6 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
 {
        struct pnfs_layout_hdr *lo;
        struct inode *inode;
-        struct pnfs_layout_range range = {
-                .iomode = IOMODE_ANY,
-                .offset = 0,
-                .length = NFS4_MAX_UINT64,
-        };
        LIST_HEAD(lseg_list);
        int ret = 0;
@@ -636,11 +651,11 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list,
                spin_lock(&inode->i_lock);
                list_del_init(&lo->plh_bulk_destroy);
-                lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
+                if (pnfs_mark_layout_stateid_invalid(lo, &lseg_list)) {
-                if (is_bulk_recall)
+                        if (is_bulk_recall)
-                        set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
+                                set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
-                if (pnfs_mark_matching_lsegs_invalid(lo, &lseg_list, &range))
                        ret = -EAGAIN;
+                }
                spin_unlock(&inode->i_lock);
                pnfs_free_lseg_list(&lseg_list);
                /* Free all lsegs that are attached to commit buckets */
@@ -1738,8 +1753,19 @@ pnfs_set_plh_return_iomode(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode)
        if (lo->plh_return_iomode != 0)
                iomode = IOMODE_ANY;
        lo->plh_return_iomode = iomode;
+        set_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
 }
+/**
+ * pnfs_mark_matching_lsegs_return - Free or return matching layout segments
+ * @lo: pointer to layout header
+ * @tmp_list: list header to be used with pnfs_free_lseg_list()
+ * @return_range: describe layout segment ranges to be returned
+ *
+ * This function is mainly intended for use by layoutrecall. It attempts
+ * to free the layout segment immediately, or else to mark it for return
+ * as soon as its reference count drops to zero.
+ */
 int
 pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                                struct list_head *tmp_list,
@@ -1762,12 +1788,11 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                                lseg, lseg->pls_range.iomode,
                                lseg->pls_range.offset,
                                lseg->pls_range.length);
+                        if (mark_lseg_invalid(lseg, tmp_list))
+                                continue;
+                        remaining++;
                        set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
                        pnfs_set_plh_return_iomode(lo, return_range->iomode);
-                        if (!mark_lseg_invalid(lseg, tmp_list))
-                                remaining++;
-                        set_bit(NFS_LAYOUT_RETURN_REQUESTED,
-                                        &lo->plh_flags);
                }
        return remaining;
 }
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index cfcbf114676e..7115c5d7d373 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -91,7 +91,14 @@
 #include <linux/fsnotify_backend.h>
 #include "fsnotify.h"
+#define FSNOTIFY_REAPER_DELAY   (1)     /* 1 jiffy */
 struct srcu_struct fsnotify_mark_srcu;
+static DEFINE_SPINLOCK(destroy_lock);
+static LIST_HEAD(destroy_list);
+static void fsnotify_mark_destroy(struct work_struct *work);
+static DECLARE_DELAYED_WORK(reaper_work, fsnotify_mark_destroy);
 void fsnotify_get_mark(struct fsnotify_mark *mark)
 {
@@ -165,19 +172,10 @@ void fsnotify_detach_mark(struct fsnotify_mark *mark)
        atomic_dec(&group->num_marks);
 }
-static void
-fsnotify_mark_free_rcu(struct rcu_head *rcu)
-{
-        struct fsnotify_mark    *mark;
-        mark = container_of(rcu, struct fsnotify_mark, g_rcu);
-        fsnotify_put_mark(mark);
-}
 /*
- * Free fsnotify mark. The freeing is actually happening from a call_srcu
+ * Free fsnotify mark. The freeing is actually happening from a kthread which
- * callback. Caller must have a reference to the mark or be protected by
+ * first waits for srcu period end. Caller must have a reference to the mark
- * fsnotify_mark_srcu.
+ * or be protected by fsnotify_mark_srcu.
 */
 void fsnotify_free_mark(struct fsnotify_mark *mark)
 {
@@ -192,7 +190,11 @@ void fsnotify_free_mark(struct fsnotify_mark *mark)
        mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
        spin_unlock(&mark->lock);
-        call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
+        spin_lock(&destroy_lock);
+        list_add(&mark->g_list, &destroy_list);
+        spin_unlock(&destroy_lock);
+        queue_delayed_work(system_unbound_wq, &reaper_work,
+                                FSNOTIFY_REAPER_DELAY);
        /*
         * Some groups like to know that marks are being freed.  This is a
@@ -388,7 +390,12 @@ err:
        spin_unlock(&mark->lock);
-        call_srcu(&fsnotify_mark_srcu, &mark->g_rcu, fsnotify_mark_free_rcu);
+        spin_lock(&destroy_lock);
+        list_add(&mark->g_list, &destroy_list);
+        spin_unlock(&destroy_lock);
+        queue_delayed_work(system_unbound_wq, &reaper_work,
+                                FSNOTIFY_REAPER_DELAY);
        return ret;
 }
@@ -491,3 +498,21 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
        atomic_set(&mark->refcnt, 1);
        mark->free_mark = free_mark;
 }
+static void fsnotify_mark_destroy(struct work_struct *work)
+{
+        struct fsnotify_mark *mark, *next;
+        struct list_head private_destroy_list;
+        spin_lock(&destroy_lock);
+        /* exchange the list head */
+        list_replace_init(&destroy_list, &private_destroy_list);
+        spin_unlock(&destroy_lock);
+        synchronize_srcu(&fsnotify_mark_srcu);
+        list_for_each_entry_safe(mark, next, &private_destroy_list, g_list) {
+                list_del_init(&mark->g_list);
+                fsnotify_put_mark(mark);
+        }
+}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 794fd1587f34..cda0361e95a4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -956,6 +956,7 @@ clean_orphan:
                tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
                                update_isize, end);
                if (tmp_ret < 0) {
+                        ocfs2_inode_unlock(inode, 1);
                        ret = tmp_ret;
                        mlog_errno(ret);
                        brelse(di_bh);
diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e435c6..c524fdddc7fb 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -202,6 +202,11 @@ static struct mount *last_dest, *last_source, *dest_master;
 static struct mountpoint *mp;
 static struct hlist_head *list;
+static inline bool peers(struct mount *m1, struct mount *m2)
+{
+        return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
+}
 static int propagate_one(struct mount *m)
 {
        struct mount *child;
@@ -212,7 +217,7 @@ static int propagate_one(struct mount *m)
        /* skip if mountpoint isn't covered by it */
        if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
                return 0;
-        if (m->mnt_group_id == last_dest->mnt_group_id) {
+        if (peers(m, last_dest)) {
                type = CL_MAKE_SHARED;
        } else {
                struct mount *n, *p;
@@ -223,7 +228,7 @@ static int propagate_one(struct mount *m)
                                        last_source = last_source->mnt_master;
                                        last_dest = last_source->mnt_parent;
                                }
-                                if (n->mnt_group_id != last_dest->mnt_group_id) {
+                                if (!peers(n, last_dest)) {
                                        last_source = last_source->mnt_master;
                                        last_dest = last_source->mnt_parent;
                                }
diff --git a/fs/read_write.c b/fs/read_write.c
index 324ec271cc4e..dadf24e5c95b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -17,6 +17,7 @@
 #include <linux/splice.h>
 #include <linux/compat.h>
 #include <linux/mount.h>
+#include <linux/fs.h>
 #include "internal.h"
 #include <asm/uaccess.h>
@@ -183,7 +184,7 @@ loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
        switch (whence) {
        case SEEK_SET: case SEEK_CUR:
                return generic_file_llseek_size(file, offset, whence,
-                                                ~0ULL, 0);
+                                                OFFSET_MAX, 0);
        default:
                return -EINVAL;
        }
@@ -1532,10 +1533,12 @@ int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
        if (!(file_in->f_mode & FMODE_READ) ||
            !(file_out->f_mode & FMODE_WRITE) ||
-            (file_out->f_flags & O_APPEND) ||
+            (file_out->f_flags & O_APPEND))
-            !file_in->f_op->clone_file_range)
                return -EBADF;
+        if (!file_in->f_op->clone_file_range)
+                return -EOPNOTSUPP;
        ret = clone_verify_area(file_in, pos_in, len, false);
        if (ret)
                return ret;
diff --git a/fs/super.c b/fs/super.c
index 1182af8fd5ff..74914b1bae70 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -415,6 +415,7 @@ void generic_shutdown_super(struct super_block *sb)
                sb->s_flags &= ~MS_ACTIVE;
                fsnotify_unmount_inodes(sb);
+                cgroup_writeback_umount();
                evict_inodes(sb);
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 50311703135b..66cdb44616d5 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -287,6 +287,12 @@ int handle_userfault(struct vm_area_struct *vma, unsigned long address,
                goto out;
        /*
+         * We don't do userfault handling for the final child pid update.
+         */
+        if (current->flags & PF_EXITING)
+                goto out;
+        /*
         * Check that we can return VM_FAULT_RETRY.
         *
         * NOTE: it should become possible to return VM_FAULT_RETRY
diff --git a/fs/xattr.c b/fs/xattr.c
index 07d0e47f6a7f..4861322e28e8 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -940,7 +940,7 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
        bool trusted = capable(CAP_SYS_ADMIN);
        struct simple_xattr *xattr;
        ssize_t remaining_size = size;
-        int err;
+        int err = 0;
 #ifdef CONFIG_FS_POSIX_ACL
        if (inode->i_acl) {
@@ -965,11 +965,11 @@ ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
                err = xattr_list_one(&buffer, &remaining_size, xattr->name);
                if (err)
-                        return err;
+                        break;
        }
        spin_unlock(&xattrs->lock);
-        return size - remaining_size;
+        return err ? err : size - remaining_size;
 }
 /*
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 379c089fb051..a9ebabfe7587 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -55,7 +55,7 @@ xfs_count_page_state(
        } while ((bh = bh->b_this_page) != head);
 }
-STATIC struct block_device *
+struct block_device *
 xfs_find_bdev_for_inode(
        struct inode            *inode)
 {
@@ -1208,6 +1208,10 @@ xfs_vm_writepages(
        struct writeback_control *wbc)
 {
        xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+        if (dax_mapping(mapping))
+                return dax_writeback_mapping_range(mapping,
+                                xfs_find_bdev_for_inode(mapping->host), wbc);
        return generic_writepages(mapping, wbc);
 }
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index f6ffc9ae5ceb..a4343c63fb38 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -62,5 +62,6 @@ int	xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
                                 struct buffer_head *map_bh, int create);
 extern void xfs_count_page_state(struct page *, int *, int *);
+extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
 #endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 45ec9e40150c..6c876012b2e5 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -75,7 +75,8 @@ xfs_zero_extent(
        ssize_t         size = XFS_FSB_TO_B(mp, count_fsb);
        if (IS_DAX(VFS_I(ip)))
-                return dax_clear_blocks(VFS_I(ip), block, size);
+                return dax_clear_sectors(xfs_find_bdev_for_inode(VFS_I(ip)),
+                                sector, size);
        /*
         * let the block layer decide on the fastest method of
author	Ingo Molnar <mingo@kernel.org>	2016-03-07 03:27:30 -0500
committer	Ingo Molnar <mingo@kernel.org>	2016-03-07 03:27:30 -0500
commit	ec87e1cf7d8399d81d8965c6d852f8057a8dd687 (patch)
tree	472a168fa4861090edf110c8a9712a5c15ea259f /fs
parent	869ae76147ffdf21ad24f0e599303cd58a2bb39f (diff)
parent	f6cede5b49e822ebc41a099fe41ab4989f64e2cb (diff)