Diffstat (limited to 'fs')
33 files changed, 301 insertions, 164 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 80953528572d..68f322f600a0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3163,6 +3163,9 @@ void btrfs_destroy_inode(struct inode *inode);
 int btrfs_drop_inode(struct inode *inode);
 int __init btrfs_init_cachep(void);
 void __cold btrfs_destroy_cachep(void);
+struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+			      struct btrfs_root *root, int *new,
+			      struct btrfs_path *path);
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
			 struct btrfs_root *root, int *was_new);
 struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b0ab41da91d1..3f0b6d1936e8 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1664,9 +1664,8 @@ static int cleaner_kthread(void *arg)
 	struct btrfs_root *root = arg;
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	int again;
-	struct btrfs_trans_handle *trans;
 
-	do {
+	while (1) {
 		again = 0;
 
 		/* Make the cleaner go to sleep early. */
@@ -1715,42 +1714,16 @@ static int cleaner_kthread(void *arg)
 		 */
 		btrfs_delete_unused_bgs(fs_info);
 sleep:
+		if (kthread_should_park())
+			kthread_parkme();
+		if (kthread_should_stop())
+			return 0;
 		if (!again) {
 			set_current_state(TASK_INTERRUPTIBLE);
-			if (!kthread_should_stop())
-				schedule();
+			schedule();
 			__set_current_state(TASK_RUNNING);
 		}
-	} while (!kthread_should_stop());
-
-	/*
-	 * Transaction kthread is stopped before us and wakes us up.
-	 * However we might have started a new transaction and COWed some
-	 * tree blocks when deleting unused block groups for example. So
-	 * make sure we commit the transaction we started to have a clean
-	 * shutdown when evicting the btree inode - if it has dirty pages
-	 * when we do the final iput() on it, eviction will trigger a
-	 * writeback for it which will fail with null pointer dereferences
-	 * since work queues and other resources were already released and
-	 * destroyed by the time the iput/eviction/writeback is made.
-	 */
-	trans = btrfs_attach_transaction(root);
-	if (IS_ERR(trans)) {
-		if (PTR_ERR(trans) != -ENOENT)
-			btrfs_err(fs_info,
-				  "cleaner transaction attach returned %ld",
-				  PTR_ERR(trans));
-	} else {
-		int ret;
-
-		ret = btrfs_commit_transaction(trans);
-		if (ret)
-			btrfs_err(fs_info,
-				  "cleaner open transaction commit returned %d",
-				  ret);
-	}
-
-	return 0;
+	}
 }
 
 static int transaction_kthread(void *arg)
@@ -3931,6 +3904,13 @@ void close_ctree(struct btrfs_fs_info *fs_info)
 	int ret;
 
 	set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
+	/*
+	 * We don't want the cleaner to start new transactions, add more delayed
+	 * iputs, etc. while we're closing. We can't use kthread_stop() yet
+	 * because that frees the task_struct, and the transaction kthread might
+	 * still try to wake up the cleaner.
+	 */
+	kthread_park(fs_info->cleaner_kthread);
 
 	/* wait for the qgroup rescan worker to stop */
 	btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -3958,9 +3938,8 @@ void close_ctree(struct btrfs_fs_info *fs_info)
 
 	if (!sb_rdonly(fs_info->sb)) {
 		/*
-		 * If the cleaner thread is stopped and there are
-		 * block groups queued for removal, the deletion will be
-		 * skipped when we quit the cleaner thread.
+		 * The cleaner kthread is stopped, so do one final pass over
+		 * unused block groups.
 		 */
 		btrfs_delete_unused_bgs(fs_info);
 
@@ -4359,13 +4338,23 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
 	unpin = pinned_extents;
 again:
 	while (1) {
+		/*
+		 * The btrfs_finish_extent_commit() may get the same range as
+		 * ours between find_first_extent_bit and clear_extent_dirty.
+		 * Hence, hold the unused_bg_unpin_mutex to avoid double unpin
+		 * the same extent range.
+		 */
+		mutex_lock(&fs_info->unused_bg_unpin_mutex);
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
 					    EXTENT_DIRTY, NULL);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			break;
+		}
 
 		clear_extent_dirty(unpin, start, end);
 		btrfs_error_unpin_extent_range(fs_info, start, end);
+		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 		cond_resched();
 	}
 
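The close_ctree() hunk above relies on the kthread parking API: parking quiesces the cleaner without freeing its task_struct, so the transaction kthread can still wake it safely, and kthread_stop() only comes later in the teardown path. A minimal sketch of that handshake, with hypothetical example_* names that are not part of this patch:

#include <linux/kthread.h>
#include <linux/sched.h>

static int example_worker(void *arg)
{
	while (1) {
		/* ... do one unit of background work ... */

		if (kthread_should_park())
			kthread_parkme();	/* sleep here until unparked */
		if (kthread_should_stop())
			return 0;

		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
		__set_current_state(TASK_RUNNING);
	}
}

static void example_shutdown(struct task_struct *worker)
{
	kthread_park(worker);	/* worker is idle, task_struct stays valid */
	/* ... tear down state the worker must no longer touch ... */
	kthread_stop(worker);	/* only now may the task exit and be freed */
}

Parking rather than stopping is the point of the fix: a stopped thread's task_struct can be freed while another thread still holds a pointer to it, whereas a parked thread stays allocated and merely does no work.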
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 4ba0aedc878b..74aa552f4793 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
 	 * sure NOFS is set to keep us from deadlocking.
 	 */
 	nofs_flag = memalloc_nofs_save();
-	inode = btrfs_iget(fs_info->sb, &location, root, NULL);
+	inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
+	btrfs_release_path(path);
 	memalloc_nofs_restore(nofs_flag);
 	if (IS_ERR(inode))
 		return inode;
@@ -838,6 +839,25 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 	path->search_commit_root = 1;
 	path->skip_locking = 1;
 
+	/*
+	 * We must pass a path with search_commit_root set to btrfs_iget in
+	 * order to avoid a deadlock when allocating extents for the tree root.
+	 *
+	 * When we are COWing an extent buffer from the tree root, when looking
+	 * for a free extent, at extent-tree.c:find_free_extent(), we can find
+	 * block group without its free space cache loaded. When we find one
+	 * we must load its space cache which requires reading its free space
+	 * cache's inode item from the root tree. If this inode item is located
+	 * in the same leaf that we started COWing before, then we end up in
+	 * deadlock on the extent buffer (trying to read lock it when we
+	 * previously write locked it).
+	 *
+	 * It's safe to read the inode item using the commit root because
+	 * block groups, once loaded, stay in memory forever (until they are
+	 * removed) as well as their space caches once loaded. New block groups
+	 * once created get their ->cached field set to BTRFS_CACHE_FINISHED so
+	 * we will never try to read their inode item while the fs is mounted.
+	 */
 	inode = lookup_free_space_inode(fs_info, block_group, path);
 	if (IS_ERR(inode)) {
 		btrfs_free_path(path);
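The long comment added above explains why the lookup must go through the commit root. Condensed to its essentials, the calling pattern looks roughly like this; a sketch assembled from the surrounding code, assuming fs_info, location and root are already in scope:

	struct btrfs_path *path;
	struct inode *inode;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	/* Read the free space inode item from the last committed tree, so no
	 * lock is taken on an extent buffer we might be COWing right now. */
	path->search_commit_root = 1;
	path->skip_locking = 1;

	inode = btrfs_iget_path(fs_info->sb, &location, root, NULL, path);
	btrfs_release_path(path);
	btrfs_free_path(path);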
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d3df5b52278c..9ea4c6f0352f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1531,12 +1531,11 @@ out_check:
 	}
 	btrfs_release_path(path);
 
-	if (cur_offset <= end && cow_start == (u64)-1) {
+	if (cur_offset <= end && cow_start == (u64)-1)
 		cow_start = cur_offset;
-		cur_offset = end;
-	}
 
 	if (cow_start != (u64)-1) {
+		cur_offset = end;
 		ret = cow_file_range(inode, locked_page, cow_start, end, end,
 				     page_started, nr_written, 1, NULL);
 		if (ret)
@@ -3570,10 +3569,11 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,
 /*
  * read an inode from the btree into the in-memory inode
  */
-static int btrfs_read_locked_inode(struct inode *inode)
+static int btrfs_read_locked_inode(struct inode *inode,
+				   struct btrfs_path *in_path)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
-	struct btrfs_path *path;
+	struct btrfs_path *path = in_path;
 	struct extent_buffer *leaf;
 	struct btrfs_inode_item *inode_item;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -3589,15 +3589,18 @@ static int btrfs_read_locked_inode(struct inode *inode)
 	if (!ret)
 		filled = true;
 
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
+	if (!path) {
+		path = btrfs_alloc_path();
+		if (!path)
+			return -ENOMEM;
+	}
 
 	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
 
 	ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
 	if (ret) {
-		btrfs_free_path(path);
+		if (path != in_path)
+			btrfs_free_path(path);
 		return ret;
 	}
 
@@ -3722,7 +3725,8 @@ cache_acl:
 			  btrfs_ino(BTRFS_I(inode)),
 			  root->root_key.objectid, ret);
 	}
-	btrfs_free_path(path);
+	if (path != in_path)
+		btrfs_free_path(path);
 
 	if (!maybe_acls)
 		cache_no_acl(inode);
@@ -5644,8 +5648,9 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
 /* Get an inode object given its location and corresponding root.
  * Returns in *is_new if the inode was read from disk
  */
-struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
-			 struct btrfs_root *root, int *new)
+struct inode *btrfs_iget_path(struct super_block *s, struct btrfs_key *location,
+			      struct btrfs_root *root, int *new,
+			      struct btrfs_path *path)
 {
 	struct inode *inode;
 
@@ -5656,7 +5661,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 	if (inode->i_state & I_NEW) {
 		int ret;
 
-		ret = btrfs_read_locked_inode(inode);
+		ret = btrfs_read_locked_inode(inode, path);
 		if (!ret) {
 			inode_tree_add(inode);
 			unlock_new_inode(inode);
@@ -5678,6 +5683,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
 	return inode;
 }
 
+struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
+			 struct btrfs_root *root, int *new)
+{
+	return btrfs_iget_path(s, location, root, new, NULL);
+}
+
 static struct inode *new_simple_dir(struct super_block *s,
 				    struct btrfs_key *key,
 				    struct btrfs_root *root)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 3ca6943827ef..802a628e9f7d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3488,6 +3488,8 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
 			const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
 
 			len = round_down(i_size_read(src), sz) - loff;
+			if (len == 0)
+				return 0;
 			olen = len;
 		}
 	}
@@ -4257,9 +4259,17 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
 		goto out_unlock;
 	if (len == 0)
 		olen = len = src->i_size - off;
-	/* if we extend to eof, continue to block boundary */
-	if (off + len == src->i_size)
+	/*
+	 * If we extend to eof, continue to block boundary if and only if the
+	 * destination end offset matches the destination file's size, otherwise
+	 * we would be corrupting data by placing the eof block into the middle
+	 * of a file.
+	 */
+	if (off + len == src->i_size) {
+		if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
+			goto out_unlock;
 		len = ALIGN(src->i_size, bs) - off;
+	}
 
 	if (len == 0) {
 		ret = 0;
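The new condition in btrfs_clone_files() only lets a clone that ends at the source's EOF be rounded up to a block boundary when the rounded tail cannot land in the middle of the destination file. The same rule in isolation, as a hedged sketch with invented names and no btrfs internals:

#include <linux/kernel.h>	/* IS_ALIGNED() */
#include <linux/types.h>

static bool clone_may_extend_to_block_boundary(u64 off, u64 len, u64 src_isize,
					       u64 destoff, u64 dst_isize, u64 bs)
{
	if (off + len != src_isize)
		return false;		/* not cloning up to the source eof */
	if (IS_ALIGNED(len, bs))
		return true;		/* no partial tail block involved */
	/* A partial eof block may only land at or beyond the destination's
	 * eof, never in the middle of the destination file. */
	return destoff + len >= dst_isize;
}

For example, with a 4096-byte block size, cloning a 6000-byte source file to offset 0 of an 8192-byte destination must not be rounded up, while cloning it to offset 8192 (the destination's EOF) may be.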
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index b362b45dd757..cbc9d0d2c12d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1916,7 +1916,7 @@ restore:
 }
 
 /* Used to sort the devices by max_avail(descending sort) */
-static int btrfs_cmp_device_free_bytes(const void *dev_info1,
+static inline int btrfs_cmp_device_free_bytes(const void *dev_info1,
 				       const void *dev_info2)
 {
 	if (((struct btrfs_device_info *)dev_info1)->max_avail >
@@ -1945,8 +1945,8 @@ static inline void btrfs_descending_sort_devices(
  * The helper to calc the free space on the devices that can be used to store
  * file data.
  */
-static int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
+static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info,
 				       u64 *free_bytes)
 {
 	struct btrfs_device_info *devices_info;
 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index cab0b1f1f741..efcf89a8ba44 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -440,7 +440,7 @@ static int check_block_group_item(struct btrfs_fs_info *fs_info,
 	    type != (BTRFS_BLOCK_GROUP_METADATA |
 		     BTRFS_BLOCK_GROUP_DATA)) {
 		block_group_err(fs_info, leaf, slot,
-"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llu or 0x%llx",
+"invalid type, have 0x%llx (%lu bits set) expect either 0x%llx, 0x%llx, 0x%llx or 0x%llx",
 			type, hweight64(type),
 			BTRFS_BLOCK_GROUP_DATA, BTRFS_BLOCK_GROUP_METADATA,
 			BTRFS_BLOCK_GROUP_SYSTEM,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index e07f3376b7df..a5ce99a6c936 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4396,6 +4396,23 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
 		logged_end = end;
 
 	list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
+		/*
+		 * Skip extents outside our logging range. It's important to do
+		 * it for correctness because if we don't ignore them, we may
+		 * log them before their ordered extent completes, and therefore
+		 * we could log them without logging their respective checksums
+		 * (the checksum items are added to the csum tree at the very
+		 * end of btrfs_finish_ordered_io()). Also leave such extents
+		 * outside of our range in the list, since we may have another
+		 * ranged fsync in the near future that needs them. If an extent
+		 * outside our range corresponds to a hole, log it to avoid
+		 * leaving gaps between extents (fsck will complain when we are
+		 * not using the NO_HOLES feature).
+		 */
+		if ((em->start > end || em->start + em->len <= start) &&
+		    em->block_start != EXTENT_MAP_HOLE)
+			continue;
+
 		list_del_init(&em->list);
 		/*
 		 * Just an arbitrary number, this can be really CPU intensive
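The skip test added above is an ordinary interval-overlap check against the inclusive fsync range [start, end]; non-overlapping extents stay on the list unless they describe holes. Written out on its own, as an illustrative helper that is not part of the patch:

#include <linux/types.h>

/* An extent map covering [em_start, em_start + em_len) intersects the
 * inclusive logging range [start, end] unless it ends at or before 'start'
 * or begins after 'end'. */
static bool em_in_log_range(u64 em_start, u64 em_len, u64 start, u64 end)
{
	return em_start <= end && em_start + em_len > start;
}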
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 27cad84dab23..189df668b6a0 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1931,10 +1931,17 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
 	if (!prealloc_cf)
 		return -ENOMEM;
 
-	/* Start by sync'ing the source file */
+	/* Start by sync'ing the source and destination files */
 	ret = file_write_and_wait_range(src_file, src_off, (src_off + len));
-	if (ret < 0)
+	if (ret < 0) {
+		dout("failed to write src file (%zd)\n", ret);
+		goto out;
+	}
+	ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len));
+	if (ret < 0) {
+		dout("failed to write dst file (%zd)\n", ret);
 		goto out;
+	}
 
 	/*
 	 * We need FILE_WR caps for dst_ci and FILE_RD for src_ci as other
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 67a9aeb2f4ec..bd13a3267ae0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -80,12 +80,8 @@ static int parse_reply_info_in(void **p, void *end,
 	info->symlink = *p;
 	*p += info->symlink_len;
 
-	if (features & CEPH_FEATURE_DIRLAYOUTHASH)
-		ceph_decode_copy_safe(p, end, &info->dir_layout,
-				      sizeof(info->dir_layout), bad);
-	else
-		memset(&info->dir_layout, 0, sizeof(info->dir_layout));
-
+	ceph_decode_copy_safe(p, end, &info->dir_layout,
+			      sizeof(info->dir_layout), bad);
 	ceph_decode_32_safe(p, end, info->xattr_len, bad);
 	ceph_decode_need(p, end, info->xattr_len, bad);
 	info->xattr_data = *p;
@@ -3182,10 +3178,8 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
 	recon_state.pagelist = pagelist;
 	if (session->s_con.peer_features & CEPH_FEATURE_MDSENC)
 		recon_state.msg_version = 3;
-	else if (session->s_con.peer_features & CEPH_FEATURE_FLOCK)
-		recon_state.msg_version = 2;
 	else
-		recon_state.msg_version = 1;
+		recon_state.msg_version = 2;
 	err = iterate_session_caps(session, encode_caps_cb, &recon_state);
 	if (err < 0)
 		goto fail;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 32d4f13784ba..03f4d24db8fe 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -237,7 +237,8 @@ static bool check_quota_exceeded(struct inode *inode, enum quota_check_op op,
 		ceph_put_snap_realm(mdsc, realm);
 		realm = next;
 	}
-	ceph_put_snap_realm(mdsc, realm);
+	if (realm)
+		ceph_put_snap_realm(mdsc, realm);
 	up_read(&mdsc->snap_rwsem);
 
 	return exceeded;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 05f01fbd9c7f..22a9d8159720 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5835,9 +5835,10 @@ int ext4_mark_iloc_dirty(handle_t *handle,
 {
 	int err = 0;
 
-	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb)))) {
+		put_bh(iloc->bh);
 		return -EIO;
-
+	}
 	if (IS_I_VERSION(inode))
 		inode_inc_iversion(inode);
 
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 17adcb16a9c8..437f71fe83ae 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -126,6 +126,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 	if (!is_dx_block && type == INDEX) {
 		ext4_error_inode(inode, func, line, block,
 		       "directory leaf block found instead of index block");
+		brelse(bh);
 		return ERR_PTR(-EFSCORRUPTED);
 	}
 	if (!ext4_has_metadata_csum(inode->i_sb) ||
@@ -2811,7 +2812,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 			list_del_init(&EXT4_I(inode)->i_orphan);
 			mutex_unlock(&sbi->s_orphan_lock);
 		}
-	}
+	} else
+		brelse(iloc.bh);
+
 	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 		  inode->i_ino, NEXT_ORPHAN(inode));
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ebbc663d0798..a5efee34415f 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -459,16 +459,18 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
 
 		BUFFER_TRACE(bh, "get_write_access");
 		err = ext4_journal_get_write_access(handle, bh);
-		if (err)
+		if (err) {
+			brelse(bh);
 			return err;
+		}
 		ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
 			   first_cluster, first_cluster - start, count2);
 		ext4_set_bits(bh->b_data, first_cluster - start, count2);
 
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		brelse(bh);
 		if (unlikely(err))
 			return err;
-		brelse(bh);
 	}
 
 	return 0;
@@ -605,7 +607,6 @@ handle_bb:
 		bh = bclean(handle, sb, block);
 		if (IS_ERR(bh)) {
 			err = PTR_ERR(bh);
-			bh = NULL;
 			goto out;
 		}
 		overhead = ext4_group_overhead_blocks(sb, group);
@@ -618,9 +619,9 @@ handle_bb:
 		ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
 				     sb->s_blocksize * 8, bh->b_data);
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		brelse(bh);
 		if (err)
 			goto out;
-		brelse(bh);
 
handle_ib:
 		if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
@@ -635,18 +636,16 @@ handle_ib:
 		bh = bclean(handle, sb, block);
 		if (IS_ERR(bh)) {
 			err = PTR_ERR(bh);
-			bh = NULL;
 			goto out;
 		}
 
 		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
 				     sb->s_blocksize * 8, bh->b_data);
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		brelse(bh);
 		if (err)
 			goto out;
-		brelse(bh);
 	}
-	bh = NULL;
 
 	/* Mark group tables in block bitmap */
 	for (j = 0; j < GROUP_TABLE_COUNT; j++) {
@@ -685,7 +684,6 @@ handle_ib:
 	}
 
 out:
-	brelse(bh);
 	err2 = ext4_journal_stop(handle);
 	if (err2 && !err)
 		err = err2;
@@ -873,6 +871,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
 	if (unlikely(err)) {
 		ext4_std_error(sb, err);
+		iloc.bh = NULL;
 		goto exit_inode;
 	}
 	brelse(dind);
@@ -924,6 +923,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 				     sizeof(struct buffer_head *),
 				     GFP_NOFS);
 	if (!n_group_desc) {
+		brelse(gdb_bh);
 		err = -ENOMEM;
 		ext4_warning(sb, "not enough memory for %lu groups",
 			     gdb_num + 1);
@@ -939,8 +939,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 	kvfree(o_group_desc);
 	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
-	if (unlikely(err))
-		brelse(gdb_bh);
 	return err;
 }
 
@@ -1124,8 +1122,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
 			   backup_block, backup_block -
 			   ext4_group_first_block_no(sb, group));
 		BUFFER_TRACE(bh, "get_write_access");
-		if ((err = ext4_journal_get_write_access(handle, bh)))
+		if ((err = ext4_journal_get_write_access(handle, bh))) {
+			brelse(bh);
 			break;
+		}
 		lock_buffer(bh);
 		memcpy(bh->b_data, data, size);
 		if (rest)
@@ -2023,7 +2023,7 @@ retry:
 
 	err = ext4_alloc_flex_bg_array(sb, n_group + 1);
 	if (err)
-		return err;
+		goto out;
 
 	err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
 	if (err)
@@ -2059,6 +2059,10 @@ retry:
 		n_blocks_count_retry = 0;
 		free_flex_gd(flex_gd);
 		flex_gd = NULL;
+		if (resize_inode) {
+			iput(resize_inode);
+			resize_inode = NULL;
+		}
 		goto retry;
 	}
 
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a221f1cdf704..53ff6c2a26ed 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4075,6 +4075,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_groups_count = blocks_count;
 	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
 			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
+	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+	    le32_to_cpu(es->s_inodes_count)) {
+		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+			 le32_to_cpu(es->s_inodes_count),
+			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+		ret = -EINVAL;
+		goto failed_mount;
+	}
 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
 		   EXT4_DESC_PER_BLOCK(sb);
 	if (ext4_has_feature_meta_bg(sb)) {
@@ -4094,14 +4102,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		ret = -ENOMEM;
 		goto failed_mount;
 	}
-	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
-	    le32_to_cpu(es->s_inodes_count)) {
-		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
-			 le32_to_cpu(es->s_inodes_count),
-			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
-		ret = -EINVAL;
-		goto failed_mount;
-	}
 
 	bgl_lock_init(sbi->s_blockgroup_lock);
 
@@ -4510,6 +4510,7 @@ failed_mount6:
 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
 	percpu_counter_destroy(&sbi->s_dirs_counter);
 	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+	percpu_free_rwsem(&sbi->s_journal_flag_rwsem);
 failed_mount5:
 	ext4_ext_release(sb);
 	ext4_release_system_zone(sb);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index f36fc5d5b257..7643d52c776c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1031,10 +1031,8 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 	inode_lock(ea_inode);
 
 	ret = ext4_reserve_inode_write(handle, ea_inode, &iloc);
-	if (ret) {
-		iloc.bh = NULL;
+	if (ret)
 		goto out;
-	}
 
 	ref_count = ext4_xattr_inode_get_ref(ea_inode);
 	ref_count += ref_change;
@@ -1080,12 +1078,10 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
 	}
 
 	ret = ext4_mark_iloc_dirty(handle, ea_inode, &iloc);
-	iloc.bh = NULL;
 	if (ret)
 		ext4_warning_inode(ea_inode,
 				   "ext4_mark_iloc_dirty() failed ret=%d", ret);
 out:
-	brelse(iloc.bh);
 	inode_unlock(ea_inode);
 	return ret;
 }
@@ -1388,6 +1384,12 @@ retry:
 		bh = ext4_getblk(handle, ea_inode, block, 0);
 		if (IS_ERR(bh))
 			return PTR_ERR(bh);
+		if (!bh) {
+			WARN_ON_ONCE(1);
+			EXT4_ERROR_INODE(ea_inode,
+					 "ext4_getblk() return bh = NULL");
+			return -EFSCORRUPTED;
+		}
 		ret = ext4_journal_get_write_access(handle, bh);
 		if (ret)
 			goto out;
@@ -2276,8 +2278,10 @@ static struct buffer_head *ext4_xattr_get_block(struct inode *inode)
 	if (!bh)
 		return ERR_PTR(-EIO);
 	error = ext4_xattr_check_block(inode, bh);
-	if (error)
+	if (error) {
+		brelse(bh);
 		return ERR_PTR(error);
+	}
 	return bh;
 }
 
@@ -2397,6 +2401,8 @@ retry_inode:
 			error = ext4_xattr_block_set(handle, inode, &i, &bs);
 		} else if (error == -ENOSPC) {
 			if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
+				brelse(bs.bh);
+				bs.bh = NULL;
 				error = ext4_xattr_block_find(inode, &i, &bs);
 				if (error)
 					goto cleanup;
@@ -2617,6 +2623,8 @@ out:
 	kfree(buffer);
 	if (is)
 		brelse(is->iloc.bh);
+	if (bs)
+		brelse(bs->bh);
 	kfree(is);
 	kfree(bs);
 
@@ -2696,7 +2704,6 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
 			       struct ext4_inode *raw_inode, handle_t *handle)
 {
 	struct ext4_xattr_ibody_header *header;
-	struct buffer_head *bh;
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	static unsigned int mnt_count;
 	size_t min_offs;
@@ -2737,13 +2744,17 @@ retry:
 	 * EA block can hold new_extra_isize bytes.
 	 */
 	if (EXT4_I(inode)->i_file_acl) {
+		struct buffer_head *bh;
+
 		bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
 		error = -EIO;
 		if (!bh)
 			goto cleanup;
 		error = ext4_xattr_check_block(inode, bh);
-		if (error)
+		if (error) {
+			brelse(bh);
 			goto cleanup;
+		}
 		base = BHDR(bh);
 		end = bh->b_data + bh->b_size;
 		min_offs = end - base;
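Most of the ext4 hunks in this series (namei.c, resize.c, xattr.c) close the same kind of leak: a buffer_head obtained from sb_bread() or ext4_getblk() was not released with brelse() on some error path. The invariant they restore, shown as a standalone hedged sketch with made-up names:

#include <linux/buffer_head.h>
#include <linux/fs.h>

/* Hypothetical helper: every successful sb_bread() is balanced by exactly
 * one brelse(), whether the function succeeds or bails out early. */
static int example_read_and_check(struct super_block *sb, sector_t blocknr,
				  u16 expected_magic)
{
	struct buffer_head *bh;
	int err = 0;

	bh = sb_bread(sb, blocknr);		/* takes a reference */
	if (!bh)
		return -EIO;			/* nothing to release yet */

	if (le16_to_cpu(*(__le16 *)bh->b_data) != expected_magic)
		err = -EUCLEAN;			/* corrupt block */

	/* ... otherwise use bh->b_data here ... */

	brelse(bh);				/* single release, all paths */
	return err;
}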
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ae813e609932..a5e516a40e7a 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -165,9 +165,13 @@ static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background)
 
 static void fuse_drop_waiting(struct fuse_conn *fc)
 {
-	if (fc->connected) {
-		atomic_dec(&fc->num_waiting);
-	} else if (atomic_dec_and_test(&fc->num_waiting)) {
+	/*
+	 * lockess check of fc->connected is okay, because atomic_dec_and_test()
+	 * provides a memory barrier mached with the one in fuse_wait_aborted()
+	 * to ensure no wake-up is missed.
+	 */
+	if (atomic_dec_and_test(&fc->num_waiting) &&
+	    !READ_ONCE(fc->connected)) {
 		/* wake up aborters */
 		wake_up_all(&fc->blocked_waitq);
 	}
@@ -1768,8 +1772,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 	req->in.args[1].size = total_len;
 
 	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
-	if (err)
+	if (err) {
 		fuse_retrieve_end(fc, req);
+		fuse_put_request(fc, req);
+	}
 
 	return err;
 }
@@ -2219,6 +2225,8 @@ EXPORT_SYMBOL_GPL(fuse_abort_conn);
 
 void fuse_wait_aborted(struct fuse_conn *fc)
 {
+	/* matches implicit memory barrier in fuse_drop_waiting() */
+	smp_mb();
 	wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0);
 }
 
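The two fuse_drop_waiting()/fuse_wait_aborted() hunks above form a pair: the dropper relies on the full memory barrier implied by atomic_dec_and_test(), and the waiter adds an explicit smp_mb() before re-reading the counter, so either the dropper observes the cleared connected flag or the waiter observes num_waiting reaching zero. A stripped-down sketch of that pairing, with invented ex_* names:

#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/compiler.h>

struct ex_conn {
	atomic_t		num_waiting;
	unsigned		connected;
	wait_queue_head_t	waitq;
};

static void ex_drop_waiting(struct ex_conn *c)
{
	/* atomic_dec_and_test() implies a full barrier before the load below */
	if (atomic_dec_and_test(&c->num_waiting) &&
	    !READ_ONCE(c->connected))
		wake_up_all(&c->waitq);
}

static void ex_wait_aborted(struct ex_conn *c)
{
	/* c->connected was cleared earlier, in the abort path */
	smp_mb();	/* pairs with the barrier in atomic_dec_and_test() */
	wait_event(c->waitq, atomic_read(&c->num_waiting) == 0);
}

With both barriers in place, the store-buffering pattern guarantees that at least one side sees the other's store, so the final decrement can never be missed by a sleeping waiter.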
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index cc2121b37bf5..b52f9baaa3e7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2924,10 +2924,12 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	}
 
 	if (io->async) {
+		bool blocking = io->blocking;
+
 		fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
 
 		/* we have a non-extending, async request, so return */
-		if (!io->blocking)
+		if (!blocking)
 			return -EIOCBQUEUED;
 
 		wait_for_completion(&wait);
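The fuse_direct_IO() fix copies io->blocking into a local before calling fuse_aio_complete(), because completing the request may drop the last reference and free *io. The general shape of that fix as a self-contained sketch, with invented ex_* names:

#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/errno.h>

struct ex_io {
	struct kref	refcnt;
	bool		blocking;
};

static void ex_release(struct kref *kref)
{
	kfree(container_of(kref, struct ex_io, refcnt));
}

static void ex_complete(struct ex_io *io, ssize_t err)
{
	/* dropping the last reference frees io */
	kref_put(&io->refcnt, ex_release);
}

static ssize_t ex_finish_async(struct ex_io *io, ssize_t ret)
{
	bool blocking = io->blocking;	/* io may be freed by ex_complete() */

	ex_complete(io, ret < 0 ? ret : 0);

	if (!blocking)			/* must not touch *io past this point */
		return -EIOCBQUEUED;

	return ret;
}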
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index a683d9b27d76..9a4a15d646eb 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -826,7 +826,7 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length,
 	ret = gfs2_meta_inode_buffer(ip, &dibh);
 	if (ret)
 		goto unlock;
-	iomap->private = dibh;
+	mp->mp_bh[0] = dibh;
 
 	if (gfs2_is_stuffed(ip)) {
 		if (flags & IOMAP_WRITE) {
@@ -863,9 +863,6 @@ unstuff:
 	len = lblock_stop - lblock + 1;
 	iomap->length = len << inode->i_blkbits;
 
-	get_bh(dibh);
-	mp->mp_bh[0] = dibh;
-
 	height = ip->i_height;
 	while ((lblock + 1) * sdp->sd_sb.sb_bsize > sdp->sd_heightsize[height])
 		height++;
@@ -898,8 +895,6 @@ out:
 	iomap->bdev = inode->i_sb->s_bdev;
 unlock:
 	up_read(&ip->i_rw_mutex);
-	if (ret && dibh)
-		brelse(dibh);
 	return ret;
 
 do_alloc:
@@ -980,9 +975,9 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
 
 static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 				  loff_t length, unsigned flags,
-				  struct iomap *iomap)
+				  struct iomap *iomap,
+				  struct metapath *mp)
 {
-	struct metapath mp = { .mp_aheight = 1, };
 	struct gfs2_inode *ip = GFS2_I(inode);
 	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
@@ -996,9 +991,9 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 	unstuff = gfs2_is_stuffed(ip) &&
 		  pos + length > gfs2_max_stuffed_size(ip);
 
-	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
+	ret = gfs2_iomap_get(inode, pos, length, flags, iomap, mp);
 	if (ret)
-		goto out_release;
+		goto out_unlock;
 
 	alloc_required = unstuff || iomap->type == IOMAP_HOLE;
 
@@ -1013,7 +1008,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 
 		ret = gfs2_quota_lock_check(ip, &ap);
 		if (ret)
-			goto out_release;
+			goto out_unlock;
 
 		ret = gfs2_inplace_reserve(ip, &ap);
 		if (ret)
@@ -1038,17 +1033,15 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 		ret = gfs2_unstuff_dinode(ip, NULL);
 		if (ret)
 			goto out_trans_end;
-		release_metapath(&mp);
-		brelse(iomap->private);
-		iomap->private = NULL;
+		release_metapath(mp);
 		ret = gfs2_iomap_get(inode, iomap->offset, iomap->length,
-				     flags, iomap, &mp);
+				     flags, iomap, mp);
 		if (ret)
 			goto out_trans_end;
 	}
 
 	if (iomap->type == IOMAP_HOLE) {
-		ret = gfs2_iomap_alloc(inode, iomap, flags, &mp);
+		ret = gfs2_iomap_alloc(inode, iomap, flags, mp);
 		if (ret) {
 			gfs2_trans_end(sdp);
 			gfs2_inplace_release(ip);
@@ -1056,7 +1049,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 			goto out_qunlock;
 		}
 	}
-	release_metapath(&mp);
 	if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
 		iomap->page_done = gfs2_iomap_journaled_page_done;
 	return 0;
@@ -1069,10 +1061,7 @@ out_trans_fail:
 out_qunlock:
 	if (alloc_required)
 		gfs2_quota_unlock(ip);
-out_release:
-	if (iomap->private)
-		brelse(iomap->private);
-	release_metapath(&mp);
+out_unlock:
 	gfs2_write_unlock(inode);
 	return ret;
 }
@@ -1088,10 +1077,10 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 
 	trace_gfs2_iomap_start(ip, pos, length, flags);
 	if ((flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)) {
-		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap);
+		ret = gfs2_iomap_begin_write(inode, pos, length, flags, iomap, &mp);
 	} else {
 		ret = gfs2_iomap_get(inode, pos, length, flags, iomap, &mp);
-		release_metapath(&mp);
+
 		/*
 		 * Silently fall back to buffered I/O for stuffed files or if
 		 * we've hot a hole (see gfs2_file_direct_write).
@@ -1100,6 +1089,11 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length,
 		    iomap->type != IOMAP_MAPPED)
 			ret = -ENOTBLK;
 	}
+	if (!ret) {
+		get_bh(mp.mp_bh[0]);
+		iomap->private = mp.mp_bh[0];
+	}
+	release_metapath(&mp);
 	trace_gfs2_iomap_end(ip, iomap, ret);
 	return ret;
 }
@@ -1908,10 +1902,16 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
 		if (ret < 0)
 			goto out;
 
-		/* issue read-ahead on metadata */
-		if (mp.mp_aheight > 1) {
-			for (; ret > 1; ret--) {
-				metapointer_range(&mp, mp.mp_aheight - ret,
+		/* On the first pass, issue read-ahead on metadata. */
+		if (mp.mp_aheight > 1 && strip_h == ip->i_height - 1) {
+			unsigned int height = mp.mp_aheight - 1;
+
+			/* No read-ahead for data blocks. */
+			if (mp.mp_aheight - 1 == strip_h)
+				height--;
+
+			for (; height >= mp.mp_aheight - ret; height--) {
+				metapointer_range(&mp, height,
 						  start_list, start_aligned,
 						  end_list, end_aligned,
 						  &start, &end);
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index ffe3032b1043..b08a530433ad 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -733,6 +733,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 
 		if (gl) {
 			glock_clear_object(gl, rgd);
+			gfs2_rgrp_brelse(rgd);
 			gfs2_glock_put(gl);
 		}
 
@@ -1174,7 +1175,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd)
  * @rgd: the struct gfs2_rgrpd describing the RG to read in
  *
  * Read in all of a Resource Group's header and bitmap blocks.
- * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
+ * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
 *
 * Returns: errno
 */
diff --git a/fs/inode.c b/fs/inode.c
index 9e198f00b64c..35d2108d567c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -730,8 +730,11 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
 		return LRU_REMOVED;
 	}
 
-	/* recently referenced inodes get one more pass */
-	if (inode->i_state & I_REFERENCED) {
+	/*
+	 * Recently referenced inodes and inodes with many attached pages
+	 * get one more pass.
+	 */
+	if (inode->i_state & I_REFERENCED || inode->i_data.nrpages > 1) {
 		inode->i_state &= ~I_REFERENCED;
 		spin_unlock(&inode->i_lock);
 		return LRU_ROTATE;
diff --git a/fs/namespace.c b/fs/namespace.c
index 98d27da43304..a7f91265ea67 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -695,9 +695,6 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
 
 	hlist_for_each_entry(mp, chain, m_hash) {
 		if (mp->m_dentry == dentry) {
-			/* might be worth a WARN_ON() */
-			if (d_unlinked(dentry))
-				return ERR_PTR(-ENOENT);
 			mp->m_count++;
 			return mp;
 		}
@@ -711,6 +708,9 @@ static struct mountpoint *get_mountpoint(struct dentry *dentry)
 	int ret;
 
 	if (d_mountpoint(dentry)) {
+		/* might be worth a WARN_ON() */
+		if (d_unlinked(dentry))
+			return ERR_PTR(-ENOENT);
 mountpoint:
 		read_seqlock_excl(&mount_lock);
 		mp = lookup_mountpoint(dentry);
@@ -1540,8 +1540,13 @@ static int do_umount(struct mount *mnt, int flags)
 
 	namespace_lock();
 	lock_mount_hash();
-	event++;
 
+	/* Recheck MNT_LOCKED with the locks held */
+	retval = -EINVAL;
+	if (mnt->mnt.mnt_flags & MNT_LOCKED)
+		goto out;
+
+	event++;
 	if (flags & MNT_DETACH) {
 		if (!list_empty(&mnt->mnt_list))
 			umount_tree(mnt, UMOUNT_PROPAGATE);
@@ -1555,6 +1560,7 @@ static int do_umount(struct mount *mnt, int flags)
 			retval = 0;
 		}
 	}
+out:
 	unlock_mount_hash();
 	namespace_unlock();
 	return retval;
@@ -1645,7 +1651,7 @@ int ksys_umount(char __user *name, int flags)
 		goto dput_and_out;
 	if (!check_mnt(mnt))
 		goto dput_and_out;
-	if (mnt->mnt.mnt_flags & MNT_LOCKED)
+	if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
 		goto dput_and_out;
 	retval = -EPERM;
 	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
@@ -1728,8 +1734,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 	for (s = r; s; s = next_mnt(s, r)) {
 		if (!(flag & CL_COPY_UNBINDABLE) &&
 		    IS_MNT_UNBINDABLE(s)) {
-			s = skip_mnt_tree(s);
-			continue;
+			if (s->mnt.mnt_flags & MNT_LOCKED) {
+				/* Both unbindable and locked. */
+				q = ERR_PTR(-EPERM);
+				goto out;
+			} else {
+				s = skip_mnt_tree(s);
+				continue;
+			}
 		}
 		if (!(flag & CL_COPY_MNT_NS_FILE) &&
 		    is_mnt_ns_file(s->mnt.mnt_root)) {
@@ -1782,7 +1794,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
 {
 	namespace_lock();
 	lock_mount_hash();
-	umount_tree(real_mount(mnt), UMOUNT_SYNC);
+	umount_tree(real_mount(mnt), 0);
 	unlock_mount_hash();
 	namespace_unlock();
 }
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index fa515d5ea5ba..7b861bbc0b43 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -66,7 +66,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp, | |||
66 | out_iput: | 66 | out_iput: |
67 | rcu_read_unlock(); | 67 | rcu_read_unlock(); |
68 | trace_nfs4_cb_getattr(cps->clp, &args->fh, inode, -ntohl(res->status)); | 68 | trace_nfs4_cb_getattr(cps->clp, &args->fh, inode, -ntohl(res->status)); |
69 | iput(inode); | 69 | nfs_iput_and_deactive(inode); |
70 | out: | 70 | out: |
71 | dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status)); | 71 | dprintk("%s: exit with status = %d\n", __func__, ntohl(res->status)); |
72 | return res->status; | 72 | return res->status; |
@@ -108,7 +108,7 @@ __be32 nfs4_callback_recall(void *argp, void *resp, | |||
108 | } | 108 | } |
109 | trace_nfs4_cb_recall(cps->clp, &args->fh, inode, | 109 | trace_nfs4_cb_recall(cps->clp, &args->fh, inode, |
110 | &args->stateid, -ntohl(res)); | 110 | &args->stateid, -ntohl(res)); |
111 | iput(inode); | 111 | nfs_iput_and_deactive(inode); |
112 | out: | 112 | out: |
113 | dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); | 113 | dprintk("%s: exit with status = %d\n", __func__, ntohl(res)); |
114 | return res; | 114 | return res; |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index 07b839560576..6ec2f78c1e19 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -850,16 +850,23 @@ nfs_delegation_find_inode_server(struct nfs_server *server, | |||
850 | const struct nfs_fh *fhandle) | 850 | const struct nfs_fh *fhandle) |
851 | { | 851 | { |
852 | struct nfs_delegation *delegation; | 852 | struct nfs_delegation *delegation; |
853 | struct inode *res = NULL; | 853 | struct inode *freeme, *res = NULL; |
854 | 854 | ||
855 | list_for_each_entry_rcu(delegation, &server->delegations, super_list) { | 855 | list_for_each_entry_rcu(delegation, &server->delegations, super_list) { |
856 | spin_lock(&delegation->lock); | 856 | spin_lock(&delegation->lock); |
857 | if (delegation->inode != NULL && | 857 | if (delegation->inode != NULL && |
858 | nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { | 858 | nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { |
859 | res = igrab(delegation->inode); | 859 | freeme = igrab(delegation->inode); |
860 | if (freeme && nfs_sb_active(freeme->i_sb)) | ||
861 | res = freeme; | ||
860 | spin_unlock(&delegation->lock); | 862 | spin_unlock(&delegation->lock); |
861 | if (res != NULL) | 863 | if (res != NULL) |
862 | return res; | 864 | return res; |
865 | if (freeme) { | ||
866 | rcu_read_unlock(); | ||
867 | iput(freeme); | ||
868 | rcu_read_lock(); | ||
869 | } | ||
863 | return ERR_PTR(-EAGAIN); | 870 | return ERR_PTR(-EAGAIN); |
864 | } | 871 | } |
865 | spin_unlock(&delegation->lock); | 872 | spin_unlock(&delegation->lock); |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 62ae0fd345ad..ffea57885394 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -2601,11 +2601,12 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
2601 | nfs4_clear_state_manager_bit(clp); | 2601 | nfs4_clear_state_manager_bit(clp); |
2602 | /* Did we race with an attempt to give us more work? */ | 2602 | /* Did we race with an attempt to give us more work? */ |
2603 | if (clp->cl_state == 0) | 2603 | if (clp->cl_state == 0) |
2604 | break; | 2604 | return; |
2605 | if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) | 2605 | if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) |
2606 | break; | 2606 | return; |
2607 | } while (refcount_read(&clp->cl_count) > 1); | 2607 | } while (refcount_read(&clp->cl_count) > 1 && !signalled()); |
2608 | return; | 2608 | goto out_drain; |
2609 | |||
2609 | out_error: | 2610 | out_error: |
2610 | if (strlen(section)) | 2611 | if (strlen(section)) |
2611 | section_sep = ": "; | 2612 | section_sep = ": "; |
@@ -2613,6 +2614,7 @@ out_error: | |||
2613 | " with error %d\n", section_sep, section, | 2614 | " with error %d\n", section_sep, section, |
2614 | clp->cl_hostname, -status); | 2615 | clp->cl_hostname, -status); |
2615 | ssleep(1); | 2616 | ssleep(1); |
2617 | out_drain: | ||
2616 | nfs4_end_drain_session(clp); | 2618 | nfs4_end_drain_session(clp); |
2617 | nfs4_clear_state_manager_bit(clp); | 2619 | nfs4_clear_state_manager_bit(clp); |
2618 | } | 2620 | } |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index edff074d38c7..d505990dac7c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -1038,6 +1038,9 @@ nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1038 | { | 1038 | { |
1039 | __be32 status; | 1039 | __be32 status; |
1040 | 1040 | ||
1041 | if (!cstate->save_fh.fh_dentry) | ||
1042 | return nfserr_nofilehandle; | ||
1043 | |||
1041 | status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, | 1044 | status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh, |
1042 | src_stateid, RD_STATE, src, NULL); | 1045 | src_stateid, RD_STATE, src, NULL); |
1043 | if (status) { | 1046 | if (status) { |
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 5769cf3ff035..e08a6647267b 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -115,12 +115,12 @@ static bool fanotify_should_send_event(struct fsnotify_iter_info *iter_info, | |||
115 | continue; | 115 | continue; |
116 | mark = iter_info->marks[type]; | 116 | mark = iter_info->marks[type]; |
117 | /* | 117 | /* |
118 | * if the event is for a child and this inode doesn't care about | 118 | * If the event is for a child and this mark doesn't care about |
119 | * events on the child, don't send it! | 119 | * events on a child, don't send it! |
120 | */ | 120 | */ |
121 | if (type == FSNOTIFY_OBJ_TYPE_INODE && | 121 | if (event_mask & FS_EVENT_ON_CHILD && |
122 | (event_mask & FS_EVENT_ON_CHILD) && | 122 | (type != FSNOTIFY_OBJ_TYPE_INODE || |
123 | !(mark->mask & FS_EVENT_ON_CHILD)) | 123 | !(mark->mask & FS_EVENT_ON_CHILD))) |
124 | continue; | 124 | continue; |
125 | 125 | ||
126 | marks_mask |= mark->mask; | 126 | marks_mask |= mark->mask; |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2172ba516c61..d2c34900ae05 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -167,9 +167,9 @@ int __fsnotify_parent(const struct path *path, struct dentry *dentry, __u32 mask | |||
167 | parent = dget_parent(dentry); | 167 | parent = dget_parent(dentry); |
168 | p_inode = parent->d_inode; | 168 | p_inode = parent->d_inode; |
169 | 169 | ||
170 | if (unlikely(!fsnotify_inode_watches_children(p_inode))) | 170 | if (unlikely(!fsnotify_inode_watches_children(p_inode))) { |
171 | __fsnotify_update_child_dentry_flags(p_inode); | 171 | __fsnotify_update_child_dentry_flags(p_inode); |
172 | else if (p_inode->i_fsnotify_mask & mask) { | 172 | } else if (p_inode->i_fsnotify_mask & mask & ALL_FSNOTIFY_EVENTS) { |
173 | struct name_snapshot name; | 173 | struct name_snapshot name; |
174 | 174 | ||
175 | /* we are notifying a parent so come up with the new mask which | 175 | /* we are notifying a parent so come up with the new mask which |
@@ -339,6 +339,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is, | |||
339 | sb = mnt->mnt.mnt_sb; | 339 | sb = mnt->mnt.mnt_sb; |
340 | mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask; | 340 | mnt_or_sb_mask = mnt->mnt_fsnotify_mask | sb->s_fsnotify_mask; |
341 | } | 341 | } |
342 | /* An event "on child" is not intended for a mount/sb mark */ | ||
343 | if (mask & FS_EVENT_ON_CHILD) | ||
344 | mnt_or_sb_mask = 0; | ||
342 | 345 | ||
343 | /* | 346 | /* |
344 | * Optimization: srcu_read_lock() has a memory barrier which can | 347 | * Optimization: srcu_read_lock() has a memory barrier which can |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index da578ad4c08f..eb1ce30412dc 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -2411,8 +2411,16 @@ static int ocfs2_dio_end_io(struct kiocb *iocb, | |||
2411 | /* this io's submitter should not have unlocked this before we could */ | 2411 | /* this io's submitter should not have unlocked this before we could */ |
2412 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); | 2412 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); |
2413 | 2413 | ||
2414 | if (bytes > 0 && private) | 2414 | if (bytes <= 0) |
2415 | ret = ocfs2_dio_end_io_write(inode, private, offset, bytes); | 2415 | mlog_ratelimited(ML_ERROR, "Direct IO failed, bytes = %lld", |
2416 | (long long)bytes); | ||
2417 | if (private) { | ||
2418 | if (bytes > 0) | ||
2419 | ret = ocfs2_dio_end_io_write(inode, private, offset, | ||
2420 | bytes); | ||
2421 | else | ||
2422 | ocfs2_dio_free_write_ctx(inode, private); | ||
2423 | } | ||
2416 | 2424 | ||
2417 | ocfs2_iocb_clear_rw_locked(iocb); | 2425 | ocfs2_iocb_clear_rw_locked(iocb); |
2418 | 2426 | ||
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 308ea0eb35fd..a396096a5099 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -178,6 +178,15 @@ do { \ | |||
178 | ##__VA_ARGS__); \ | 178 | ##__VA_ARGS__); \ |
179 | } while (0) | 179 | } while (0) |
180 | 180 | ||
181 | #define mlog_ratelimited(mask, fmt, ...) \ | ||
182 | do { \ | ||
183 | static DEFINE_RATELIMIT_STATE(_rs, \ | ||
184 | DEFAULT_RATELIMIT_INTERVAL, \ | ||
185 | DEFAULT_RATELIMIT_BURST); \ | ||
186 | if (__ratelimit(&_rs)) \ | ||
187 | mlog(mask, fmt, ##__VA_ARGS__); \ | ||
188 | } while (0) | ||
189 | |||
181 | #define mlog_errno(st) ({ \ | 190 | #define mlog_errno(st) ({ \ |
182 | int _st = (st); \ | 191 | int _st = (st); \ |
183 | if (_st != -ERESTARTSYS && _st != -EINTR && \ | 192 | if (_st != -ERESTARTSYS && _st != -EINTR && \ |
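The new mlog_ratelimited() wrapper keeps one static ratelimit state per call site and only forwards the message to mlog() when __ratelimit() allows it, so the direct-IO error added in ocfs2_dio_end_io() cannot flood the log. A rough userspace analog of the same window/burst idea follows; the interval and burst fields are illustrative, not the kernel's DEFAULT_RATELIMIT_* values.

    #include <stdio.h>
    #include <time.h>

    struct ratelimit {
            time_t window_start;    /* start of the current interval */
            int    printed;         /* messages emitted in this interval */
            int    interval;        /* window length in seconds */
            int    burst;           /* max messages per window */
    };

    static int ratelimit_ok(struct ratelimit *rs)
    {
            time_t now = time(NULL);

            if (now - rs->window_start >= rs->interval) {
                    rs->window_start = now;
                    rs->printed = 0;
            }
            if (rs->printed >= rs->burst)
                    return 0;       /* drop this message */
            rs->printed++;
            return 1;
    }

    #define log_ratelimited(rs, fmt, ...)                           \
            do {                                                    \
                    if (ratelimit_ok(rs))                           \
                            fprintf(stderr, fmt, ##__VA_ARGS__);    \
            } while (0)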
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 6fc5425b1474..2652d00842d6 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c | |||
@@ -243,7 +243,7 @@ xfs_attr3_leaf_verify( | |||
243 | struct xfs_mount *mp = bp->b_target->bt_mount; | 243 | struct xfs_mount *mp = bp->b_target->bt_mount; |
244 | struct xfs_attr_leafblock *leaf = bp->b_addr; | 244 | struct xfs_attr_leafblock *leaf = bp->b_addr; |
245 | struct xfs_attr_leaf_entry *entries; | 245 | struct xfs_attr_leaf_entry *entries; |
246 | uint16_t end; | 246 | uint32_t end; /* must be 32bit - see below */ |
247 | int i; | 247 | int i; |
248 | 248 | ||
249 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); | 249 | xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf); |
@@ -293,6 +293,11 @@ xfs_attr3_leaf_verify( | |||
293 | /* | 293 | /* |
294 | * Quickly check the freemap information. Attribute data has to be | 294 | * Quickly check the freemap information. Attribute data has to be |
295 | * aligned to 4-byte boundaries, and likewise for the free space. | 295 | * aligned to 4-byte boundaries, and likewise for the free space. |
296 | * | ||
297 | * Note that for 64k block size filesystems, the freemap entries cannot | ||
298 | * overflow as they are only be16 fields. However, when checking the | ||
299 | * end pointer of the freemap, we have to be careful to detect overflows and | ||
300 | * so use uint32_t for those checks. | ||
296 | */ | 301 | */ |
297 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { | 302 | for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { |
298 | if (ichdr.freemap[i].base > mp->m_attr_geo->blksize) | 303 | if (ichdr.freemap[i].base > mp->m_attr_geo->blksize) |
@@ -303,7 +308,9 @@ xfs_attr3_leaf_verify( | |||
303 | return __this_address; | 308 | return __this_address; |
304 | if (ichdr.freemap[i].size & 0x3) | 309 | if (ichdr.freemap[i].size & 0x3) |
305 | return __this_address; | 310 | return __this_address; |
306 | end = ichdr.freemap[i].base + ichdr.freemap[i].size; | 311 | |
312 | /* be careful of 16 bit overflows here */ | ||
313 | end = (uint32_t)ichdr.freemap[i].base + ichdr.freemap[i].size; | ||
307 | if (end < ichdr.freemap[i].base) | 314 | if (end < ichdr.freemap[i].base) |
308 | return __this_address; | 315 | return __this_address; |
309 | if (end > mp->m_attr_geo->blksize) | 316 | if (end > mp->m_attr_geo->blksize) |
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 6e2c08f30f60..6ecdbb3af7de 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c | |||
@@ -1608,7 +1608,7 @@ xfs_ioc_getbmap( | |||
1608 | error = 0; | 1608 | error = 0; |
1609 | out_free_buf: | 1609 | out_free_buf: |
1610 | kmem_free(buf); | 1610 | kmem_free(buf); |
1611 | return 0; | 1611 | return error; |
1612 | } | 1612 | } |
1613 | 1613 | ||
1614 | struct getfsmap_info { | 1614 | struct getfsmap_info { |
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 576c375ce12a..6b736ea58d35 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c | |||
@@ -107,5 +107,5 @@ assfail(char *expr, char *file, int line) | |||
107 | void | 107 | void |
108 | xfs_hex_dump(void *p, int length) | 108 | xfs_hex_dump(void *p, int length) |
109 | { | 109 | { |
110 | print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1); | 110 | print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1); |
111 | } | 111 | } |
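Switching xfs_hex_dump() from DUMP_PREFIX_ADDRESS to DUMP_PREFIX_OFFSET makes print_hex_dump() label each 16-byte row with the offset into the dumped buffer rather than a kernel virtual address, which keeps dumps comparable across runs and avoids printing raw pointers. A userspace sketch of the offset-prefixed row format (not the kernel helper itself):

    #include <stddef.h>
    #include <stdio.h>

    static void hex_dump_offset(const void *p, size_t len)
    {
            const unsigned char *buf = p;

            for (size_t off = 0; off < len; off += 16) {
                    printf("%08zx: ", off);          /* offset prefix, as DUMP_PREFIX_OFFSET does */
                    for (size_t i = off; i < off + 16 && i < len; i++)
                            printf("%02x ", buf[i]);
                    putchar('\n');
            }
    }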