Merge branch 'v3.16-next/cleanup-samsung' into v3.16-next/platform-exynos

author: Kukjin Kim <kgene.kim@samsung.com> 2014-05-30 13:36:49 -0400
committer: Kukjin Kim <kgene.kim@samsung.com> 2014-05-30 13:36:49 -0400
commit: fced6dee29f6fb143fe16ea90331176ff77e6120 (patch)
tree: 5b6e57e7a757adc2a6518ce291a4d2914397b917 /fs
parent: bfed1074f213051e94648bfad0d0611a16d81366 (diff)
parent: be1f7c8d7e2bc8b8c76846aa6f276e8d2ef8975a (diff)
52 files changed, 657 insertions, 378 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 12a3de0ee6da..a0ed6c7d2cd2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -112,6 +112,11 @@ struct kioctx {
        struct work_struct      free_work;
+        /*
+         * signals when all in-flight requests are done
+         */
+        struct completion *requests_done;
        struct {
                /*
                 * This counts the number of available slots in the ringbuffer,
@@ -508,6 +513,10 @@ static void free_ioctx_reqs(struct percpu_ref *ref)
 {
        struct kioctx *ctx = container_of(ref, struct kioctx, reqs);
+        /* At this point we know that there are no in-flight requests */
+        if (ctx->requests_done)
+                complete(ctx->requests_done);
        INIT_WORK(&ctx->free_work, free_ioctx);
        schedule_work(&ctx->free_work);
 }
@@ -718,7 +727,8 @@ err:
 *      when the processes owning a context have all exited to encourage
 *      the rapid destruction of the kioctx.
 */
-static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
+static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
+                struct completion *requests_done)
 {
        if (!atomic_xchg(&ctx->dead, 1)) {
                struct kioctx_table *table;
@@ -747,7 +757,11 @@ static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
                if (ctx->mmap_size)
                        vm_munmap(ctx->mmap_base, ctx->mmap_size);
+                ctx->requests_done = requests_done;
                percpu_ref_kill(&ctx->users);
+        } else {
+                if (requests_done)
+                        complete(requests_done);
        }
 }
@@ -809,7 +823,7 @@ void exit_aio(struct mm_struct *mm)
                 */
                ctx->mmap_size = 0;
-                kill_ioctx(mm, ctx);
+                kill_ioctx(mm, ctx, NULL);
        }
 }
@@ -1185,7 +1199,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
        if (!IS_ERR(ioctx)) {
                ret = put_user(ioctx->user_id, ctxp);
                if (ret)
-                        kill_ioctx(current->mm, ioctx);
+                        kill_ioctx(current->mm, ioctx, NULL);
                percpu_ref_put(&ioctx->users);
        }
@@ -1203,8 +1217,22 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
        struct kioctx *ioctx = lookup_ioctx(ctx);
        if (likely(NULL != ioctx)) {
-                kill_ioctx(current->mm, ioctx);
+                struct completion requests_done =
+                        COMPLETION_INITIALIZER_ONSTACK(requests_done);
+                /* Pass requests_done to kill_ioctx() where it can be set
+                 * in a thread-safe way. If we try to set it here then we have
+                 * a race condition if two io_destroy() calls run concurrently.
+                 */
+                kill_ioctx(current->mm, ioctx, &requests_done);
                percpu_ref_put(&ioctx->users);
+                /* Wait until all IO for the context is done. Otherwise the
+                 * kernel keeps using user-space buffers even if the user
+                 * thinks the context is destroyed.
+                 */
+                wait_for_completion(&requests_done);
                return 0;
        }
        pr_debug("EINVAL: io_destroy: invalid context id\n");
@@ -1299,10 +1327,8 @@ rw_common:
                                                &iovec, compat)
                        : aio_setup_single_vector(req, rw, buf, &nr_segs,
                                                  iovec);
-                if (ret)
+                if (!ret)
-                        return ret;
+                        ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
-                ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
                if (ret < 0) {
                        if (iovec != &inline_vec)
                                kfree(iovec);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4c48df572bd6..ba6b88528dc7 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2058,6 +2058,20 @@ struct btrfs_ioctl_defrag_range_args {
 #define btrfs_raw_test_opt(o, opt)      ((o) & BTRFS_MOUNT_##opt)
 #define btrfs_test_opt(root, opt)       ((root)->fs_info->mount_opt & \
                                         BTRFS_MOUNT_##opt)
+/*
+ * Emit the btrfs_info() message only when mount option @opt is not yet set
+ * on @root's filesystem, then apply btrfs_set_opt() for it unconditionally.
+ *
+ * Wrapped in do { } while (0) so the expansion is a single statement and
+ * stays correct inside an unbraced if/else; @root is parenthesised so any
+ * pointer expression can be passed.
+ */
+#define btrfs_set_and_info(root, opt, fmt, args...)                     \
+do {                                                                    \
+        if (!btrfs_test_opt(root, opt))                                 \
+                btrfs_info((root)->fs_info, fmt, ##args);               \
+        btrfs_set_opt((root)->fs_info->mount_opt, opt);                 \
+} while (0)
+/*
+ * Emit the btrfs_info() message only when mount option @opt is currently
+ * set on @root's filesystem, then apply btrfs_clear_opt() for it
+ * unconditionally.
+ *
+ * Wrapped in do { } while (0) so the expansion is a single statement and
+ * stays correct inside an unbraced if/else; @root is parenthesised so any
+ * pointer expression can be passed.
+ */
+#define btrfs_clear_and_info(root, opt, fmt, args...)                   \
+do {                                                                    \
+        if (btrfs_test_opt(root, opt))                                  \
+                btrfs_info((root)->fs_info, fmt, ##args);               \
+        btrfs_clear_opt((root)->fs_info->mount_opt, opt);               \
+} while (0)
 /*
 * Inode flags
 */
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 029d46c2e170..983314932af3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2861,7 +2861,7 @@ retry_root_backup:
                        printk(KERN_ERR "BTRFS: failed to read log tree\n");
                        free_extent_buffer(log_tree_root->node);
                        kfree(log_tree_root);
-                        goto fail_trans_kthread;
+                        goto fail_qgroup;
                }
                /* returns with log_tree_root freed on success */
                ret = btrfs_recover_log_trees(log_tree_root);
@@ -2870,24 +2870,24 @@ retry_root_backup:
                                    "Failed to recover log tree");
                        free_extent_buffer(log_tree_root->node);
                        kfree(log_tree_root);
-                        goto fail_trans_kthread;
+                        goto fail_qgroup;
                }
                if (sb->s_flags & MS_RDONLY) {
                        ret = btrfs_commit_super(tree_root);
                        if (ret)
-                                goto fail_trans_kthread;
+                                goto fail_qgroup;
                }
        }
        ret = btrfs_find_orphan_roots(tree_root);
        if (ret)
-                goto fail_trans_kthread;
+                goto fail_qgroup;
        if (!(sb->s_flags & MS_RDONLY)) {
                ret = btrfs_cleanup_fs_roots(fs_info);
                if (ret)
-                        goto fail_trans_kthread;
+                        goto fail_qgroup;
                ret = btrfs_recover_relocation(tree_root);
                if (ret < 0) {
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 1306487c82cf..5590af92094b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1542,6 +1542,7 @@ again:
                                ret = 0;
                }
                if (ret) {
+                        key.objectid = bytenr;
                        key.type = BTRFS_EXTENT_ITEM_KEY;
                        key.offset = num_bytes;
                        btrfs_release_path(path);
@@ -3542,11 +3543,13 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
        return extended_to_chunk(flags | tmp);
 }
-static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
+static u64 get_alloc_profile(struct btrfs_root *root, u64 orig_flags)
 {
        unsigned seq;
+        u64 flags;
        do {
+                flags = orig_flags;
                seq = read_seqbegin(&root->fs_info->profiles_lock);
                if (flags & BTRFS_BLOCK_GROUP_DATA)
@@ -5719,6 +5722,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        if (ret > 0 && skinny_metadata) {
                                skinny_metadata = false;
+                                key.objectid = bytenr;
                                key.type = BTRFS_EXTENT_ITEM_KEY;
                                key.offset = num_bytes;
                                btrfs_release_path(path);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index eb742c07e7a4..ae6af072b635 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -800,7 +800,7 @@ next_slot:
                if (start > key.offset && end < extent_end) {
                        BUG_ON(del_nr > 0);
                        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-                                ret = -EINVAL;
+                                ret = -EOPNOTSUPP;
                                break;
                        }
@@ -846,7 +846,7 @@ next_slot:
                 */
                if (start <= key.offset && end < extent_end) {
                        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-                                ret = -EINVAL;
+                                ret = -EOPNOTSUPP;
                                break;
                        }
@@ -872,7 +872,7 @@ next_slot:
                if (start > key.offset && end >= extent_end) {
                        BUG_ON(del_nr > 0);
                        if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
-                                ret = -EINVAL;
+                                ret = -EOPNOTSUPP;
                                break;
                        }
@@ -1777,7 +1777,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        start_pos = round_down(pos, root->sectorsize);
        if (start_pos > i_size_read(inode)) {
                /* Expand hole size to cover write data, preventing empty gap */
-                end_pos = round_up(pos + iov->iov_len, root->sectorsize);
+                end_pos = round_up(pos + count, root->sectorsize);
                err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
                if (err) {
                        mutex_unlock(&inode->i_mutex);
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index cc8ca193d830..86935f5ae291 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -176,7 +176,11 @@ static void start_caching(struct btrfs_root *root)
        tsk = kthread_run(caching_kthread, root, "btrfs-ino-cache-%llu\n",
                          root->root_key.objectid);
-        BUG_ON(IS_ERR(tsk)); /* -ENOMEM */
+        if (IS_ERR(tsk)) {
+                btrfs_warn(root->fs_info, "failed to start inode caching task");
+                btrfs_clear_and_info(root, CHANGE_INODE_CACHE,
+                                "disabling inode map caching");
+        }
 }
 int btrfs_find_free_ino(struct btrfs_root *root, u64 *objectid)
@@ -205,24 +209,14 @@ again:
 void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
 {
-        struct btrfs_free_space_ctl *ctl = root->free_ino_ctl;
        struct btrfs_free_space_ctl *pinned = root->free_ino_pinned;
        if (!btrfs_test_opt(root, INODE_MAP_CACHE))
                return;
 again:
        if (root->cached == BTRFS_CACHE_FINISHED) {
-                __btrfs_add_free_space(ctl, objectid, 1);
+                __btrfs_add_free_space(pinned, objectid, 1);
        } else {
-                /*
-                 * If we are in the process of caching free ino chunks,
-                 * to avoid adding the same inode number to the free_ino
-                 * tree twice due to cross transaction, we'll leave it
-                 * in the pinned tree until a transaction is committed
-                 * or the caching work is done.
-                 */
                down_write(&root->fs_info->commit_root_sem);
                spin_lock(&root->cache_lock);
                if (root->cached == BTRFS_CACHE_FINISHED) {
@@ -234,11 +228,7 @@ again:
                start_caching(root);
-                if (objectid <= root->cache_progress ||
+                __btrfs_add_free_space(pinned, objectid, 1);
-                    objectid >= root->highest_objectid)
-                        __btrfs_add_free_space(ctl, objectid, 1);
-                else
-                        __btrfs_add_free_space(pinned, objectid, 1);
                up_write(&root->fs_info->commit_root_sem);
        }
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e79ff6b90cb7..2ad7de94efef 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3066,7 +3066,7 @@ process_slot:
                                                         new_key.offset + datal,
                                                         1);
                                if (ret) {
-                                        if (ret != -EINVAL)
+                                        if (ret != -EOPNOTSUPP)
                                                btrfs_abort_transaction(trans,
                                                                root, ret);
                                        btrfs_end_transaction(trans, root);
@@ -3141,7 +3141,7 @@ process_slot:
                                                         new_key.offset + datal,
                                                         1);
                                if (ret) {
-                                        if (ret != -EINVAL)
+                                        if (ret != -EOPNOTSUPP)
                                                btrfs_abort_transaction(trans,
                                                        root, ret);
                                        btrfs_end_transaction(trans, root);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 1ac3ca98c429..eb6537a08c1b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -349,6 +349,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
        if (p->buf_len >= len)
                return 0;
+        if (len > PATH_MAX) {
+                WARN_ON(1);
+                return -ENOMEM;
+        }
        path_len = p->end - p->start;
        old_buf_len = p->buf_len;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 5011aadacab8..9601d25a4607 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -385,20 +385,6 @@ static match_table_t tokens = {
        {Opt_err, NULL},
 };
-#define btrfs_set_and_info(root, opt, fmt, args...)                     \
-{                                                                       \
-        if (!btrfs_test_opt(root, opt))                                 \
-                btrfs_info(root->fs_info, fmt, ##args);                 \
-        btrfs_set_opt(root->fs_info->mount_opt, opt);                   \
-}
-#define btrfs_clear_and_info(root, opt, fmt, args...)                   \
-{                                                                       \
-        if (btrfs_test_opt(root, opt))                                  \
-                btrfs_info(root->fs_info, fmt, ##args);                 \
-        btrfs_clear_opt(root->fs_info->mount_opt, opt);                 \
-}
 /*
 * Regular mount options parser.  Everything that is needed only when
 * reading in a new superblock is parsed here.
@@ -1186,7 +1172,6 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
                return ERR_PTR(-ENOMEM);
        mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
                             newargs);
-        kfree(newargs);
        if (PTR_RET(mnt) == -EBUSY) {
                if (flags & MS_RDONLY) {
@@ -1196,17 +1181,22 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
                        int r;
                        mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
                                             newargs);
-                        if (IS_ERR(mnt))
+                        if (IS_ERR(mnt)) {
+                                kfree(newargs);
                                return ERR_CAST(mnt);
+                        }
                        r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
                        if (r < 0) {
                                /* FIXME: release vfsmount mnt ??*/
+                                kfree(newargs);
                                return ERR_PTR(r);
                        }
                }
        }
+        kfree(newargs);
        if (IS_ERR(mnt))
                return ERR_CAST(mnt);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 39da1c2efa50..88a6df4cbe6d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1221,9 +1221,6 @@ static long ceph_fallocate(struct file *file, int mode,
        if (!S_ISREG(inode->i_mode))
                return -EOPNOTSUPP;
-        if (IS_SWAPFILE(inode))
-                return -ETXTBSY;
        mutex_lock(&inode->i_mutex);
        if (ceph_snap(inode) != CEPH_NOSNAP) {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index df9c9141c099..5be1f997ecde 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -253,6 +253,11 @@ cifs_alloc_inode(struct super_block *sb)
        cifs_set_oplock_level(cifs_inode, 0);
        cifs_inode->delete_pending = false;
        cifs_inode->invalid_mapping = false;
+        clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cifs_inode->flags);
+        clear_bit(CIFS_INODE_PENDING_WRITERS, &cifs_inode->flags);
+        clear_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cifs_inode->flags);
+        spin_lock_init(&cifs_inode->writers_lock);
+        cifs_inode->writers = 0;
        cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
        cifs_inode->server_eof = 0;
        cifs_inode->uniqueid = 0;
@@ -732,19 +737,26 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
                                   unsigned long nr_segs, loff_t pos)
 {
        struct inode *inode = file_inode(iocb->ki_filp);
+        struct cifsInodeInfo *cinode = CIFS_I(inode);
        ssize_t written;
        int rc;
+        written = cifs_get_writer(cinode);
+        if (written)
+                return written;
        written = generic_file_aio_write(iocb, iov, nr_segs, pos);
        if (CIFS_CACHE_WRITE(CIFS_I(inode)))
-                return written;
+                goto out;
        rc = filemap_fdatawrite(inode->i_mapping);
        if (rc)
                cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n",
                         rc, inode);
+out:
+        cifs_put_writer(cinode);
        return written;
 }
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index c0f3718b77a8..30f6e9251a4a 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -228,6 +228,8 @@ struct smb_version_operations {
        /* verify the message */
        int (*check_message)(char *, unsigned int);
        bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
+        void (*downgrade_oplock)(struct TCP_Server_Info *,
+                                        struct cifsInodeInfo *, bool);
        /* process transaction2 response */
        bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *,
                             char *, int);
@@ -1113,6 +1115,12 @@ struct cifsInodeInfo {
        unsigned int epoch;             /* used to track lease state changes */
        bool delete_pending;            /* DELETE_ON_CLOSE is set */
        bool invalid_mapping;           /* pagecache is invalid */
+        unsigned long flags;
+#define CIFS_INODE_PENDING_OPLOCK_BREAK   (0) /* oplock break in progress */
+#define CIFS_INODE_PENDING_WRITERS        (1) /* Writes in progress */
+#define CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2 (2) /* Downgrade oplock to L2 */
+        spinlock_t writers_lock;
+        unsigned int writers;           /* Number of writers on this inode */
        unsigned long time;             /* jiffies of last update of inode */
        u64  server_eof;                /* current file size on server -- protected by i_lock */
        u64  uniqueid;                  /* server inode number */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index acc4ee8ed075..ca7980a1e303 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -127,6 +127,9 @@ extern u64 cifs_UnixTimeToNT(struct timespec);
 extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
                                      int offset);
 extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
+extern int cifs_get_writer(struct cifsInodeInfo *cinode);
+extern void cifs_put_writer(struct cifsInodeInfo *cinode);
+extern void cifs_done_oplock_break(struct cifsInodeInfo *cinode);
 extern int cifs_unlock_range(struct cifsFileInfo *cfile,
                             struct file_lock *flock, const unsigned int xid);
 extern int cifs_push_mandatory_locks(struct cifsFileInfo *cfile);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index f3264bd7a83d..6ce4e0954b98 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -6197,6 +6197,9 @@ QAllEAsRetry:
        cifs_dbg(FYI, "ea length %d\n", list_len);
        if (list_len <= 8) {
                cifs_dbg(FYI, "empty EA list returned from server\n");
+                /* didn't find the named attribute */
+                if (ea_name)
+                        rc = -ENODATA;
                goto QAllEAsOut;
        }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8add25538a3b..5ed03e0b8b40 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2599,7 +2599,7 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
                        ssize_t err;
                        err = generic_write_sync(file, iocb->ki_pos - rc, rc);
-                        if (rc < 0)
+                        if (err < 0)
                                rc = err;
                }
        } else {
@@ -2621,12 +2621,20 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        ssize_t written;
+        written = cifs_get_writer(cinode);
+        if (written)
+                return written;
        if (CIFS_CACHE_WRITE(cinode)) {
                if (cap_unix(tcon->ses) &&
                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
-                    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+                  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
-                        return generic_file_aio_write(iocb, iov, nr_segs, pos);
+                        written = generic_file_aio_write(
-                return cifs_writev(iocb, iov, nr_segs, pos);
+                                        iocb, iov, nr_segs, pos);
+                        goto out;
+                }
+                written = cifs_writev(iocb, iov, nr_segs, pos);
+                goto out;
        }
        /*
         * For non-oplocked files in strict cache mode we need to write the data
@@ -2646,6 +2654,8 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
                         inode);
                cinode->oplock = 0;
        }
+out:
+        cifs_put_writer(cinode);
        return written;
 }
@@ -2872,7 +2882,7 @@ ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
                                            cifs_uncached_readv_complete);
                if (!rdata) {
                        rc = -ENOMEM;
-                        goto error;
+                        break;
                }
                rc = cifs_read_allocate_pages(rdata, npages);
@@ -3621,6 +3631,13 @@ static int cifs_launder_page(struct page *page)
        return rc;
 }
+/*
+ * Action callback handed to wait_on_bit() by cifs_oplock_break() while it
+ * waits on CIFS_INODE_PENDING_WRITERS. Calls schedule() and always
+ * returns 0; the argument is ignored.
+ */
+static int
+cifs_pending_writers_wait(void *unused)
+{
+        schedule();
+        return 0;
+}
 void cifs_oplock_break(struct work_struct *work)
 {
        struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
@@ -3628,8 +3645,15 @@ void cifs_oplock_break(struct work_struct *work)
        struct inode *inode = cfile->dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+        struct TCP_Server_Info *server = tcon->ses->server;
        int rc = 0;
+        wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
+                        cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
+        server->ops->downgrade_oplock(server, cinode,
+                test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
        if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
@@ -3666,6 +3690,7 @@ void cifs_oplock_break(struct work_struct *work)
                                                             cinode);
                cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
        }
+        cifs_done_oplock_break(cinode);
 }
 /*
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 2f9f3790679d..3b0c62e622da 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -466,8 +466,22 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
                                cifs_dbg(FYI, "file id match, oplock break\n");
                                pCifsInode = CIFS_I(netfile->dentry->d_inode);
-                                cifs_set_oplock_level(pCifsInode,
+                                set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
-                                        pSMB->OplockLevel ? OPLOCK_READ : 0);
+                                        &pCifsInode->flags);
+                                /*
+                                 * Set flag if the server downgrades the oplock
+                                 * to L2 else clear.
+                                 */
+                                if (pSMB->OplockLevel)
+                                        set_bit(
+                                           CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+                                           &pCifsInode->flags);
+                                else
+                                        clear_bit(
+                                           CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+                                           &pCifsInode->flags);
                                queue_work(cifsiod_wq,
                                           &netfile->oplock_break);
                                netfile->oplock_break_cancelled = false;
@@ -551,6 +565,62 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
                cinode->oplock = 0;
 }
+/*
+ * Action callback handed to wait_on_bit() by cifs_get_writer(). Calls
+ * schedule(), then returns -ERESTARTSYS if signal_pending(current) is true
+ * and 0 otherwise; the argument is ignored.
+ */
+static int
+cifs_oplock_break_wait(void *unused)
+{
+        schedule();
+        return signal_pending(current) ? -ERESTARTSYS : 0;
+}
+/*
+ * We wait for oplock breaks to be processed before we attempt to perform
+ * writes.
+ *
+ * Returns 0 once the caller has been counted in cinode->writers, or the
+ * non-zero result of wait_on_bit() if that wait did not complete normally.
+ * Each successful call is expected to be balanced by cifs_put_writer(),
+ * which drops the count again.
+ */
+int cifs_get_writer(struct cifsInodeInfo *cinode)
+{
+        int rc;
+start:
+        /* Killable wait until no oplock break is pending on this inode. */
+        rc = wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK,
+                                   cifs_oplock_break_wait, TASK_KILLABLE);
+        if (rc)
+                return rc;
+        spin_lock(&cinode->writers_lock);
+        /* First writer in: set the bit cifs_oplock_break() waits on. */
+        if (!cinode->writers)
+                set_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
+        cinode->writers++;
+        /* Check to see if we have started servicing an oplock break */
+        if (test_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags)) {
+                /*
+                 * A break was flagged after the wait above returned: undo
+                 * our registration (waking the waiter if we were the only
+                 * writer) and go back to waiting.
+                 */
+                cinode->writers--;
+                if (cinode->writers == 0) {
+                        clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
+                        wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
+                }
+                spin_unlock(&cinode->writers_lock);
+                goto start;
+        }
+        spin_unlock(&cinode->writers_lock);
+        return 0;
+}
+/*
+ * Drop one writer from @cinode under writers_lock. When the count reaches
+ * zero, clear CIFS_INODE_PENDING_WRITERS and wake tasks waiting on that bit.
+ */
+void cifs_put_writer(struct cifsInodeInfo *cinode)
+{
+        spin_lock(&cinode->writers_lock);
+        if (--cinode->writers == 0) {
+                /* Last writer out: let a waiting oplock break proceed. */
+                clear_bit(CIFS_INODE_PENDING_WRITERS, &cinode->flags);
+                wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS);
+        }
+        spin_unlock(&cinode->writers_lock);
+}
+/*
+ * Clear CIFS_INODE_PENDING_OPLOCK_BREAK on @cinode and wake tasks waiting on
+ * that bit (cifs_get_writer() waits on it before admitting a writer).
+ */
+void cifs_done_oplock_break(struct cifsInodeInfo *cinode)
+{
+        clear_bit(CIFS_INODE_PENDING_OPLOCK_BREAK, &cinode->flags);
+        wake_up_bit(&cinode->flags, CIFS_INODE_PENDING_OPLOCK_BREAK);
+}
 bool
 backup_cred(struct cifs_sb_info *cifs_sb)
 {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 526fb89f9230..d1fdfa848703 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -372,6 +372,16 @@ coalesce_t2(char *second_buf, struct smb_hdr *target_hdr)
        return 0;
 }
+/*
+ * SMB1 ->downgrade_oplock handler: set @cinode's oplock level to OPLOCK_READ
+ * when @set_level2 is true, otherwise to 0. @server is not used here.
+ */
+static void
+cifs_downgrade_oplock(struct TCP_Server_Info *server,
+                        struct cifsInodeInfo *cinode, bool set_level2)
+{
+        cifs_set_oplock_level(cinode, set_level2 ? OPLOCK_READ : 0);
+}
 static bool
 cifs_check_trans2(struct mid_q_entry *mid, struct TCP_Server_Info *server,
                  char *buf, int malformed)
@@ -1019,6 +1029,7 @@ struct smb_version_operations smb1_operations = {
        .clear_stats = cifs_clear_stats,
        .print_stats = cifs_print_stats,
        .is_oplock_break = is_valid_oplock_break,
+        .downgrade_oplock = cifs_downgrade_oplock,
        .check_trans2 = cifs_check_trans2,
        .need_neg = cifs_need_neg,
        .negotiate = cifs_negotiate,
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index fb3966265b6e..b8021fde987d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -575,9 +575,21 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
                                else
                                        cfile->oplock_break_cancelled = false;
-                                server->ops->set_oplock_level(cinode,
+                                set_bit(CIFS_INODE_PENDING_OPLOCK_BREAK,
-                                  rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0,
+                                        &cinode->flags);
-                                  0, NULL);
+                                /*
+                                 * Set flag if the server downgrades the oplock
+                                 * to L2 else clear.
+                                 */
+                                if (rsp->OplockLevel)
+                                        set_bit(
+                                           CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+                                           &cinode->flags);
+                                else
+                                        clear_bit(
+                                           CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
+                                           &cinode->flags);
                                queue_work(cifsiod_wq, &cfile->oplock_break);
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 192f51a12cf1..35ddc3ed119d 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -905,6 +905,17 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
 }
 static void
+smb2_downgrade_oplock(struct TCP_Server_Info *server,
+                        struct cifsInodeInfo *cinode, bool set_level2)
+{
+        /*
+         * Hand the new level to the dialect's set_oplock_level hook:
+         * SMB2_OPLOCK_LEVEL_II when @set_level2 is true, otherwise 0.
+         * Epoch 0 and a NULL purge_cache pointer are passed either way.
+         */
+        server->ops->set_oplock_level(cinode,
+                        set_level2 ? SMB2_OPLOCK_LEVEL_II : 0, 0, NULL);
+}
+static void
 smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
                      unsigned int epoch, bool *purge_cache)
 {
@@ -1110,6 +1121,7 @@ struct smb_version_operations smb20_operations = {
        .clear_stats = smb2_clear_stats,
        .print_stats = smb2_print_stats,
        .is_oplock_break = smb2_is_valid_oplock_break,
+        .downgrade_oplock = smb2_downgrade_oplock,
        .need_neg = smb2_need_neg,
        .negotiate = smb2_negotiate,
        .negotiate_wsize = smb2_negotiate_wsize,
@@ -1184,6 +1196,7 @@ struct smb_version_operations smb21_operations = {
        .clear_stats = smb2_clear_stats,
        .print_stats = smb2_print_stats,
        .is_oplock_break = smb2_is_valid_oplock_break,
+        .downgrade_oplock = smb2_downgrade_oplock,
        .need_neg = smb2_need_neg,
        .negotiate = smb2_negotiate,
        .negotiate_wsize = smb2_negotiate_wsize,
@@ -1259,6 +1272,7 @@ struct smb_version_operations smb30_operations = {
        .print_stats = smb2_print_stats,
        .dump_share_caps = smb2_dump_share_caps,
        .is_oplock_break = smb2_is_valid_oplock_break,
+        .downgrade_oplock = smb2_downgrade_oplock,
        .need_neg = smb2_need_neg,
        .negotiate = smb2_negotiate,
        .negotiate_wsize = smb2_negotiate_wsize,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 860344701067..3802f8c94acc 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1352,7 +1352,6 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
                     u64 persistent_fid, u64 volatile_fid)
 {
        int rc;
-        char *res_key = NULL;
        struct  compress_ioctl fsctl_input;
        char *ret_data = NULL;
@@ -1365,7 +1364,6 @@ SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
                        2 /* in data len */, &ret_data /* out data */, NULL);
        cifs_dbg(FYI, "set compression rc %d\n", rc);
-        kfree(res_key);
        return rc;
 }
diff --git a/fs/compat.c b/fs/compat.c
index ca926ad0430c..66d3d3c6b4b2 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -457,9 +457,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
        case F_GETLK64:
        case F_SETLK64:
        case F_SETLKW64:
-        case F_GETLKP:
+        case F_OFD_GETLK:
-        case F_SETLKP:
+        case F_OFD_SETLK:
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                ret = get_compat_flock64(&f, compat_ptr(arg));
                if (ret != 0)
                        break;
@@ -468,7 +468,7 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
                conv_cmd = convert_fcntl_cmd(cmd);
                ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f);
                set_fs(old_fs);
-                if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) {
+                if ((conv_cmd == F_GETLK || conv_cmd == F_OFD_GETLK) && ret == 0) {
                        /* need to return lock information - see above for commentary */
                        if (f.l_start > COMPAT_LOFF_T_MAX)
                                ret = -EOVERFLOW;
@@ -493,9 +493,9 @@ COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
        case F_GETLK64:
        case F_SETLK64:
        case F_SETLKW64:
-        case F_GETLKP:
+        case F_OFD_GETLK:
-        case F_SETLKP:
+        case F_OFD_SETLK:
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                return -EINVAL;
        }
        return compat_sys_fcntl64(fd, cmd, arg);
diff --git a/fs/coredump.c b/fs/coredump.c
index e3ad709a4232..0b2528fb640e 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -73,10 +73,15 @@ static int expand_corename(struct core_name *cn, int size)
 static int cn_vprintf(struct core_name *cn, const char *fmt, va_list arg)
 {
        int free, need;
+        va_list arg_copy;
 again:
        free = cn->size - cn->used;
-        need = vsnprintf(cn->corename + cn->used, free, fmt, arg);
+        va_copy(arg_copy, arg);
+        need = vsnprintf(cn->corename + cn->used, free, fmt, arg_copy);
+        va_end(arg_copy);
        if (need < free) {
                cn->used += need;
                return 0;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6ea7b1436bbc..5c56785007e0 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -667,7 +667,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
                        continue;
                x = ext4_count_free(bitmap_bh->b_data,
-                                    EXT4_BLOCKS_PER_GROUP(sb) / 8);
+                                    EXT4_CLUSTERS_PER_GROUP(sb) / 8);
                printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
                        i, ext4_free_group_clusters(sb, gdp), x);
                bitmap_count += x;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f1c65dc7cc0a..66946aa62127 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2466,23 +2466,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
        up_write(&EXT4_I(inode)->i_data_sem);
 }
-/*
- * Update i_disksize after writeback has been started. Races with truncate
- * are avoided by checking i_size under i_data_sem.
- */
-static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
-{
-        loff_t i_size;
-        down_write(&EXT4_I(inode)->i_data_sem);
-        i_size = i_size_read(inode);
-        if (newsize > i_size)
-                newsize = i_size;
-        if (newsize > EXT4_I(inode)->i_disksize)
-                EXT4_I(inode)->i_disksize = newsize;
-        up_write(&EXT4_I(inode)->i_data_sem);
-}
 struct ext4_group_info {
        unsigned long   bb_state;
        struct rb_root  bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 82df3ce9874a..01b0c208f625 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3313,6 +3313,11 @@ static int ext4_split_extent(handle_t *handle,
                return PTR_ERR(path);
        depth = ext_depth(inode);
        ex = path[depth].p_ext;
+        if (!ex) {
+                EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                 (unsigned long) map->m_lblk);
+                return -EIO;
+        }
        uninitialized = ext4_ext_is_uninitialized(ex);
        split_flag1 = 0;
@@ -3694,6 +3699,12 @@ static int ext4_convert_initialized_extents(handle_t *handle,
                }
                depth = ext_depth(inode);
                ex = path[depth].p_ext;
+                if (!ex) {
+                        EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                         (unsigned long) map->m_lblk);
+                        err = -EIO;
+                        goto out;
+                }
        }
        err = ext4_ext_get_access(handle, inode, path + depth);
@@ -4730,6 +4741,9 @@ static long ext4_zero_range(struct file *file, loff_t offset,
        trace_ext4_zero_range(inode, offset, len, mode);
+        if (!S_ISREG(inode->i_mode))
+                return -EINVAL;
        /*
         * Write out all dirty pages to avoid race conditions
         * Then release them.
@@ -4878,9 +4892,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (mode & FALLOC_FL_PUNCH_HOLE)
                return ext4_punch_hole(inode, offset, len);
-        if (mode & FALLOC_FL_COLLAPSE_RANGE)
-                return ext4_collapse_range(inode, offset, len);
        ret = ext4_convert_inline_data(inode);
        if (ret)
                return ret;
@@ -4892,6 +4903,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
+        if (mode & FALLOC_FL_COLLAPSE_RANGE)
+                return ext4_collapse_range(inode, offset, len);
        if (mode & FALLOC_FL_ZERO_RANGE)
                return ext4_zero_range(file, offset, len, mode);
@@ -5229,18 +5243,19 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                        if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
                                update = 1;
-                        *start = ex_last->ee_block +
+                        *start = le32_to_cpu(ex_last->ee_block) +
                                ext4_ext_get_actual_len(ex_last);
                        while (ex_start <= ex_last) {
-                                ex_start->ee_block -= shift;
+                                le32_add_cpu(&ex_start->ee_block, -shift);
-                                if (ex_start >
+                                /* Try to merge to the left. */
-                                        EXT_FIRST_EXTENT(path[depth].p_hdr)) {
+                                if ((ex_start >
-                                        if (ext4_ext_try_to_merge_right(inode,
+                                     EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
-                                                path, ex_start - 1))
+                                    ext4_ext_try_to_merge_right(inode,
-                                                ex_last--;
+                                                        path, ex_start - 1))
-                                }
+                                        ex_last--;
-                                ex_start++;
+                                else
+                                        ex_start++;
                        }
                        err = ext4_ext_dirty(handle, inode, path + depth);
                        if (err)
@@ -5255,7 +5270,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
                if (err)
                        goto out;
-                path[depth].p_idx->ei_block -= shift;
+                le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
                err = ext4_ext_dirty(handle, inode, path + depth);
                if (err)
                        goto out;
@@ -5300,7 +5315,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
                return ret;
        }
-        stop_block = extent->ee_block + ext4_ext_get_actual_len(extent);
+        stop_block = le32_to_cpu(extent->ee_block) +
+                        ext4_ext_get_actual_len(extent);
        ext4_ext_drop_refs(path);
        kfree(path);
@@ -5313,10 +5329,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
         * enough to accomodate the shift.
         */
        path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
+        if (IS_ERR(path))
+                return PTR_ERR(path);
        depth = path->p_depth;
        extent =  path[depth].p_ext;
-        ex_start = extent->ee_block;
+        if (extent) {
-        ex_end = extent->ee_block + ext4_ext_get_actual_len(extent);
+                ex_start = le32_to_cpu(extent->ee_block);
+                ex_end = le32_to_cpu(extent->ee_block) +
+                        ext4_ext_get_actual_len(extent);
+        } else {
+                ex_start = 0;
+                ex_end = 0;
+        }
        ext4_ext_drop_refs(path);
        kfree(path);
@@ -5331,7 +5355,13 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
                        return PTR_ERR(path);
                depth = path->p_depth;
                extent = path[depth].p_ext;
-                current_block = extent->ee_block;
+                if (!extent) {
+                        EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
+                                         (unsigned long) start);
+                        return -EIO;
+                }
+                current_block = le32_to_cpu(extent->ee_block);
                if (start > current_block) {
                        /* Hole, move to the next extent */
                        ret = mext_next_extent(inode, path, &extent);
@@ -5365,17 +5395,18 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ext4_lblk_t punch_start, punch_stop;
        handle_t *handle;
        unsigned int credits;
-        loff_t new_size;
+        loff_t new_size, ioffset;
        int ret;
-        BUG_ON(offset + len > i_size_read(inode));
        /* Collapse range works only on fs block size aligned offsets. */
        if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
            len & (EXT4_BLOCK_SIZE(sb) - 1))
                return -EINVAL;
        if (!S_ISREG(inode->i_mode))
+                return -EINVAL;
+        if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
                return -EOPNOTSUPP;
        trace_ext4_collapse_range(inode, offset, len);
@@ -5383,22 +5414,34 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
        punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
+        /* Call ext4_force_commit to flush all data in case of data=journal. */
+        if (ext4_should_journal_data(inode)) {
+                ret = ext4_force_commit(inode->i_sb);
+                if (ret)
+                        return ret;
+        }
+        /*
+         * Need to round down offset to be aligned with page size boundary
+         * for page size > block size.
+         */
+        ioffset = round_down(offset, PAGE_SIZE);
        /* Write out all dirty pages */
-        ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1);
+        ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+                                           LLONG_MAX);
        if (ret)
                return ret;
        /* Take mutex lock */
        mutex_lock(&inode->i_mutex);
-        /* It's not possible punch hole on append only file */
+        /*
-        if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+         * There is no need to overlap collapse range with EOF, in which case
-                ret = -EPERM;
+         * it is effectively a truncate operation
-                goto out_mutex;
+         */
-        }
+        if (offset + len >= i_size_read(inode)) {
+                ret = -EINVAL;
-        if (IS_SWAPFILE(inode)) {
-                ret = -ETXTBSY;
                goto out_mutex;
        }
@@ -5408,7 +5451,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                goto out_mutex;
        }
-        truncate_pagecache_range(inode, offset, -1);
+        truncate_pagecache(inode, ioffset);
        /* Wait for existing dio to complete */
        ext4_inode_block_unlocked_dio(inode);
@@ -5425,7 +5468,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        ext4_discard_preallocations(inode);
        ret = ext4_es_remove_extent(inode, punch_start,
-                                    EXT_MAX_BLOCKS - punch_start - 1);
+                                    EXT_MAX_BLOCKS - punch_start);
        if (ret) {
                up_write(&EXT4_I(inode)->i_data_sem);
                goto out_stop;
@@ -5436,6 +5479,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                up_write(&EXT4_I(inode)->i_data_sem);
                goto out_stop;
        }
+        ext4_discard_preallocations(inode);
        ret = ext4_ext_shift_extents(inode, handle, punch_stop,
                                     punch_stop - punch_start);
@@ -5445,10 +5489,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
        }
        new_size = i_size_read(inode) - len;
-        truncate_setsize(inode, new_size);
+        i_size_write(inode, new_size);
        EXT4_I(inode)->i_disksize = new_size;
-        ext4_discard_preallocations(inode);
        up_write(&EXT4_I(inode)->i_data_sem);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0a014a7194b2..0ebc21204b51 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -810,7 +810,7 @@ retry:
                        newes.es_lblk = end + 1;
                        newes.es_len = len2;
-                        block = 0x7FDEADBEEF;
+                        block = 0x7FDEADBEEFULL;
                        if (ext4_es_is_written(&orig_es) ||
                            ext4_es_is_unwritten(&orig_es))
                                block = ext4_es_pblock(&orig_es) +
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ca7502d89fde..063fc1538355 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
        size_t count = iov_length(iov, nr_segs);
        loff_t final_size = pos + count;
-        if (pos >= inode->i_size)
+        if (pos >= i_size_read(inode))
                return 0;
        if ((pos & blockmask) || (final_size & blockmask))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5b0d2c7d5408..d7b7462a0e13 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -522,6 +522,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
        if (unlikely(map->m_len > INT_MAX))
                map->m_len = INT_MAX;
+        /* We can handle the block number less than EXT_MAX_BLOCKS */
+        if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
+                return -EIO;
        /* Lookup extent status tree firstly */
        if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
                ext4_es_lru_add(inode);
@@ -2243,13 +2247,23 @@ static int mpage_map_and_submit_extent(handle_t *handle,
                        return err;
        } while (map->m_len);
-        /* Update on-disk size after IO is submitted */
+        /*
+         * Update on-disk size after IO is submitted.  Races with
+         * truncate are avoided by checking i_size under i_data_sem.
+         */
        disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
        if (disksize > EXT4_I(inode)->i_disksize) {
                int err2;
+                loff_t i_size;
-                ext4_wb_update_i_disksize(inode, disksize);
+                down_write(&EXT4_I(inode)->i_data_sem);
+                i_size = i_size_read(inode);
+                if (disksize > i_size)
+                        disksize = i_size;
+                if (disksize > EXT4_I(inode)->i_disksize)
+                        EXT4_I(inode)->i_disksize = disksize;
                err2 = ext4_mark_inode_dirty(handle, inode);
+                up_write(&EXT4_I(inode)->i_data_sem);
                if (err2)
                        ext4_error(inode->i_sb,
                                   "Failed to mark inode %lu dirty",
@@ -3527,15 +3541,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
        }
        mutex_lock(&inode->i_mutex);
-        /* It's not possible punch hole on append only file */
-        if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
-                ret = -EPERM;
-                goto out_mutex;
-        }
-        if (IS_SWAPFILE(inode)) {
-                ret = -ETXTBSY;
-                goto out_mutex;
-        }
        /* No need to punch hole beyond i_size */
        if (offset >= inode->i_size)
@@ -3616,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
                ret = ext4_free_hole_blocks(handle, inode, first_block,
                                            stop_block);
-        ext4_discard_preallocations(inode);
        up_write(&EXT4_I(inode)->i_data_sem);
        if (IS_SYNC(inode))
                ext4_handle_sync(handle);
@@ -4423,21 +4427,20 @@ out_brelse:
 *
 * We are called from a few places:
 *
- * - Within generic_file_write() for O_SYNC files.
+ * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
 *   Here, there will be no transaction running. We wait for any running
 *   transaction to commit.
 *
- * - Within sys_sync(), kupdate and such.
+ * - Within flush work (sys_sync(), kupdate and such).
- *   We wait on commit, if tol to.
+ *   We wait on commit, if told to.
 *
- * - Within prune_icache() (PF_MEMALLOC == true)
+ * - Within iput_final() -> write_inode_now()
- *   Here we simply return.  We can't afford to block kswapd on the
+ *   We wait on commit, if told to.
- *   journal commit.
 *
 * In all cases it is actually safe for us to return without doing anything,
 * because the inode has been copied into a raw inode buffer in
- * ext4_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
+ * ext4_mark_inode_dirty().  This is a correctness thing for WB_SYNC_ALL
- * knfsd.
+ * writeback.
 *
 * Note that we are absolutely dependent upon all inode dirtiers doing the
 * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -4449,15 +4452,15 @@ out_brelse:
 *      stuff();
 *      inode->i_size = expr;
 *
- * is in error because a kswapd-driven write_inode() could occur while
+ * is in error because write_inode() could occur while `stuff()' is running,
- * `stuff()' is running, and the new i_size will be lost.  Plus the inode
+ * and the new i_size will be lost.  Plus the inode will no longer be on the
- * will no longer be on the superblock's dirty inode list.
+ * superblock's dirty inode list.
 */
 int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
        int err;
-        if (current->flags & PF_MEMALLOC)
+        if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
                return 0;
        if (EXT4_SB(inode->i_sb)->s_journal) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a888cac76e9c..c8238a26818c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -989,7 +989,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
        poff = block % blocks_per_page;
        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
        if (!page)
-                return -EIO;
+                return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
        e4b->bd_bitmap_page = page;
        e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
@@ -1003,7 +1003,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
        pnum = block / blocks_per_page;
        page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
        if (!page)
-                return -EIO;
+                return -ENOMEM;
        BUG_ON(page->mapping != inode->i_mapping);
        e4b->bd_buddy_page = page;
        return 0;
@@ -1168,7 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                        unlock_page(page);
                }
        }
-        if (page == NULL || !PageUptodate(page)) {
+        if (page == NULL) {
+                ret = -ENOMEM;
+                goto err;
+        }
+        if (!PageUptodate(page)) {
                ret = -EIO;
                goto err;
        }
@@ -1197,7 +1201,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                        unlock_page(page);
                }
        }
-        if (page == NULL || !PageUptodate(page)) {
+        if (page == NULL) {
+                ret = -ENOMEM;
+                goto err;
+        }
+        if (!PageUptodate(page)) {
                ret = -EIO;
                goto err;
        }
@@ -5008,6 +5016,8 @@ error_return:
 */
 static int ext4_trim_extent(struct super_block *sb, int start, int count,
                             ext4_group_t group, struct ext4_buddy *e4b)
+__releases(bitlock)
+__acquires(bitlock)
 {
        struct ext4_free_extent ex;
        int ret = 0;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index ab95508e3d40..c18d95b50540 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio, int error)
        if (error) {
                struct inode *inode = io_end->inode;
-                ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+                ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
                             "(offset %llu size %ld starting block %llu)",
-                             inode->i_ino,
+                             error, inode->i_ino,
                             (unsigned long long) io_end->offset,
                             (long) io_end->size,
                             (unsigned long long)
                             bi_sector >> (inode->i_blkbits - 9));
+                mapping_set_error(inode->i_mapping, error);
        }
        if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f3c667091618..6f9e6fadac04 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3869,19 +3869,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        goto failed_mount2;
                }
        }
+        /*
+         * set up enough so that it can read an inode,
+         * and create new inode for buddy allocator
+         */
+        sbi->s_gdb_count = db_count;
+        if (!test_opt(sb, NOLOAD) &&
+            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
+                sb->s_op = &ext4_sops;
+        else
+                sb->s_op = &ext4_nojournal_sops;
+        ext4_ext_init(sb);
+        err = ext4_mb_init(sb);
+        if (err) {
+                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
+                         err);
+                goto failed_mount2;
+        }
        if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
                ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
-                goto failed_mount2;
+                goto failed_mount2a;
        }
        if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
                if (!ext4_fill_flex_info(sb)) {
                        ext4_msg(sb, KERN_ERR,
                               "unable to initialize "
                               "flex_bg meta info!");
-                        goto failed_mount2;
+                        goto failed_mount2a;
                }
-        sbi->s_gdb_count = db_count;
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
@@ -3916,14 +3935,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_extent_max_zeroout_kb = 32;
-        /*
-         * set up enough so that it can read an inode
-         */
-        if (!test_opt(sb, NOLOAD) &&
-            EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
-                sb->s_op = &ext4_sops;
-        else
-                sb->s_op = &ext4_nojournal_sops;
        sb->s_export_op = &ext4_export_ops;
        sb->s_xattr = ext4_xattr_handlers;
 #ifdef CONFIG_QUOTA
@@ -4113,21 +4124,13 @@ no_journal:
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
                         "reserved pool", ext4_calculate_resv_clusters(sb));
-                goto failed_mount4a;
+                goto failed_mount5;
        }
        err = ext4_setup_system_zone(sb);
        if (err) {
                ext4_msg(sb, KERN_ERR, "failed to initialize system "
                         "zone (%d)", err);
-                goto failed_mount4a;
-        }
-        ext4_ext_init(sb);
-        err = ext4_mb_init(sb);
-        if (err) {
-                ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
-                         err);
                goto failed_mount5;
        }
@@ -4204,11 +4207,8 @@ failed_mount8:
 failed_mount7:
        ext4_unregister_li_request(sb);
 failed_mount6:
-        ext4_mb_release(sb);
-failed_mount5:
-        ext4_ext_release(sb);
        ext4_release_system_zone(sb);
-failed_mount4a:
+failed_mount5:
        dput(sb->s_root);
        sb->s_root = NULL;
 failed_mount4:
@@ -4232,11 +4232,14 @@ failed_mount3:
        percpu_counter_destroy(&sbi->s_extent_cache_cnt);
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
+failed_mount2a:
+        ext4_mb_release(sb);
 failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
        ext4_kvfree(sbi->s_group_desc);
 failed_mount:
+        ext4_ext_release(sb);
        if (sbi->s_chksum_driver)
                crypto_free_shash(sbi->s_chksum_driver);
        if (sbi->s_proc) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1f5cf5880718..4eec399ec807 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -520,8 +520,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
 }
 /*
- * Release the xattr block BH: If the reference count is > 1, decrement
+ * Release the xattr block BH: If the reference count is > 1, decrement it;
- * it; otherwise free the block.
+ * otherwise free the block.
 */
 static void
 ext4_xattr_release_block(handle_t *handle, struct inode *inode,
@@ -542,16 +542,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
                if (ce)
                        mb_cache_entry_free(ce);
                get_bh(bh);
+                unlock_buffer(bh);
                ext4_free_blocks(handle, inode, bh, 0, 1,
                                 EXT4_FREE_BLOCKS_METADATA |
                                 EXT4_FREE_BLOCKS_FORGET);
-                unlock_buffer(bh);
        } else {
                le32_add_cpu(&BHDR(bh)->h_refcount, -1);
                if (ce)
                        mb_cache_entry_release(ce);
+                /*
+                 * Beware of this ugliness: Releasing of xattr block references
+                 * from different inodes can race and so we have to protect
+                 * from a race where someone else frees the block (and releases
+                 * its journal_head) before we are done dirtying the buffer. In
+                 * nojournal mode this race is harmless and we actually cannot
+                 * call ext4_handle_dirty_xattr_block() with locked buffer as
+                 * that function can call sync_dirty_buffer() so for that case
+                 * we handle the dirtying after unlocking the buffer.
+                 */
+                if (ext4_handle_valid(handle))
+                        error = ext4_handle_dirty_xattr_block(handle, inode,
+                                                              bh);
                unlock_buffer(bh);
-                error = ext4_handle_dirty_xattr_block(handle, inode, bh);
+                if (!ext4_handle_valid(handle))
+                        error = ext4_handle_dirty_xattr_block(handle, inode,
+                                                              bh);
                if (IS_SYNC(inode))
                        ext4_handle_sync(handle);
                dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 9ead1596399a..72c82f69b01b 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -274,15 +274,15 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
                break;
 #if BITS_PER_LONG != 32
        /* 32-bit arches must use fcntl64() */
-        case F_GETLKP:
+        case F_OFD_GETLK:
 #endif
        case F_GETLK:
                err = fcntl_getlk(filp, cmd, (struct flock __user *) arg);
                break;
 #if BITS_PER_LONG != 32
        /* 32-bit arches must use fcntl64() */
-        case F_SETLKP:
+        case F_OFD_SETLK:
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
 #endif
                /* Fallthrough */
        case F_SETLK:
@@ -399,13 +399,13 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
        
        switch (cmd) {
        case F_GETLK64:
-        case F_GETLKP:
+        case F_OFD_GETLK:
                err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg);
                break;
        case F_SETLK64:
        case F_SETLKW64:
-        case F_SETLKP:
+        case F_OFD_SETLK:
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                err = fcntl_setlk64(fd, f.file, cmd,
                                (struct flock64 __user *) arg);
                break;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 78f3403300af..ac127cd008bf 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -232,9 +232,6 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
        struct rb_node **node = &kn->parent->dir.children.rb_node;
        struct rb_node *parent = NULL;
-        if (kernfs_type(kn) == KERNFS_DIR)
-                kn->parent->dir.subdirs++;
        while (*node) {
                struct kernfs_node *pos;
                int result;
@@ -249,9 +246,15 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
                else
                        return -EEXIST;
        }
        /* add new node and rebalance the tree */
        rb_link_node(&kn->rb, parent, node);
        rb_insert_color(&kn->rb, &kn->parent->dir.children);
+        /* successfully added, account subdir number */
+        if (kernfs_type(kn) == KERNFS_DIR)
+                kn->parent->dir.subdirs++;
        return 0;
 }
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 8034706a7af8..e01ea4a14a01 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -484,6 +484,8 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
        ops = kernfs_ops(of->kn);
        rc = ops->mmap(of, vma);
+        if (rc)
+                goto out_put;
        /*
         * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index abb0f1f53d93..985217626e66 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -48,14 +48,18 @@ void __init kernfs_inode_init(void)
 static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
 {
+        static DEFINE_MUTEX(iattr_mutex);
+        struct kernfs_iattrs *ret;
        struct iattr *iattrs;
+        mutex_lock(&iattr_mutex);
        if (kn->iattr)
-                return kn->iattr;
+                goto out_unlock;
        kn->iattr = kzalloc(sizeof(struct kernfs_iattrs), GFP_KERNEL);
        if (!kn->iattr)
-                return NULL;
+                goto out_unlock;
        iattrs = &kn->iattr->ia_iattr;
        /* assign default attributes */
@@ -65,8 +69,10 @@ static struct kernfs_iattrs *kernfs_iattrs(struct kernfs_node *kn)
        iattrs->ia_atime = iattrs->ia_mtime = iattrs->ia_ctime = CURRENT_TIME;
        simple_xattrs_init(&kn->iattr->xattrs);
+out_unlock:
-        return kn->iattr;
+        ret = kn->iattr;
+        mutex_unlock(&iattr_mutex);
+        return ret;
 }
 static int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
diff --git a/fs/locks.c b/fs/locks.c
index 13fc7a6d380a..e663aeac579e 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -135,7 +135,7 @@
 #define IS_POSIX(fl)    (fl->fl_flags & FL_POSIX)
 #define IS_FLOCK(fl)    (fl->fl_flags & FL_FLOCK)
 #define IS_LEASE(fl)    (fl->fl_flags & (FL_LEASE|FL_DELEG))
-#define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT)
+#define IS_OFDLCK(fl)   (fl->fl_flags & FL_OFDLCK)
 static bool lease_breaking(struct file_lock *fl)
 {
@@ -564,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker,
        BUG_ON(!list_empty(&waiter->fl_block));
        waiter->fl_next = blocker;
        list_add_tail(&waiter->fl_block, &blocker->fl_block);
-        if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker))
+        if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
                locks_insert_global_blocked(waiter);
 }
@@ -759,12 +759,12 @@ EXPORT_SYMBOL(posix_test_lock);
 * of tasks (such as posix threads) sharing the same open file table.
 * To handle those cases, we just bail out after a few iterations.
 *
- * For FL_FILE_PVT locks, the owner is the filp, not the files_struct.
+ * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
 * Because the owner is not even nominally tied to a thread of
 * execution, the deadlock detection below can't reasonably work well. Just
 * skip it for those.
 *
- * In principle, we could do a more limited deadlock detection on FL_FILE_PVT
+ * In principle, we could do a more limited deadlock detection on FL_OFDLCK
 * locks that just checks for the case where two tasks are attempting to
 * upgrade from read to write locks on the same inode.
 */
@@ -791,9 +791,9 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
        /*
         * This deadlock detector can't reasonably detect deadlocks with
-         * FL_FILE_PVT locks, since they aren't owned by a process, per-se.
+         * FL_OFDLCK locks, since they aren't owned by a process, per-se.
         */
-        if (IS_FILE_PVT(caller_fl))
+        if (IS_OFDLCK(caller_fl))
                return 0;
        while ((block_fl = what_owner_is_waiting_for(block_fl))) {
@@ -1391,11 +1391,10 @@ int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
 restart:
        break_time = flock->fl_break_time;
-        if (break_time != 0) {
+        if (break_time != 0)
                break_time -= jiffies;
-                if (break_time == 0)
+        if (break_time == 0)
-                        break_time++;
+                break_time++;
-        }
        locks_insert_block(flock, new_fl);
        spin_unlock(&inode->i_lock);
        error = wait_event_interruptible_timeout(new_fl->fl_wait,
@@ -1891,7 +1890,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 {
-        flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid;
+        flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
 #if BITS_PER_LONG == 32
        /*
         * Make sure we can represent the posix lock via
@@ -1913,7 +1912,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
 #if BITS_PER_LONG == 32
 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
 {
-        flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid;
+        flock->l_pid = IS_OFDLCK(fl) ? -1 : fl->fl_pid;
        flock->l_start = fl->fl_start;
        flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
                fl->fl_end - fl->fl_start + 1;
@@ -1942,13 +1941,13 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l)
        if (error)
                goto out;
-        if (cmd == F_GETLKP) {
+        if (cmd == F_OFD_GETLK) {
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_GETLK;
-                file_lock.fl_flags |= FL_FILE_PVT;
+                file_lock.fl_flags |= FL_OFDLCK;
                file_lock.fl_owner = (fl_owner_t)filp;
        }
@@ -2074,25 +2073,25 @@ again:
        /*
         * If the cmd is requesting file-private locks, then set the
-         * FL_FILE_PVT flag and override the owner.
+         * FL_OFDLCK flag and override the owner.
         */
        switch (cmd) {
-        case F_SETLKP:
+        case F_OFD_SETLK:
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_SETLK;
-                file_lock->fl_flags |= FL_FILE_PVT;
+                file_lock->fl_flags |= FL_OFDLCK;
                file_lock->fl_owner = (fl_owner_t)filp;
                break;
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_SETLKW;
-                file_lock->fl_flags |= FL_FILE_PVT;
+                file_lock->fl_flags |= FL_OFDLCK;
                file_lock->fl_owner = (fl_owner_t)filp;
                /* Fallthrough */
        case F_SETLKW:
@@ -2144,13 +2143,13 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
        if (error)
                goto out;
-        if (cmd == F_GETLKP) {
+        if (cmd == F_OFD_GETLK) {
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_GETLK64;
-                file_lock.fl_flags |= FL_FILE_PVT;
+                file_lock.fl_flags |= FL_OFDLCK;
                file_lock.fl_owner = (fl_owner_t)filp;
        }
@@ -2209,25 +2208,25 @@ again:
        /*
         * If the cmd is requesting file-private locks, then set the
-         * FL_FILE_PVT flag and override the owner.
+         * FL_OFDLCK flag and override the owner.
         */
        switch (cmd) {
-        case F_SETLKP:
+        case F_OFD_SETLK:
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_SETLK64;
-                file_lock->fl_flags |= FL_FILE_PVT;
+                file_lock->fl_flags |= FL_OFDLCK;
                file_lock->fl_owner = (fl_owner_t)filp;
                break;
-        case F_SETLKPW:
+        case F_OFD_SETLKW:
                error = -EINVAL;
                if (flock.l_pid != 0)
                        goto out;
                cmd = F_SETLKW64;
-                file_lock->fl_flags |= FL_FILE_PVT;
+                file_lock->fl_flags |= FL_OFDLCK;
                file_lock->fl_owner = (fl_owner_t)filp;
                /* Fallthrough */
        case F_SETLKW64:
@@ -2413,8 +2412,8 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
        if (IS_POSIX(fl)) {
                if (fl->fl_flags & FL_ACCESS)
                        seq_printf(f, "ACCESS");
-                else if (IS_FILE_PVT(fl))
+                else if (IS_OFDLCK(fl))
-                        seq_printf(f, "FLPVT ");
+                        seq_printf(f, "OFDLCK");
                else
                        seq_printf(f, "POSIX ");
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 39c8ef875f91..2c73cae9899d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -654,9 +654,11 @@ static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
 static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
 {
+        int maxtime = max_cb_time(clp->net);
        struct rpc_timeout      timeparms = {
-                .to_initval     = max_cb_time(clp->net),
+                .to_initval     = maxtime,
                .to_retries     = 0,
+                .to_maxval      = maxtime,
        };
        struct rpc_create_args args = {
                .net            = clp->net,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2723c1badd01..18881f34737a 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3627,14 +3627,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
        /* nfsd4_check_resp_size guarantees enough room for error status */
        if (!op->status)
                op->status = nfsd4_check_resp_size(resp, 0);
-        if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
-                struct nfsd4_slot *slot = resp->cstate.slot;
-                if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
-                        op->status = nfserr_rep_too_big_to_cache;
-                else
-                        op->status = nfserr_rep_too_big;
-        }
        if (so) {
                so->so_replay.rp_status = op->status;
                so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
diff --git a/fs/open.c b/fs/open.c
index 3d30eb1fc95e..9d64679cec73 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -254,17 +254,22 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
                return -EBADF;
        /*
-         * It's not possible to punch hole or perform collapse range
+         * We can only allow pure fallocate on append only files
-         * on append only file
         */
-        if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
+        if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
-            && IS_APPEND(inode))
                return -EPERM;
        if (IS_IMMUTABLE(inode))
                return -EPERM;
        /*
+         * We cannot allow any fallocate operation on an active
+         * swapfile.
+         */
+        if (IS_SWAPFILE(inode))
+                ret = -ETXTBSY;
+        /*
         * Revalidate the write permissions, in case security policy has
         * changed since the files were opened.
         */
@@ -286,14 +291,6 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
                return -EFBIG;
-        /*
-         * There is no need to overlap collapse range with EOF, in which case
-         * it is effectively a truncate operation
-         */
-        if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
-            (offset + len >= i_size_read(inode)))
-                return -EINVAL;
        if (!file->f_op->fallocate)
                return -EOPNOTSUPP;
diff --git a/fs/super.c b/fs/super.c
index e9dc3c3fe159..48377f7463c0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -800,7 +800,10 @@ void emergency_remount(void)
 static DEFINE_IDA(unnamed_dev_ida);
 static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
-static int unnamed_dev_start = 0; /* don't bother trying below it */
+/* Many userspace utilities consider an FSID of 0 invalid.
+ * Always return at least 1 from get_anon_bdev.
+ */
+static int unnamed_dev_start = 1;
 int get_anon_bdev(dev_t *p)
 {
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 1b8b91b67fdb..28cc1acd5439 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -453,95 +453,3 @@ void sysfs_remove_bin_file(struct kobject *kobj,
        kernfs_remove_by_name(kobj->sd, attr->attr.name);
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);
-struct sysfs_schedule_callback_struct {
-        struct list_head        workq_list;
-        struct kobject          *kobj;
-        void                    (*func)(void *);
-        void                    *data;
-        struct module           *owner;
-        struct work_struct      work;
-};
-static struct workqueue_struct *sysfs_workqueue;
-static DEFINE_MUTEX(sysfs_workq_mutex);
-static LIST_HEAD(sysfs_workq);
-static void sysfs_schedule_callback_work(struct work_struct *work)
-{
-        struct sysfs_schedule_callback_struct *ss = container_of(work,
-                        struct sysfs_schedule_callback_struct, work);
-        (ss->func)(ss->data);
-        kobject_put(ss->kobj);
-        module_put(ss->owner);
-        mutex_lock(&sysfs_workq_mutex);
-        list_del(&ss->workq_list);
-        mutex_unlock(&sysfs_workq_mutex);
-        kfree(ss);
-}
-/**
- * sysfs_schedule_callback - helper to schedule a callback for a kobject
- * @kobj: object we're acting for.
- * @func: callback function to invoke later.
- * @data: argument to pass to @func.
- * @owner: module owning the callback code
- *
- * sysfs attribute methods must not unregister themselves or their parent
- * kobject (which would amount to the same thing).  Attempts to do so will
- * deadlock, since unregistration is mutually exclusive with driver
- * callbacks.
- *
- * Instead methods can call this routine, which will attempt to allocate
- * and schedule a workqueue request to call back @func with @data as its
- * argument in the workqueue's process context.  @kobj will be pinned
- * until @func returns.
- *
- * Returns 0 if the request was submitted, -ENOMEM if storage could not
- * be allocated, -ENODEV if a reference to @owner isn't available,
- * -EAGAIN if a callback has already been scheduled for @kobj.
- */
-int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
-                void *data, struct module *owner)
-{
-        struct sysfs_schedule_callback_struct *ss, *tmp;
-        if (!try_module_get(owner))
-                return -ENODEV;
-        mutex_lock(&sysfs_workq_mutex);
-        list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
-                if (ss->kobj == kobj) {
-                        module_put(owner);
-                        mutex_unlock(&sysfs_workq_mutex);
-                        return -EAGAIN;
-                }
-        mutex_unlock(&sysfs_workq_mutex);
-        if (sysfs_workqueue == NULL) {
-                sysfs_workqueue = create_singlethread_workqueue("sysfsd");
-                if (sysfs_workqueue == NULL) {
-                        module_put(owner);
-                        return -ENOMEM;
-                }
-        }
-        ss = kmalloc(sizeof(*ss), GFP_KERNEL);
-        if (!ss) {
-                module_put(owner);
-                return -ENOMEM;
-        }
-        kobject_get(kobj);
-        ss->kobj = kobj;
-        ss->func = func;
-        ss->data = data;
-        ss->owner = owner;
-        INIT_WORK(&ss->work, sysfs_schedule_callback_work);
-        INIT_LIST_HEAD(&ss->workq_list);
-        mutex_lock(&sysfs_workq_mutex);
-        list_add_tail(&ss->workq_list, &sysfs_workq);
-        mutex_unlock(&sysfs_workq_mutex);
-        queue_work(sysfs_workqueue, &ss->work);
-        return 0;
-}
-EXPORT_SYMBOL_GPL(sysfs_schedule_callback);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 75df77d09f75..0479c32c5eb1 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1344,6 +1344,14 @@ __xfs_get_blocks(
        /*
         * If this is O_DIRECT or the mpage code calling tell them how large
         * the mapping is, so that we can avoid repeated get_blocks calls.
+         *
+         * If the mapping spans EOF, then we have to break the mapping up as the
+         * mapping for blocks beyond EOF must be marked new so that sub block
+         * regions can be correctly zeroed. We can't do this for mappings within
+         * EOF unless the mapping was just allocated or is unwritten, otherwise
+         * the callers would overwrite existing data with zeros. Hence we have
+         * to split the mapping into a range up to and including EOF, and a
+         * second mapping for beyond EOF.
         */
        if (direct || size > (1 << inode->i_blkbits)) {
                xfs_off_t               mapping_size;
@@ -1354,6 +1362,12 @@ __xfs_get_blocks(
                ASSERT(mapping_size > 0);
                if (mapping_size > size)
                        mapping_size = size;
+                if (offset < i_size_read(inode) &&
+                    offset + mapping_size >= i_size_read(inode)) {
+                        /* limit mapping to block that spans EOF */
+                        mapping_size = roundup_64(i_size_read(inode) - offset,
+                                                  1 << inode->i_blkbits);
+                }
                if (mapping_size > LONG_MAX)
                        mapping_size = LONG_MAX;
@@ -1566,6 +1580,16 @@ xfs_vm_write_failed(
                xfs_vm_kill_delalloc_range(inode, block_offset,
                                           block_offset + bh->b_size);
+                /*
+                 * This buffer does not contain data anymore. Make sure anyone
+                 * who finds it knows that for certain.
+                 */
+                clear_buffer_delay(bh);
+                clear_buffer_uptodate(bh);
+                clear_buffer_mapped(bh);
+                clear_buffer_new(bh);
+                clear_buffer_dirty(bh);
        }
 }
@@ -1599,12 +1623,21 @@ xfs_vm_write_begin(
        status = __block_write_begin(page, pos, len, xfs_get_blocks);
        if (unlikely(status)) {
                struct inode    *inode = mapping->host;
+                size_t          isize = i_size_read(inode);
                xfs_vm_write_failed(inode, page, pos, len);
                unlock_page(page);
-                if (pos + len > i_size_read(inode))
+                /*
-                        truncate_pagecache(inode, i_size_read(inode));
+                 * If the write is beyond EOF, we only want to kill blocks
+                 * allocated in this write, not blocks that were previously
+                 * written successfully.
+                 */
+                if (pos + len > isize) {
+                        ssize_t start = max_t(ssize_t, pos, isize);
+                        truncate_pagecache_range(inode, start, pos + len);
+                }
                page_cache_release(page);
                page = NULL;
@@ -1615,9 +1648,12 @@ xfs_vm_write_begin(
 }
 /*
- * On failure, we only need to kill delalloc blocks beyond EOF because they
+ * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * will never be written. For blocks within EOF, generic_write_end() zeros them
+ * this specific write because they will never be written. Previous writes
- * so they are safe to leave alone and be written with all the other valid data.
+ * beyond EOF where block allocation succeeded do not need to be trashed, so
+ * only new blocks from this write should be trashed. For blocks within
+ * EOF, generic_write_end() zeros them so they are safe to leave alone and be
+ * written with all the other valid data.
 */
 STATIC int
 xfs_vm_write_end(
@@ -1640,8 +1676,11 @@ xfs_vm_write_end(
                loff_t          to = pos + len;
                if (to > isize) {
-                        truncate_pagecache(inode, isize);
+                        /* only kill blocks in this write beyond EOF */
+                        if (pos > isize)
+                                isize = pos;
                        xfs_vm_kill_delalloc_range(inode, isize, to);
+                        truncate_pagecache_range(inode, isize, to);
                }
        }
        return ret;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 5b6092ef51ef..f0efc7e970ef 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5413,6 +5413,7 @@ xfs_bmap_shift_extents(
        int                             whichfork = XFS_DATA_FORK;
        int                             logflags;
        xfs_filblks_t                   blockcount = 0;
+        int                             total_extents;
        if (unlikely(XFS_TEST_ERROR(
            (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
@@ -5429,7 +5430,6 @@ xfs_bmap_shift_extents(
        ASSERT(current_ext != NULL);
        ifp = XFS_IFORK_PTR(ip, whichfork);
        if (!(ifp->if_flags & XFS_IFEXTENTS)) {
                /* Read in all the extents */
                error = xfs_iread_extents(tp, ip, whichfork);
@@ -5456,7 +5456,6 @@ xfs_bmap_shift_extents(
        /* We are going to change core inode */
        logflags = XFS_ILOG_CORE;
        if (ifp->if_flags & XFS_IFBROOT) {
                cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
                cur->bc_private.b.firstblock = *firstblock;
@@ -5467,8 +5466,14 @@ xfs_bmap_shift_extents(
                logflags |= XFS_ILOG_DEXT;
        }
-        while (nexts++ < num_exts &&
+        /*
-               *current_ext <  XFS_IFORK_NEXTENTS(ip, whichfork)) {
+         * There may be delalloc extents in the data fork before the range we
+         * are collapsing out, so we cannot
+         * use the count of real extents here. Instead we have to calculate it
+         * from the incore fork.
+         */
+        total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+        while (nexts++ < num_exts && *current_ext < total_extents) {
                gotp = xfs_iext_get_ext(ifp, *current_ext);
                xfs_bmbt_get_all(gotp, &got);
@@ -5556,10 +5561,11 @@ xfs_bmap_shift_extents(
                }
                (*current_ext)++;
+                total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
        }
        /* Check if we are done */
-        if (*current_ext ==  XFS_IFORK_NEXTENTS(ip, whichfork))
+        if (*current_ext == total_extents)
                *done = 1;
 del_cursor:
@@ -5568,6 +5574,5 @@ del_cursor:
                        error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
        xfs_trans_log_inode(tp, ip, logflags);
        return error;
 }
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 01f6a646caa1..296160b8e78c 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1418,6 +1418,8 @@ xfs_zero_file_space(
        xfs_off_t               end_boundary;
        int                     error;
+        trace_xfs_zero_file_space(ip);
        granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
        /*
@@ -1432,9 +1434,18 @@ xfs_zero_file_space(
        ASSERT(end_boundary <= offset + len);
        if (start_boundary < end_boundary - 1) {
-                /* punch out the page cache over the conversion range */
+                /*
+                 * punch out delayed allocation blocks and the page cache over
+                 * the conversion range
+                 */
+                xfs_ilock(ip, XFS_ILOCK_EXCL);
+                error = xfs_bmap_punch_delalloc_range(ip,
+                                XFS_B_TO_FSBT(mp, start_boundary),
+                                XFS_B_TO_FSB(mp, end_boundary - start_boundary));
+                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                truncate_pagecache_range(VFS_I(ip), start_boundary,
                                         end_boundary - 1);
                /* convert the blocks */
                error = xfs_alloc_file_space(ip, start_boundary,
                                        end_boundary - start_boundary - 1,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 107f2fdfe41f..cb10a0aaab3a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1372,21 +1372,29 @@ xfs_buf_iorequest(
                xfs_buf_wait_unpin(bp);
        xfs_buf_hold(bp);
-        /* Set the count to 1 initially, this will stop an I/O
+        /*
+         * Set the count to 1 initially, this will stop an I/O
         * completion callout which happens before we have started
         * all the I/O from calling xfs_buf_ioend too early.
         */
        atomic_set(&bp->b_io_remaining, 1);
        _xfs_buf_ioapply(bp);
-        _xfs_buf_ioend(bp, 1);
+        /*
+         * If _xfs_buf_ioapply failed, we'll get back here with
+         * only the reference we took above.  _xfs_buf_ioend will
+         * drop it to zero, so we'd better not queue it for later,
+         * or we'll free it before it's done.
+         */
+        _xfs_buf_ioend(bp, bp->b_error ? 0 : 1);
        xfs_buf_rele(bp);
 }
 /*
 * Waits for I/O to complete on the buffer supplied.  It returns immediately if
- * no I/O is pending or there is already a pending error on the buffer.  It
+ * no I/O is pending or there is already a pending error on the buffer, in which
- * returns the I/O error code, if any, or 0 if there was no error.
+ * case nothing will ever complete.  It returns the I/O error code, if any, or
+ * 0 if there was no error.
 */
 int
 xfs_buf_iowait(
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 79e96ce98733..951a2321ee01 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -679,7 +679,7 @@ xfs_file_dio_aio_write(
                goto out;
        if (mapping->nrpages) {
-                ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+                ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
                                                    pos, -1);
                if (ret)
                        goto out;
@@ -841,7 +841,15 @@ xfs_file_fallocate(
                        goto out_unlock;
                }
-                ASSERT(offset + len < i_size_read(inode));
+                /*
+                 * There is no need to overlap collapse range with EOF,
+                 * in which case it is effectively a truncate operation
+                 */
+                if (offset + len >= i_size_read(inode)) {
+                        error = -EINVAL;
+                        goto out_unlock;
+                }
                new_size = i_size_read(inode) - len;
                error = xfs_collapse_file_space(ip, offset, len);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5e7a38fa6ee6..768087bedbac 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1334,7 +1334,8 @@ int
 xfs_create_tmpfile(
        struct xfs_inode        *dp,
        struct dentry           *dentry,
-        umode_t                 mode)
+        umode_t                 mode,
+        struct xfs_inode        **ipp)
 {
        struct xfs_mount        *mp = dp->i_mount;
        struct xfs_inode        *ip = NULL;
@@ -1402,7 +1403,6 @@ xfs_create_tmpfile(
        xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
        ip->i_d.di_nlink--;
-        d_tmpfile(dentry, VFS_I(ip));
        error = xfs_iunlink(tp, ip);
        if (error)
                goto out_trans_abort;
@@ -1415,6 +1415,7 @@ xfs_create_tmpfile(
        xfs_qm_dqrele(gdqp);
        xfs_qm_dqrele(pdqp);
+        *ipp = ip;
        return 0;
 out_trans_abort:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 396cc1fafd0d..f2fcde52b66d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -334,7 +334,7 @@ int		xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
 int             xfs_create(struct xfs_inode *dp, struct xfs_name *name,
                           umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
 int             xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
-                           umode_t mode);
+                           umode_t mode, struct xfs_inode **ipp);
 int             xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
                           struct xfs_inode *ip);
 int             xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 89b07e43ca28..ef1ca010f417 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1053,11 +1053,25 @@ xfs_vn_tmpfile(
        struct dentry   *dentry,
        umode_t         mode)
 {
-        int             error;
+        int                     error;
+        struct xfs_inode        *ip;
+        struct inode            *inode;
-        error = xfs_create_tmpfile(XFS_I(dir), dentry, mode);
+        error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);
+        if (unlikely(error))
+                return -error;
-        return -error;
+        inode = VFS_I(ip);
+        error = xfs_init_security(inode, dir, &dentry->d_name);
+        if (unlikely(error)) {
+                iput(inode);
+                return -error;
+        }
+        d_tmpfile(dentry, inode);
+        return 0;
 }
 static const struct inode_operations xfs_inode_operations = {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 8497a00e399d..08624dc67317 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1181,11 +1181,14 @@ xlog_iodone(xfs_buf_t *bp)
        /* log I/O is always issued ASYNC */
        ASSERT(XFS_BUF_ISASYNC(bp));
        xlog_state_done_syncing(iclog, aborted);
        /*
-         * do not reference the buffer (bp) here as we could race
+         * drop the buffer lock now that we are done. Nothing references
-         * with it being freed after writing the unmount record to the
+         * the buffer after this, so an unmount waiting on this lock can now
-         * log.
+         * tear it down safely. As such, it is unsafe to reference the buffer
+         * (bp) after the unlock as we could race with it being freed.
         */
+        xfs_buf_unlock(bp);
 }
 /*
@@ -1368,8 +1371,16 @@ xlog_alloc_log(
        bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
        if (!bp)
                goto out_free_log;
-        bp->b_iodone = xlog_iodone;
+        /*
+         * The iclogbuf buffer locks are held over IO but we are not going to do
+         * IO yet.  Hence unlock the buffer so that the log IO path can grab it
+         * when appropriate.
+         */
        ASSERT(xfs_buf_islocked(bp));
+        xfs_buf_unlock(bp);
+        bp->b_iodone = xlog_iodone;
        log->l_xbuf = bp;
        spin_lock_init(&log->l_icloglock);
@@ -1398,6 +1409,9 @@ xlog_alloc_log(
                if (!bp)
                        goto out_free_iclog;
+                ASSERT(xfs_buf_islocked(bp));
+                xfs_buf_unlock(bp);
                bp->b_iodone = xlog_iodone;
                iclog->ic_bp = bp;
                iclog->ic_data = bp->b_addr;
@@ -1422,7 +1436,6 @@ xlog_alloc_log(
                iclog->ic_callback_tail = &(iclog->ic_callback);
                iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
-                ASSERT(xfs_buf_islocked(iclog->ic_bp));
                init_waitqueue_head(&iclog->ic_force_wait);
                init_waitqueue_head(&iclog->ic_write_wait);
@@ -1631,6 +1644,12 @@ xlog_cksum(
 * we transition the iclogs to IOERROR state *after* flushing all existing
 * iclogs to disk. This is because we don't want anymore new transactions to be
 * started or completed afterwards.
+ *
+ * We lock the iclogbufs here so that we can serialise against IO completion
+ * during unmount. We might be processing a shutdown triggered during unmount,
+ * and that can occur asynchronously to the unmount thread, and hence we need to
+ * ensure that completes before tearing down the iclogbufs. Hence we need to
+ * hold the buffer lock across the log IO to achieve that.
 */
 STATIC int
 xlog_bdstrat(
@@ -1638,6 +1657,7 @@ xlog_bdstrat(
 {
        struct xlog_in_core     *iclog = bp->b_fspriv;
+        xfs_buf_lock(bp);
        if (iclog->ic_state & XLOG_STATE_IOERROR) {
                xfs_buf_ioerror(bp, EIO);
                xfs_buf_stale(bp);
@@ -1645,7 +1665,8 @@ xlog_bdstrat(
                /*
                 * It would seem logical to return EIO here, but we rely on
                 * the log state machine to propagate I/O errors instead of
-                 * doing it here.
+                 * doing it here. Similarly, IO completion will unlock the
+                 * buffer, so we don't do it here.
                 */
                return 0;
        }
@@ -1847,14 +1868,28 @@ xlog_dealloc_log(
        xlog_cil_destroy(log);
        /*
-         * always need to ensure that the extra buffer does not point to memory
+         * Cycle all the iclogbuf locks to make sure all log IO completion
-         * owned by another log buffer before we free it.
+         * is done before we tear down these buffers.
         */
+        iclog = log->l_iclog;
+        for (i = 0; i < log->l_iclog_bufs; i++) {
+                xfs_buf_lock(iclog->ic_bp);
+                xfs_buf_unlock(iclog->ic_bp);
+                iclog = iclog->ic_next;
+        }
+        /*
+         * Always need to ensure that the extra buffer does not point to memory
+         * owned by another log buffer before we free it. Also, cycle the lock
+         * first to ensure we've completed IO on it.
+         */
+        xfs_buf_lock(log->l_xbuf);
+        xfs_buf_unlock(log->l_xbuf);
        xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
        xfs_buf_free(log->l_xbuf);
        iclog = log->l_iclog;
-        for (i=0; i<log->l_iclog_bufs; i++) {
+        for (i = 0; i < log->l_iclog_bufs; i++) {
                xfs_buf_free(iclog->ic_bp);
                next_iclog = iclog->ic_next;
                kmem_free(iclog);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index a4ae41c179a8..65d8c793a25c 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
 DEFINE_INODE_EVENT(xfs_inactive_symlink);
 DEFINE_INODE_EVENT(xfs_alloc_file_space);
 DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_zero_file_space);
 DEFINE_INODE_EVENT(xfs_collapse_file_space);
 DEFINE_INODE_EVENT(xfs_readdir);
 #ifdef CONFIG_XFS_POSIX_ACL
author	Kukjin Kim <kgene.kim@samsung.com>	2014-05-30 13:36:49 -0400
committer	Kukjin Kim <kgene.kim@samsung.com>	2014-05-30 13:36:49 -0400
commit	fced6dee29f6fb143fe16ea90331176ff77e6120 (patch)
tree	5b6e57e7a757adc2a6518ce291a4d2914397b917 /fs
parent	bfed1074f213051e94648bfad0d0611a16d81366 (diff)
parent	be1f7c8d7e2bc8b8c76846aa6f276e8d2ef8975a (diff)