-rw-r--r--  fs/direct-io.c | 18
-rw-r--r--  fs/open.c | 29
-rw-r--r--  fs/xfs/kmem.c | 21
-rw-r--r--  fs/xfs/xfs_acl.c | 2
-rw-r--r--  fs/xfs/xfs_ag.h | 6
-rw-r--r--  fs/xfs/xfs_alloc.c | 45
-rw-r--r--  fs/xfs/xfs_alloc_btree.c | 16
-rw-r--r--  fs/xfs/xfs_aops.c | 84
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 17
-rw-r--r--  fs/xfs/xfs_attr_remote.c | 15
-rw-r--r--  fs/xfs/xfs_bmap.c | 193
-rw-r--r--  fs/xfs/xfs_bmap.h | 15
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 16
-rw-r--r--  fs/xfs/xfs_bmap_util.c | 97
-rw-r--r--  fs/xfs/xfs_bmap_util.h | 2
-rw-r--r--  fs/xfs/xfs_btree.c | 14
-rw-r--r--  fs/xfs/xfs_buf.c | 11
-rw-r--r--  fs/xfs/xfs_buf.h | 14
-rw-r--r--  fs/xfs/xfs_buf_item.c | 19
-rw-r--r--  fs/xfs/xfs_da_btree.c | 19
-rw-r--r--  fs/xfs/xfs_dinode.h | 2
-rw-r--r--  fs/xfs/xfs_dir2.c | 342
-rw-r--r--  fs/xfs/xfs_dir2_block.c | 17
-rw-r--r--  fs/xfs/xfs_dir2_data.c | 20
-rw-r--r--  fs/xfs/xfs_dir2_leaf.c | 17
-rw-r--r--  fs/xfs/xfs_dir2_node.c | 17
-rw-r--r--  fs/xfs/xfs_dquot.c | 2
-rw-r--r--  fs/xfs/xfs_dquot_buf.c | 11
-rw-r--r--  fs/xfs/xfs_error.c | 27
-rw-r--r--  fs/xfs/xfs_error.h | 1
-rw-r--r--  fs/xfs/xfs_file.c | 26
-rw-r--r--  fs/xfs/xfs_format.h | 2
-rw-r--r--  fs/xfs/xfs_ialloc.c | 36
-rw-r--r--  fs/xfs/xfs_ialloc_btree.c | 16
-rw-r--r--  fs/xfs/xfs_inode.c | 123
-rw-r--r--  fs/xfs/xfs_inode.h | 12
-rw-r--r--  fs/xfs/xfs_inode_buf.c | 7
-rw-r--r--  fs/xfs/xfs_iomap.c | 10
-rw-r--r--  fs/xfs/xfs_iops.c | 30
-rw-r--r--  fs/xfs/xfs_linux.h | 2
-rw-r--r--  fs/xfs/xfs_log.h | 2
-rw-r--r--  fs/xfs/xfs_log_cil.c | 55
-rw-r--r--  fs/xfs/xfs_mount.c | 3
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 2
-rw-r--r--  fs/xfs/xfs_sb.c | 17
-rw-r--r--  fs/xfs/xfs_sb.h | 2
-rw-r--r--  fs/xfs/xfs_shared.h | 4
-rw-r--r--  fs/xfs/xfs_symlink.c | 9
-rw-r--r--  fs/xfs/xfs_symlink_remote.c | 16
-rw-r--r--  fs/xfs/xfs_trace.h | 1
-rw-r--r--  fs/xfs/xfs_trans.c | 12
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 11
-rw-r--r--  fs/xfs/xfs_trans_resv.c | 82
-rw-r--r--  fs/xfs/xfs_trans_resv.h | 3
-rw-r--r--  include/linux/fs.h | 3
-rw-r--r--  include/uapi/linux/falloc.h | 35
56 files changed, 1215 insertions(+), 415 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 6e6bff375244..31ba0935e32e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1193,13 +1193,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1193 } 1193 }
1194 1194
1195 /* 1195 /*
1196 * For file extending writes updating i_size before data 1196 * For file extending writes updating i_size before data writeouts
1197 * writeouts complete can expose uninitialized blocks. So 1197 * complete can expose uninitialized blocks in dumb filesystems.
1198 * even for AIO, we need to wait for i/o to complete before 1198 * In that case we need to wait for I/O completion even if asked
1199 * returning in this case. 1199 * for an asynchronous write.
1200 */ 1200 */
1201 dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && 1201 if (is_sync_kiocb(iocb))
1202 (end > i_size_read(inode))); 1202 dio->is_async = false;
1203 else if (!(dio->flags & DIO_ASYNC_EXTEND) &&
1204 (rw & WRITE) && end > i_size_read(inode))
1205 dio->is_async = false;
1206 else
1207 dio->is_async = true;
1208
1203 dio->inode = inode; 1209 dio->inode = inode;
1204 dio->rw = rw; 1210 dio->rw = rw;
1205 1211
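The hunk above turns the single is_async expression into a three-way decision keyed on the new DIO_ASYNC_EXTEND flag. A rough sketch of the resulting policy (the helper and its parameters are illustrative only, not part of the patch):

        /* Sketch of the dio->is_async decision after this change. */
        static bool dio_wants_async(bool sync_kiocb, bool fs_allows_async_extend,
                                    bool is_write, bool extends_i_size)
        {
                if (sync_kiocb)
                        return false;   /* synchronous kiocb: caller waits anyway */
                if (!fs_allows_async_extend && is_write && extends_i_size)
                        return false;   /* i_size update before writeout could expose blocks */
                return true;            /* otherwise keep AIO semantics */
        }

Filesystems that pass DIO_ASYNC_EXTEND, as the xfs_vm_direct_IO hunk later in this diff does, keep asynchronous completion even for size-extending writes.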
diff --git a/fs/open.c b/fs/open.c
index b9ed8b25c108..631aea815def 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,7 +231,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
231 return -EINVAL; 231 return -EINVAL;
232 232
233 /* Return error if mode is not supported */ 233 /* Return error if mode is not supported */
 234 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 234 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
 235 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
 236 return -EOPNOTSUPP;
 237
237
238 /* Punch hole and zero range are mutually exclusive */
239 if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) ==
240 (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))
235 return -EOPNOTSUPP; 241 return -EOPNOTSUPP;
236 242
237 /* Punch hole must have keep size set */ 243 /* Punch hole must have keep size set */
@@ -239,11 +245,20 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
239 !(mode & FALLOC_FL_KEEP_SIZE)) 245 !(mode & FALLOC_FL_KEEP_SIZE))
240 return -EOPNOTSUPP; 246 return -EOPNOTSUPP;
241 247
 248 /* Collapse range must be used exclusively; no other flags allowed. */
249 if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
250 (mode & ~FALLOC_FL_COLLAPSE_RANGE))
251 return -EINVAL;
252
242 if (!(file->f_mode & FMODE_WRITE)) 253 if (!(file->f_mode & FMODE_WRITE))
243 return -EBADF; 254 return -EBADF;
244 255
245 /* It's not possible punch hole on append only file */ 256 /*
 246 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode)) 257 * It's not possible to punch a hole or collapse a range
 258 * on an append-only file
259 */
260 if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)
261 && IS_APPEND(inode))
247 return -EPERM; 262 return -EPERM;
248 263
249 if (IS_IMMUTABLE(inode)) 264 if (IS_IMMUTABLE(inode))
@@ -271,6 +286,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
271 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 286 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
272 return -EFBIG; 287 return -EFBIG;
273 288
289 /*
 290 * A collapse range must not reach or cross EOF; in that case it
 291 * is effectively a truncate operation
292 */
293 if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
294 (offset + len >= i_size_read(inode)))
295 return -EINVAL;
296
274 if (!file->f_op->fallocate) 297 if (!file->f_op->fallocate)
275 return -EOPNOTSUPP; 298 return -EOPNOTSUPP;
276 299
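The fs/open.c checks above define the userspace contract for the new mode: FALLOC_FL_COLLAPSE_RANGE must be used with no other flags, the file must be writable and neither append-only nor immutable, and the range must end before EOF. A minimal user-space sketch exercising it (assumes headers that already carry the FALLOC_FL_COLLAPSE_RANGE definition added by the include/uapi/linux/falloc.h change in this series):

        #define _GNU_SOURCE
        #include <fcntl.h>
        #include <linux/falloc.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(int argc, char **argv)
        {
                int fd;

                if (argc < 2)
                        return 1;
                fd = open(argv[1], O_RDWR);
                if (fd < 0)
                        return 1;
                /*
                 * Collapse 1 MiB starting at offset 4 MiB. Combining this mode
                 * with any other flag, or letting offset + len reach EOF, makes
                 * do_fallocate() return EINVAL per the checks above.
                 */
                if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4 << 20, 1 << 20) < 0)
                        perror("fallocate");
                close(fd);
                return 0;
        }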
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 66a36befc5c0..844e288b9576 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -65,12 +65,31 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
65void * 65void *
66kmem_zalloc_large(size_t size, xfs_km_flags_t flags) 66kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
67{ 67{
68 unsigned noio_flag = 0;
68 void *ptr; 69 void *ptr;
70 gfp_t lflags;
69 71
70 ptr = kmem_zalloc(size, flags | KM_MAYFAIL); 72 ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
71 if (ptr) 73 if (ptr)
72 return ptr; 74 return ptr;
73 return vzalloc(size); 75
76 /*
 77 * __vmalloc() will allocate data pages and auxiliary structures (e.g.
78 * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
79 * here. Hence we need to tell memory reclaim that we are in such a
80 * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering
81 * the filesystem here and potentially deadlocking.
82 */
83 if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
84 noio_flag = memalloc_noio_save();
85
86 lflags = kmem_flags_convert(flags);
87 ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL);
88
89 if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
90 memalloc_noio_restore(noio_flag);
91
92 return ptr;
74} 93}
75 94
76void 95void
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 0ecec1896f25..6888ad886ff6 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -281,7 +281,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
281 if (!acl) 281 if (!acl)
282 goto set_acl; 282 goto set_acl;
283 283
284 error = -EINVAL; 284 error = -E2BIG;
285 if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) 285 if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb)))
286 return error; 286 return error;
287 287
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 3fc109819c34..0fdd4109c624 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -89,6 +89,8 @@ typedef struct xfs_agf {
89 /* structure must be padded to 64 bit alignment */ 89 /* structure must be padded to 64 bit alignment */
90} xfs_agf_t; 90} xfs_agf_t;
91 91
92#define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc)
93
92#define XFS_AGF_MAGICNUM 0x00000001 94#define XFS_AGF_MAGICNUM 0x00000001
93#define XFS_AGF_VERSIONNUM 0x00000002 95#define XFS_AGF_VERSIONNUM 0x00000002
94#define XFS_AGF_SEQNO 0x00000004 96#define XFS_AGF_SEQNO 0x00000004
@@ -167,6 +169,8 @@ typedef struct xfs_agi {
167 /* structure must be padded to 64 bit alignment */ 169 /* structure must be padded to 64 bit alignment */
168} xfs_agi_t; 170} xfs_agi_t;
169 171
172#define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc)
173
170#define XFS_AGI_MAGICNUM 0x00000001 174#define XFS_AGI_MAGICNUM 0x00000001
171#define XFS_AGI_VERSIONNUM 0x00000002 175#define XFS_AGI_VERSIONNUM 0x00000002
172#define XFS_AGI_SEQNO 0x00000004 176#define XFS_AGI_SEQNO 0x00000004
@@ -222,6 +226,8 @@ typedef struct xfs_agfl {
222 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ 226 __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
223} xfs_agfl_t; 227} xfs_agfl_t;
224 228
229#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
230
225/* 231/*
226 * tags for inode radix tree 232 * tags for inode radix tree
227 */ 233 */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 9eab2dfdcbb5..c1cf6a336a72 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -474,7 +474,6 @@ xfs_agfl_read_verify(
474 struct xfs_buf *bp) 474 struct xfs_buf *bp)
475{ 475{
476 struct xfs_mount *mp = bp->b_target->bt_mount; 476 struct xfs_mount *mp = bp->b_target->bt_mount;
477 int agfl_ok = 1;
478 477
479 /* 478 /*
480 * There is no verification of non-crc AGFLs because mkfs does not 479 * There is no verification of non-crc AGFLs because mkfs does not
@@ -485,15 +484,13 @@ xfs_agfl_read_verify(
485 if (!xfs_sb_version_hascrc(&mp->m_sb)) 484 if (!xfs_sb_version_hascrc(&mp->m_sb))
486 return; 485 return;
487 486
488 agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 487 if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
489 offsetof(struct xfs_agfl, agfl_crc)); 488 xfs_buf_ioerror(bp, EFSBADCRC);
490 489 else if (!xfs_agfl_verify(bp))
491 agfl_ok = agfl_ok && xfs_agfl_verify(bp);
492
493 if (!agfl_ok) {
494 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
495 xfs_buf_ioerror(bp, EFSCORRUPTED); 490 xfs_buf_ioerror(bp, EFSCORRUPTED);
496 } 491
492 if (bp->b_error)
493 xfs_verifier_error(bp);
497} 494}
498 495
499static void 496static void
@@ -508,16 +505,15 @@ xfs_agfl_write_verify(
508 return; 505 return;
509 506
510 if (!xfs_agfl_verify(bp)) { 507 if (!xfs_agfl_verify(bp)) {
511 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
512 xfs_buf_ioerror(bp, EFSCORRUPTED); 508 xfs_buf_ioerror(bp, EFSCORRUPTED);
509 xfs_verifier_error(bp);
513 return; 510 return;
514 } 511 }
515 512
516 if (bip) 513 if (bip)
517 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 514 XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
518 515
519 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 516 xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
520 offsetof(struct xfs_agfl, agfl_crc));
521} 517}
522 518
523const struct xfs_buf_ops xfs_agfl_buf_ops = { 519const struct xfs_buf_ops xfs_agfl_buf_ops = {
@@ -2238,19 +2234,17 @@ xfs_agf_read_verify(
2238 struct xfs_buf *bp) 2234 struct xfs_buf *bp)
2239{ 2235{
2240 struct xfs_mount *mp = bp->b_target->bt_mount; 2236 struct xfs_mount *mp = bp->b_target->bt_mount;
2241 int agf_ok = 1;
2242
2243 if (xfs_sb_version_hascrc(&mp->m_sb))
2244 agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
2245 offsetof(struct xfs_agf, agf_crc));
2246 2237
2247 agf_ok = agf_ok && xfs_agf_verify(mp, bp); 2238 if (xfs_sb_version_hascrc(&mp->m_sb) &&
2248 2239 !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
2249 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, 2240 xfs_buf_ioerror(bp, EFSBADCRC);
2250 XFS_RANDOM_ALLOC_READ_AGF))) { 2241 else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp,
2251 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 2242 XFS_ERRTAG_ALLOC_READ_AGF,
2243 XFS_RANDOM_ALLOC_READ_AGF))
2252 xfs_buf_ioerror(bp, EFSCORRUPTED); 2244 xfs_buf_ioerror(bp, EFSCORRUPTED);
2253 } 2245
2246 if (bp->b_error)
2247 xfs_verifier_error(bp);
2254} 2248}
2255 2249
2256static void 2250static void
@@ -2261,8 +2255,8 @@ xfs_agf_write_verify(
2261 struct xfs_buf_log_item *bip = bp->b_fspriv; 2255 struct xfs_buf_log_item *bip = bp->b_fspriv;
2262 2256
2263 if (!xfs_agf_verify(mp, bp)) { 2257 if (!xfs_agf_verify(mp, bp)) {
2264 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
2265 xfs_buf_ioerror(bp, EFSCORRUPTED); 2258 xfs_buf_ioerror(bp, EFSCORRUPTED);
2259 xfs_verifier_error(bp);
2266 return; 2260 return;
2267 } 2261 }
2268 2262
@@ -2272,8 +2266,7 @@ xfs_agf_write_verify(
2272 if (bip) 2266 if (bip)
2273 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); 2267 XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2274 2268
2275 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 2269 xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
2276 offsetof(struct xfs_agf, agf_crc));
2277} 2270}
2278 2271
2279const struct xfs_buf_ops xfs_agf_buf_ops = { 2272const struct xfs_buf_ops xfs_agf_buf_ops = {
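Most read verifiers in this diff converge on the same shape: a CRC failure sets the new EFSBADCRC error, a structural check failure sets EFSCORRUPTED, and reporting is centralized in xfs_verifier_error(). A condensed sketch of that shape, with xfs_foo_verify() and XFS_FOO_CRC_OFF standing in for the per-structure verifier and CRC offset:

        static void
        xfs_foo_read_verify(
                struct xfs_buf          *bp)
        {
                struct xfs_mount        *mp = bp->b_target->bt_mount;

                if (xfs_sb_version_hascrc(&mp->m_sb) &&
                    !xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
                        xfs_buf_ioerror(bp, EFSBADCRC);         /* CRC/media problem */
                else if (!xfs_foo_verify(bp))
                        xfs_buf_ioerror(bp, EFSCORRUPTED);      /* structural problem */

                if (bp->b_error)
                        xfs_verifier_error(bp);                 /* single reporting point */
        }

The per-callsite XFS_CORRUPTION_ERROR() calls removed above become redundant once xfs_verifier_error() owns the reporting.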
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 13085429e523..cc1eadcbb049 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -355,12 +355,14 @@ static void
355xfs_allocbt_read_verify( 355xfs_allocbt_read_verify(
356 struct xfs_buf *bp) 356 struct xfs_buf *bp)
357{ 357{
358 if (!(xfs_btree_sblock_verify_crc(bp) && 358 if (!xfs_btree_sblock_verify_crc(bp))
359 xfs_allocbt_verify(bp))) { 359 xfs_buf_ioerror(bp, EFSBADCRC);
360 trace_xfs_btree_corrupt(bp, _RET_IP_); 360 else if (!xfs_allocbt_verify(bp))
361 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
362 bp->b_target->bt_mount, bp->b_addr);
363 xfs_buf_ioerror(bp, EFSCORRUPTED); 361 xfs_buf_ioerror(bp, EFSCORRUPTED);
362
363 if (bp->b_error) {
364 trace_xfs_btree_corrupt(bp, _RET_IP_);
365 xfs_verifier_error(bp);
364 } 366 }
365} 367}
366 368
@@ -370,9 +372,9 @@ xfs_allocbt_write_verify(
370{ 372{
371 if (!xfs_allocbt_verify(bp)) { 373 if (!xfs_allocbt_verify(bp)) {
372 trace_xfs_btree_corrupt(bp, _RET_IP_); 374 trace_xfs_btree_corrupt(bp, _RET_IP_);
373 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
374 bp->b_target->bt_mount, bp->b_addr);
375 xfs_buf_ioerror(bp, EFSCORRUPTED); 375 xfs_buf_ioerror(bp, EFSCORRUPTED);
376 xfs_verifier_error(bp);
377 return;
376 } 378 }
377 xfs_btree_sblock_calc_crc(bp); 379 xfs_btree_sblock_calc_crc(bp);
378 380
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index db2cfb067d0b..75df77d09f75 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -632,38 +632,46 @@ xfs_map_at_offset(
632} 632}
633 633
634/* 634/*
635 * Test if a given page is suitable for writing as part of an unwritten 635 * Test if a given page contains at least one buffer of a given @type.
636 * or delayed allocate extent. 636 * If @check_all_buffers is true, then we walk all the buffers in the page to
637 * try to find one of the type passed in. If it is not set, then the caller only
638 * needs to check the first buffer on the page for a match.
637 */ 639 */
638STATIC int 640STATIC bool
639xfs_check_page_type( 641xfs_check_page_type(
640 struct page *page, 642 struct page *page,
641 unsigned int type) 643 unsigned int type,
644 bool check_all_buffers)
642{ 645{
643 if (PageWriteback(page)) 646 struct buffer_head *bh;
644 return 0; 647 struct buffer_head *head;
645 648
646 if (page->mapping && page_has_buffers(page)) { 649 if (PageWriteback(page))
647 struct buffer_head *bh, *head; 650 return false;
648 int acceptable = 0; 651 if (!page->mapping)
652 return false;
653 if (!page_has_buffers(page))
654 return false;
649 655
650 bh = head = page_buffers(page); 656 bh = head = page_buffers(page);
651 do { 657 do {
652 if (buffer_unwritten(bh)) 658 if (buffer_unwritten(bh)) {
653 acceptable += (type == XFS_IO_UNWRITTEN); 659 if (type == XFS_IO_UNWRITTEN)
654 else if (buffer_delay(bh)) 660 return true;
655 acceptable += (type == XFS_IO_DELALLOC); 661 } else if (buffer_delay(bh)) {
656 else if (buffer_dirty(bh) && buffer_mapped(bh)) 662 if (type == XFS_IO_DELALLOC)
657 acceptable += (type == XFS_IO_OVERWRITE); 663 return true;
658 else 664 } else if (buffer_dirty(bh) && buffer_mapped(bh)) {
659 break; 665 if (type == XFS_IO_OVERWRITE)
660 } while ((bh = bh->b_this_page) != head); 666 return true;
667 }
661 668
662 if (acceptable) 669 /* If we are only checking the first buffer, we are done now. */
663 return 1; 670 if (!check_all_buffers)
664 } 671 break;
672 } while ((bh = bh->b_this_page) != head);
665 673
666 return 0; 674 return false;
667} 675}
668 676
669/* 677/*
@@ -697,7 +705,7 @@ xfs_convert_page(
697 goto fail_unlock_page; 705 goto fail_unlock_page;
698 if (page->mapping != inode->i_mapping) 706 if (page->mapping != inode->i_mapping)
699 goto fail_unlock_page; 707 goto fail_unlock_page;
700 if (!xfs_check_page_type(page, (*ioendp)->io_type)) 708 if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
701 goto fail_unlock_page; 709 goto fail_unlock_page;
702 710
703 /* 711 /*
@@ -742,6 +750,15 @@ xfs_convert_page(
742 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; 750 p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
743 page_dirty = p_offset / len; 751 page_dirty = p_offset / len;
744 752
753 /*
754 * The moment we find a buffer that doesn't match our current type
755 * specification or can't be written, abort the loop and start
756 * writeback. As per the above xfs_imap_valid() check, only
757 * xfs_vm_writepage() can handle partial page writeback fully - we are
758 * limited here to the buffers that are contiguous with the current
759 * ioend, and hence a buffer we can't write breaks that contiguity and
760 * we have to defer the rest of the IO to xfs_vm_writepage().
761 */
745 bh = head = page_buffers(page); 762 bh = head = page_buffers(page);
746 do { 763 do {
747 if (offset >= end_offset) 764 if (offset >= end_offset)
@@ -750,7 +767,7 @@ xfs_convert_page(
750 uptodate = 0; 767 uptodate = 0;
751 if (!(PageUptodate(page) || buffer_uptodate(bh))) { 768 if (!(PageUptodate(page) || buffer_uptodate(bh))) {
752 done = 1; 769 done = 1;
753 continue; 770 break;
754 } 771 }
755 772
756 if (buffer_unwritten(bh) || buffer_delay(bh) || 773 if (buffer_unwritten(bh) || buffer_delay(bh) ||
@@ -762,10 +779,11 @@ xfs_convert_page(
762 else 779 else
763 type = XFS_IO_OVERWRITE; 780 type = XFS_IO_OVERWRITE;
764 781
765 if (!xfs_imap_valid(inode, imap, offset)) { 782 /*
766 done = 1; 783 * imap should always be valid because of the above
767 continue; 784 * partial page end_offset check on the imap.
768 } 785 */
786 ASSERT(xfs_imap_valid(inode, imap, offset));
769 787
770 lock_buffer(bh); 788 lock_buffer(bh);
771 if (type != XFS_IO_OVERWRITE) 789 if (type != XFS_IO_OVERWRITE)
@@ -777,6 +795,7 @@ xfs_convert_page(
777 count++; 795 count++;
778 } else { 796 } else {
779 done = 1; 797 done = 1;
798 break;
780 } 799 }
781 } while (offset += len, (bh = bh->b_this_page) != head); 800 } while (offset += len, (bh = bh->b_this_page) != head);
782 801
@@ -868,7 +887,7 @@ xfs_aops_discard_page(
868 struct buffer_head *bh, *head; 887 struct buffer_head *bh, *head;
869 loff_t offset = page_offset(page); 888 loff_t offset = page_offset(page);
870 889
871 if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) 890 if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
872 goto out_invalidate; 891 goto out_invalidate;
873 892
874 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 893 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1441,7 +1460,8 @@ xfs_vm_direct_IO(
1441 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1460 ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
1442 offset, nr_segs, 1461 offset, nr_segs,
1443 xfs_get_blocks_direct, 1462 xfs_get_blocks_direct,
1444 xfs_end_io_direct_write, NULL, 0); 1463 xfs_end_io_direct_write, NULL,
1464 DIO_ASYNC_EXTEND);
1445 if (ret != -EIOCBQUEUED && iocb->private) 1465 if (ret != -EIOCBQUEUED && iocb->private)
1446 goto out_destroy_ioend; 1466 goto out_destroy_ioend;
1447 } else { 1467 } else {
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 7b126f46a2f9..fe9587fab17a 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -213,8 +213,8 @@ xfs_attr3_leaf_write_verify(
213 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; 213 struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;
214 214
215 if (!xfs_attr3_leaf_verify(bp)) { 215 if (!xfs_attr3_leaf_verify(bp)) {
216 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
217 xfs_buf_ioerror(bp, EFSCORRUPTED); 216 xfs_buf_ioerror(bp, EFSCORRUPTED);
217 xfs_verifier_error(bp);
218 return; 218 return;
219 } 219 }
220 220
@@ -224,7 +224,7 @@ xfs_attr3_leaf_write_verify(
224 if (bip) 224 if (bip)
225 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 225 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
226 226
227 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); 227 xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);
228} 228}
229 229
230/* 230/*
@@ -239,13 +239,14 @@ xfs_attr3_leaf_read_verify(
239{ 239{
240 struct xfs_mount *mp = bp->b_target->bt_mount; 240 struct xfs_mount *mp = bp->b_target->bt_mount;
241 241
242 if ((xfs_sb_version_hascrc(&mp->m_sb) && 242 if (xfs_sb_version_hascrc(&mp->m_sb) &&
243 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 243 !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF))
244 XFS_ATTR3_LEAF_CRC_OFF)) || 244 xfs_buf_ioerror(bp, EFSBADCRC);
245 !xfs_attr3_leaf_verify(bp)) { 245 else if (!xfs_attr3_leaf_verify(bp))
246 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
247 xfs_buf_ioerror(bp, EFSCORRUPTED); 246 xfs_buf_ioerror(bp, EFSCORRUPTED);
248 } 247
248 if (bp->b_error)
249 xfs_verifier_error(bp);
249} 250}
250 251
251const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { 252const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c
index 5549d69ddb45..6e37823e2932 100644
--- a/fs/xfs/xfs_attr_remote.c
+++ b/fs/xfs/xfs_attr_remote.c
@@ -125,7 +125,6 @@ xfs_attr3_rmt_read_verify(
125 struct xfs_mount *mp = bp->b_target->bt_mount; 125 struct xfs_mount *mp = bp->b_target->bt_mount;
126 char *ptr; 126 char *ptr;
127 int len; 127 int len;
128 bool corrupt = false;
129 xfs_daddr_t bno; 128 xfs_daddr_t bno;
130 129
131 /* no verification of non-crc buffers */ 130 /* no verification of non-crc buffers */
@@ -140,11 +139,11 @@ xfs_attr3_rmt_read_verify(
140 while (len > 0) { 139 while (len > 0) {
141 if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), 140 if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp),
142 XFS_ATTR3_RMT_CRC_OFF)) { 141 XFS_ATTR3_RMT_CRC_OFF)) {
143 corrupt = true; 142 xfs_buf_ioerror(bp, EFSBADCRC);
144 break; 143 break;
145 } 144 }
146 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { 145 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
147 corrupt = true; 146 xfs_buf_ioerror(bp, EFSCORRUPTED);
148 break; 147 break;
149 } 148 }
150 len -= XFS_LBSIZE(mp); 149 len -= XFS_LBSIZE(mp);
@@ -152,10 +151,9 @@ xfs_attr3_rmt_read_verify(
152 bno += mp->m_bsize; 151 bno += mp->m_bsize;
153 } 152 }
154 153
155 if (corrupt) { 154 if (bp->b_error)
156 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 155 xfs_verifier_error(bp);
157 xfs_buf_ioerror(bp, EFSCORRUPTED); 156 else
158 } else
159 ASSERT(len == 0); 157 ASSERT(len == 0);
160} 158}
161 159
@@ -180,9 +178,8 @@ xfs_attr3_rmt_write_verify(
180 178
181 while (len > 0) { 179 while (len > 0) {
182 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { 180 if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) {
183 XFS_CORRUPTION_ERROR(__func__,
184 XFS_ERRLEVEL_LOW, mp, bp->b_addr);
185 xfs_buf_ioerror(bp, EFSCORRUPTED); 181 xfs_buf_ioerror(bp, EFSCORRUPTED);
182 xfs_verifier_error(bp);
186 return; 183 return;
187 } 184 }
188 if (bip) { 185 if (bip) {
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 152543c4ca70..5b6092ef51ef 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5378,3 +5378,196 @@ error0:
5378 } 5378 }
5379 return error; 5379 return error;
5380} 5380}
5381
5382/*
5383 * Shift extent records to the left to cover a hole.
5384 *
5385 * The maximum number of extents to be shifted in a single operation
5386 * is @num_exts, and @current_ext keeps track of the current extent
5387 * index we have shifted. @offset_shift_fsb is the length by which each
5388 * extent is shifted. If there is no hole to shift the extents
 5389 * into, this is considered an invalid operation and we abort immediately.
5390 */
5391int
5392xfs_bmap_shift_extents(
5393 struct xfs_trans *tp,
5394 struct xfs_inode *ip,
5395 int *done,
5396 xfs_fileoff_t start_fsb,
5397 xfs_fileoff_t offset_shift_fsb,
5398 xfs_extnum_t *current_ext,
5399 xfs_fsblock_t *firstblock,
5400 struct xfs_bmap_free *flist,
5401 int num_exts)
5402{
5403 struct xfs_btree_cur *cur;
5404 struct xfs_bmbt_rec_host *gotp;
5405 struct xfs_bmbt_irec got;
5406 struct xfs_bmbt_irec left;
5407 struct xfs_mount *mp = ip->i_mount;
5408 struct xfs_ifork *ifp;
5409 xfs_extnum_t nexts = 0;
5410 xfs_fileoff_t startoff;
5411 int error = 0;
5412 int i;
5413 int whichfork = XFS_DATA_FORK;
5414 int logflags;
5415 xfs_filblks_t blockcount = 0;
5416
5417 if (unlikely(XFS_TEST_ERROR(
5418 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
5419 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
5420 mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
5421 XFS_ERROR_REPORT("xfs_bmap_shift_extents",
5422 XFS_ERRLEVEL_LOW, mp);
5423 return XFS_ERROR(EFSCORRUPTED);
5424 }
5425
5426 if (XFS_FORCED_SHUTDOWN(mp))
5427 return XFS_ERROR(EIO);
5428
5429 ASSERT(current_ext != NULL);
5430
5431 ifp = XFS_IFORK_PTR(ip, whichfork);
5432
5433 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
5434 /* Read in all the extents */
5435 error = xfs_iread_extents(tp, ip, whichfork);
5436 if (error)
5437 return error;
5438 }
5439
5440 /*
 5441 * If *current_ext is 0, we need to look up the extent
 5442 * from which we start shifting and store it in gotp.
5443 */
5444 if (!*current_ext) {
5445 gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext);
5446 /*
5447 * gotp can be null in 2 cases: 1) if there are no extents
5448 * or 2) start_fsb lies in a hole beyond which there are
5449 * no extents. Either way, we are done.
5450 */
5451 if (!gotp) {
5452 *done = 1;
5453 return 0;
5454 }
5455 }
5456
5457 /* We are going to change core inode */
5458 logflags = XFS_ILOG_CORE;
5459
5460 if (ifp->if_flags & XFS_IFBROOT) {
5461 cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
5462 cur->bc_private.b.firstblock = *firstblock;
5463 cur->bc_private.b.flist = flist;
5464 cur->bc_private.b.flags = 0;
5465 } else {
5466 cur = NULL;
5467 logflags |= XFS_ILOG_DEXT;
5468 }
5469
5470 while (nexts++ < num_exts &&
5471 *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) {
5472
5473 gotp = xfs_iext_get_ext(ifp, *current_ext);
5474 xfs_bmbt_get_all(gotp, &got);
5475 startoff = got.br_startoff - offset_shift_fsb;
5476
5477 /*
5478 * Before shifting extent into hole, make sure that the hole
5479 * is large enough to accomodate the shift.
5480 */
5481 if (*current_ext) {
5482 xfs_bmbt_get_all(xfs_iext_get_ext(ifp,
5483 *current_ext - 1), &left);
5484
5485 if (startoff < left.br_startoff + left.br_blockcount)
5486 error = XFS_ERROR(EINVAL);
5487 } else if (offset_shift_fsb > got.br_startoff) {
5488 /*
 5489 * When the first extent is shifted, offset_shift_fsb
 5490 * should be less than the starting offset of
5491 * the first extent.
5492 */
5493 error = XFS_ERROR(EINVAL);
5494 }
5495
5496 if (error)
5497 goto del_cursor;
5498
5499 if (cur) {
5500 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5501 got.br_startblock,
5502 got.br_blockcount,
5503 &i);
5504 if (error)
5505 goto del_cursor;
5506 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5507 }
5508
5509 /* Check if we can merge 2 adjacent extents */
5510 if (*current_ext &&
5511 left.br_startoff + left.br_blockcount == startoff &&
5512 left.br_startblock + left.br_blockcount ==
5513 got.br_startblock &&
5514 left.br_state == got.br_state &&
5515 left.br_blockcount + got.br_blockcount <= MAXEXTLEN) {
5516 blockcount = left.br_blockcount +
5517 got.br_blockcount;
5518 xfs_iext_remove(ip, *current_ext, 1, 0);
5519 if (cur) {
5520 error = xfs_btree_delete(cur, &i);
5521 if (error)
5522 goto del_cursor;
5523 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5524 }
5525 XFS_IFORK_NEXT_SET(ip, whichfork,
5526 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
5527 gotp = xfs_iext_get_ext(ifp, --*current_ext);
5528 xfs_bmbt_get_all(gotp, &got);
5529
5530 /* Make cursor point to the extent we will update */
5531 if (cur) {
5532 error = xfs_bmbt_lookup_eq(cur, got.br_startoff,
5533 got.br_startblock,
5534 got.br_blockcount,
5535 &i);
5536 if (error)
5537 goto del_cursor;
5538 XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor);
5539 }
5540
5541 xfs_bmbt_set_blockcount(gotp, blockcount);
5542 got.br_blockcount = blockcount;
5543 } else {
5544 /* We have to update the startoff */
5545 xfs_bmbt_set_startoff(gotp, startoff);
5546 got.br_startoff = startoff;
5547 }
5548
5549 if (cur) {
5550 error = xfs_bmbt_update(cur, got.br_startoff,
5551 got.br_startblock,
5552 got.br_blockcount,
5553 got.br_state);
5554 if (error)
5555 goto del_cursor;
5556 }
5557
5558 (*current_ext)++;
5559 }
5560
5561 /* Check if we are done */
5562 if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork))
5563 *done = 1;
5564
5565del_cursor:
5566 if (cur)
5567 xfs_btree_del_cursor(cur,
5568 error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
5569
5570 xfs_trans_log_inode(tp, ip, logflags);
5571
5572 return error;
5573}
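The merge branch in the loop above is guarded by four conditions. A standalone sketch of that predicate (a hypothetical helper mirroring the in-loop checks, where startoff is got.br_startoff minus the shift):

        /* Can the shifted extent 'got' be merged into its left neighbour? */
        static bool
        xfs_shift_can_merge(
                struct xfs_bmbt_irec    *left,
                struct xfs_bmbt_irec    *got,
                xfs_fileoff_t           startoff)
        {
                return left->br_startoff + left->br_blockcount == startoff &&
                       left->br_startblock + left->br_blockcount == got->br_startblock &&
                       left->br_state == got->br_state &&
                       left->br_blockcount + got->br_blockcount <= MAXEXTLEN;
        }

In words: the shifted extent must land logically adjacent to the left extent, be physically contiguous with it, share the same written/unwritten state, and not push the combined length past MAXEXTLEN.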
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 33b41f351225..f84bd7af43be 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
127 { BMAP_RIGHT_FILLING, "RF" }, \ 127 { BMAP_RIGHT_FILLING, "RF" }, \
128 { BMAP_ATTRFORK, "ATTR" } 128 { BMAP_ATTRFORK, "ATTR" }
129 129
130
131/*
132 * This macro is used to determine how many extents will be shifted
133 * in one write transaction. We could require two splits,
134 * an extent move on the first and an extent merge on the second,
135 * So it is proper that one extent is shifted inside write transaction
136 * at a time.
137 */
138#define XFS_BMAP_MAX_SHIFT_EXTENTS 1
139
130#ifdef DEBUG 140#ifdef DEBUG
131void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, 141void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
132 int whichfork, unsigned long caller_ip); 142 int whichfork, unsigned long caller_ip);
@@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
169int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, 179int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
170 xfs_extnum_t num); 180 xfs_extnum_t num);
171uint xfs_default_attroffset(struct xfs_inode *ip); 181uint xfs_default_attroffset(struct xfs_inode *ip);
182int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip,
183 int *done, xfs_fileoff_t start_fsb,
184 xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext,
185 xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist,
186 int num_exts);
172 187
173#endif /* __XFS_BMAP_H__ */ 188#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 706bc3f777cb..818d546664e7 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -780,12 +780,14 @@ static void
780xfs_bmbt_read_verify( 780xfs_bmbt_read_verify(
781 struct xfs_buf *bp) 781 struct xfs_buf *bp)
782{ 782{
783 if (!(xfs_btree_lblock_verify_crc(bp) && 783 if (!xfs_btree_lblock_verify_crc(bp))
784 xfs_bmbt_verify(bp))) { 784 xfs_buf_ioerror(bp, EFSBADCRC);
785 trace_xfs_btree_corrupt(bp, _RET_IP_); 785 else if (!xfs_bmbt_verify(bp))
786 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
787 bp->b_target->bt_mount, bp->b_addr);
788 xfs_buf_ioerror(bp, EFSCORRUPTED); 786 xfs_buf_ioerror(bp, EFSCORRUPTED);
787
788 if (bp->b_error) {
789 trace_xfs_btree_corrupt(bp, _RET_IP_);
790 xfs_verifier_error(bp);
789 } 791 }
790} 792}
791 793
@@ -794,11 +796,9 @@ xfs_bmbt_write_verify(
794 struct xfs_buf *bp) 796 struct xfs_buf *bp)
795{ 797{
796 if (!xfs_bmbt_verify(bp)) { 798 if (!xfs_bmbt_verify(bp)) {
797 xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);
798 trace_xfs_btree_corrupt(bp, _RET_IP_); 799 trace_xfs_btree_corrupt(bp, _RET_IP_);
799 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
800 bp->b_target->bt_mount, bp->b_addr);
801 xfs_buf_ioerror(bp, EFSCORRUPTED); 800 xfs_buf_ioerror(bp, EFSCORRUPTED);
801 xfs_verifier_error(bp);
802 return; 802 return;
803 } 803 }
804 xfs_btree_lblock_calc_crc(bp); 804 xfs_btree_lblock_calc_crc(bp);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index f264616080ca..01f6a646caa1 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1349,7 +1349,6 @@ xfs_free_file_space(
1349 * the freeing of the space succeeds at ENOSPC. 1349 * the freeing of the space succeeds at ENOSPC.
1350 */ 1350 */
1351 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 1351 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1352 tp->t_flags |= XFS_TRANS_RESERVE;
1353 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); 1352 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
1354 1353
1355 /* 1354 /*
@@ -1468,6 +1467,102 @@ out:
1468} 1467}
1469 1468
1470/* 1469/*
1470 * xfs_collapse_file_space()
 1471 * This routine frees disk space and shifts extents for the given file.
 1472 * The first thing we do is free data blocks in the specified range by
 1473 * calling xfs_free_file_space(), which also syncs dirty data and
 1474 * invalidates the page cache over the region on which the collapse
 1475 * range operates. Then extent records are shifted left to cover the hole.
1476 * RETURNS:
1477 * 0 on success
1478 * errno on error
1479 *
1480 */
1481int
1482xfs_collapse_file_space(
1483 struct xfs_inode *ip,
1484 xfs_off_t offset,
1485 xfs_off_t len)
1486{
1487 int done = 0;
1488 struct xfs_mount *mp = ip->i_mount;
1489 struct xfs_trans *tp;
1490 int error;
1491 xfs_extnum_t current_ext = 0;
1492 struct xfs_bmap_free free_list;
1493 xfs_fsblock_t first_block;
1494 int committed;
1495 xfs_fileoff_t start_fsb;
1496 xfs_fileoff_t shift_fsb;
1497
1498 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1499
1500 trace_xfs_collapse_file_space(ip);
1501
1502 start_fsb = XFS_B_TO_FSB(mp, offset + len);
1503 shift_fsb = XFS_B_TO_FSB(mp, len);
1504
1505 error = xfs_free_file_space(ip, offset, len);
1506 if (error)
1507 return error;
1508
1509 while (!error && !done) {
1510 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
1511 tp->t_flags |= XFS_TRANS_RESERVE;
1512 /*
 1513 * We need to reserve a permanent block for the transaction.
 1514 * This comes into play when, after shifting an extent into the
 1515 * hole, we find that adjacent extents can be merged, which
 1516 * may lead to freeing of a block during the record update.
1517 */
1518 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
1519 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
1520 if (error) {
1521 ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
1522 xfs_trans_cancel(tp, 0);
1523 break;
1524 }
1525
1526 xfs_ilock(ip, XFS_ILOCK_EXCL);
1527 error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
1528 ip->i_gdquot, ip->i_pdquot,
1529 XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
1530 XFS_QMOPT_RES_REGBLKS);
1531 if (error)
1532 goto out;
1533
1534 xfs_trans_ijoin(tp, ip, 0);
1535
1536 xfs_bmap_init(&free_list, &first_block);
1537
1538 /*
 1539 * We are using the write transaction, in which a maximum of 2 bmbt
 1540 * updates are allowed
1541 */
1542 error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
1543 shift_fsb, &current_ext,
1544 &first_block, &free_list,
1545 XFS_BMAP_MAX_SHIFT_EXTENTS);
1546 if (error)
1547 goto out;
1548
1549 error = xfs_bmap_finish(&tp, &free_list, &committed);
1550 if (error)
1551 goto out;
1552
1553 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1554 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1555 }
1556
1557 return error;
1558
1559out:
1560 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
1561 xfs_iunlock(ip, XFS_ILOCK_EXCL);
1562 return error;
1563}
1564
1565/*
1471 * We need to check that the format of the data fork in the temporary inode is 1566 * We need to check that the format of the data fork in the temporary inode is
1472 * valid for the target inode before doing the swap. This is not a problem with 1567 * valid for the target inode before doing the swap. This is not a problem with
1473 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized 1568 * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 900747b25772..935ed2b24edf 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
99 xfs_off_t len); 99 xfs_off_t len);
100int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, 100int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
101 xfs_off_t len); 101 xfs_off_t len);
102int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
103 xfs_off_t len);
102 104
103/* EOF block manipulation functions */ 105/* EOF block manipulation functions */
104bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); 106bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force);
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 9adaae4f3e2f..e80d59fdf89a 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -234,8 +234,7 @@ xfs_btree_lblock_calc_crc(
234 return; 234 return;
235 if (bip) 235 if (bip)
236 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 236 block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
237 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 237 xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
238 XFS_BTREE_LBLOCK_CRC_OFF);
239} 238}
240 239
241bool 240bool
@@ -243,8 +242,8 @@ xfs_btree_lblock_verify_crc(
243 struct xfs_buf *bp) 242 struct xfs_buf *bp)
244{ 243{
245 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 244 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
246 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 245 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
247 XFS_BTREE_LBLOCK_CRC_OFF); 246
248 return true; 247 return true;
249} 248}
250 249
@@ -267,8 +266,7 @@ xfs_btree_sblock_calc_crc(
267 return; 266 return;
268 if (bip) 267 if (bip)
269 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 268 block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
270 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 269 xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
271 XFS_BTREE_SBLOCK_CRC_OFF);
272} 270}
273 271
274bool 272bool
@@ -276,8 +274,8 @@ xfs_btree_sblock_verify_crc(
276 struct xfs_buf *bp) 274 struct xfs_buf *bp)
277{ 275{
278 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 276 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
279 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 277 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
280 XFS_BTREE_SBLOCK_CRC_OFF); 278
281 return true; 279 return true;
282} 280}
283 281
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 9c061ef2b0d9..107f2fdfe41f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -396,7 +396,17 @@ _xfs_buf_map_pages(
396 bp->b_addr = NULL; 396 bp->b_addr = NULL;
397 } else { 397 } else {
398 int retried = 0; 398 int retried = 0;
399 unsigned noio_flag;
399 400
401 /*
 402 * vm_map_ram() will allocate auxiliary structures (e.g.
403 * pagetables) with GFP_KERNEL, yet we are likely to be under
404 * GFP_NOFS context here. Hence we need to tell memory reclaim
405 * that we are in such a context via PF_MEMALLOC_NOIO to prevent
406 * memory reclaim re-entering the filesystem here and
407 * potentially deadlocking.
408 */
409 noio_flag = memalloc_noio_save();
400 do { 410 do {
401 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, 411 bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
402 -1, PAGE_KERNEL); 412 -1, PAGE_KERNEL);
@@ -404,6 +414,7 @@ _xfs_buf_map_pages(
404 break; 414 break;
405 vm_unmap_aliases(); 415 vm_unmap_aliases();
406 } while (retried++ <= 1); 416 } while (retried++ <= 1);
417 memalloc_noio_restore(noio_flag);
407 418
408 if (!bp->b_addr) 419 if (!bp->b_addr)
409 return -ENOMEM; 420 return -ENOMEM;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 995339534db6..b8a3abf6cf47 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -369,6 +369,20 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
369 xfs_buf_rele(bp); 369 xfs_buf_rele(bp);
370} 370}
371 371
372static inline int
373xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
374{
375 return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
376 cksum_offset);
377}
378
379static inline void
380xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
381{
382 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length),
383 cksum_offset);
384}
385
372/* 386/*
373 * Handling of buftargs. 387 * Handling of buftargs.
374 */ 388 */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 33149113e333..8752821443be 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -796,20 +796,6 @@ xfs_buf_item_init(
796 bip->bli_formats[i].blf_map_size = map_size; 796 bip->bli_formats[i].blf_map_size = map_size;
797 } 797 }
798 798
799#ifdef XFS_TRANS_DEBUG
800 /*
801 * Allocate the arrays for tracking what needs to be logged
802 * and what our callers request to be logged. bli_orig
803 * holds a copy of the original, clean buffer for comparison
804 * against, and bli_logged keeps a 1 bit flag per byte in
805 * the buffer to indicate which bytes the callers have asked
806 * to have logged.
807 */
808 bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
809 memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
810 bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
811#endif
812
813 /* 799 /*
814 * Put the buf item into the list of items attached to the 800 * Put the buf item into the list of items attached to the
815 * buffer at the front. 801 * buffer at the front.
@@ -957,11 +943,6 @@ STATIC void
957xfs_buf_item_free( 943xfs_buf_item_free(
958 xfs_buf_log_item_t *bip) 944 xfs_buf_log_item_t *bip)
959{ 945{
960#ifdef XFS_TRANS_DEBUG
961 kmem_free(bip->bli_orig);
962 kmem_free(bip->bli_logged);
963#endif /* XFS_TRANS_DEBUG */
964
965 xfs_buf_item_free_format(bip); 946 xfs_buf_item_free_format(bip);
966 kmem_zone_free(xfs_buf_item_zone, bip); 947 kmem_zone_free(xfs_buf_item_zone, bip);
967} 948}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 796272a2e129..6cc5f6785a77 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -185,8 +185,8 @@ xfs_da3_node_write_verify(
185 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 185 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
186 186
187 if (!xfs_da3_node_verify(bp)) { 187 if (!xfs_da3_node_verify(bp)) {
188 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
189 xfs_buf_ioerror(bp, EFSCORRUPTED); 188 xfs_buf_ioerror(bp, EFSCORRUPTED);
189 xfs_verifier_error(bp);
190 return; 190 return;
191 } 191 }
192 192
@@ -196,7 +196,7 @@ xfs_da3_node_write_verify(
196 if (bip) 196 if (bip)
197 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 197 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
198 198
199 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); 199 xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);
200} 200}
201 201
202/* 202/*
@@ -209,18 +209,20 @@ static void
209xfs_da3_node_read_verify( 209xfs_da3_node_read_verify(
210 struct xfs_buf *bp) 210 struct xfs_buf *bp)
211{ 211{
212 struct xfs_mount *mp = bp->b_target->bt_mount;
213 struct xfs_da_blkinfo *info = bp->b_addr; 212 struct xfs_da_blkinfo *info = bp->b_addr;
214 213
215 switch (be16_to_cpu(info->magic)) { 214 switch (be16_to_cpu(info->magic)) {
216 case XFS_DA3_NODE_MAGIC: 215 case XFS_DA3_NODE_MAGIC:
217 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 216 if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) {
218 XFS_DA3_NODE_CRC_OFF)) 217 xfs_buf_ioerror(bp, EFSBADCRC);
219 break; 218 break;
219 }
220 /* fall through */ 220 /* fall through */
221 case XFS_DA_NODE_MAGIC: 221 case XFS_DA_NODE_MAGIC:
222 if (!xfs_da3_node_verify(bp)) 222 if (!xfs_da3_node_verify(bp)) {
223 xfs_buf_ioerror(bp, EFSCORRUPTED);
223 break; 224 break;
225 }
224 return; 226 return;
225 case XFS_ATTR_LEAF_MAGIC: 227 case XFS_ATTR_LEAF_MAGIC:
226 case XFS_ATTR3_LEAF_MAGIC: 228 case XFS_ATTR3_LEAF_MAGIC:
@@ -237,8 +239,7 @@ xfs_da3_node_read_verify(
237 } 239 }
238 240
239 /* corrupt block */ 241 /* corrupt block */
240 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 242 xfs_verifier_error(bp);
241 xfs_buf_ioerror(bp, EFSCORRUPTED);
242} 243}
243 244
244const struct xfs_buf_ops xfs_da3_node_buf_ops = { 245const struct xfs_buf_ops xfs_da3_node_buf_ops = {
@@ -1295,7 +1296,7 @@ xfs_da3_fixhashpath(
1295 node = blk->bp->b_addr; 1296 node = blk->bp->b_addr;
1296 dp->d_ops->node_hdr_from_disk(&nodehdr, node); 1297 dp->d_ops->node_hdr_from_disk(&nodehdr, node);
1297 btree = dp->d_ops->node_tree_p(node); 1298 btree = dp->d_ops->node_tree_p(node);
1298 if (be32_to_cpu(btree->hashval) == lasthash) 1299 if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
1299 break; 1300 break;
1300 blk->hashval = lasthash; 1301 blk->hashval = lasthash;
1301 btree[blk->index].hashval = cpu_to_be32(lasthash); 1302 btree[blk->index].hashval = cpu_to_be32(lasthash);
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index e5869b50dc41..623bbe8fd921 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -89,6 +89,8 @@ typedef struct xfs_dinode {
89 /* structure must be padded to 64 bit alignment */ 89 /* structure must be padded to 64 bit alignment */
90} xfs_dinode_t; 90} xfs_dinode_t;
91 91
92#define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc)
93
92#define DI_MAX_FLUSH 0xffff 94#define DI_MAX_FLUSH 0xffff
93 95
94/* 96/*
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index ce16ef02997a..fda46253966a 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -180,16 +180,23 @@ xfs_dir_init(
180 xfs_inode_t *dp, 180 xfs_inode_t *dp,
181 xfs_inode_t *pdp) 181 xfs_inode_t *pdp)
182{ 182{
183 xfs_da_args_t args; 183 struct xfs_da_args *args;
184 int error; 184 int error;
185 185
186 memset((char *)&args, 0, sizeof(args));
187 args.dp = dp;
188 args.trans = tp;
189 ASSERT(S_ISDIR(dp->i_d.di_mode)); 186 ASSERT(S_ISDIR(dp->i_d.di_mode));
190 if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) 187 error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
188 if (error)
191 return error; 189 return error;
192 return xfs_dir2_sf_create(&args, pdp->i_ino); 190
191 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
192 if (!args)
193 return ENOMEM;
194
195 args->dp = dp;
196 args->trans = tp;
197 error = xfs_dir2_sf_create(args, pdp->i_ino);
198 kmem_free(args);
199 return error;
193} 200}
194 201
195/* 202/*
@@ -205,41 +212,56 @@ xfs_dir_createname(
205 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 212 xfs_bmap_free_t *flist, /* bmap's freeblock list */
206 xfs_extlen_t total) /* bmap's total block count */ 213 xfs_extlen_t total) /* bmap's total block count */
207{ 214{
208 xfs_da_args_t args; 215 struct xfs_da_args *args;
209 int rval; 216 int rval;
210 int v; /* type-checking value */ 217 int v; /* type-checking value */
211 218
212 ASSERT(S_ISDIR(dp->i_d.di_mode)); 219 ASSERT(S_ISDIR(dp->i_d.di_mode));
213 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 220 rval = xfs_dir_ino_validate(tp->t_mountp, inum);
221 if (rval)
214 return rval; 222 return rval;
215 XFS_STATS_INC(xs_dir_create); 223 XFS_STATS_INC(xs_dir_create);
216 224
217 memset(&args, 0, sizeof(xfs_da_args_t)); 225 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
218 args.name = name->name; 226 if (!args)
219 args.namelen = name->len; 227 return ENOMEM;
220 args.filetype = name->type; 228
221 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 229 args->name = name->name;
222 args.inumber = inum; 230 args->namelen = name->len;
223 args.dp = dp; 231 args->filetype = name->type;
224 args.firstblock = first; 232 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
225 args.flist = flist; 233 args->inumber = inum;
226 args.total = total; 234 args->dp = dp;
227 args.whichfork = XFS_DATA_FORK; 235 args->firstblock = first;
228 args.trans = tp; 236 args->flist = flist;
229 args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; 237 args->total = total;
230 238 args->whichfork = XFS_DATA_FORK;
231 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 239 args->trans = tp;
232 rval = xfs_dir2_sf_addname(&args); 240 args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT;
233 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 241
234 return rval; 242 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
235 else if (v) 243 rval = xfs_dir2_sf_addname(args);
236 rval = xfs_dir2_block_addname(&args); 244 goto out_free;
237 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 245 }
238 return rval; 246
239 else if (v) 247 rval = xfs_dir2_isblock(tp, dp, &v);
240 rval = xfs_dir2_leaf_addname(&args); 248 if (rval)
249 goto out_free;
250 if (v) {
251 rval = xfs_dir2_block_addname(args);
252 goto out_free;
253 }
254
255 rval = xfs_dir2_isleaf(tp, dp, &v);
256 if (rval)
257 goto out_free;
258 if (v)
259 rval = xfs_dir2_leaf_addname(args);
241 else 260 else
242 rval = xfs_dir2_node_addname(&args); 261 rval = xfs_dir2_node_addname(args);
262
263out_free:
264 kmem_free(args);
243 return rval; 265 return rval;
244} 266}
245 267
@@ -282,46 +304,66 @@ xfs_dir_lookup(
282 xfs_ino_t *inum, /* out: inode number */ 304 xfs_ino_t *inum, /* out: inode number */
283 struct xfs_name *ci_name) /* out: actual name if CI match */ 305 struct xfs_name *ci_name) /* out: actual name if CI match */
284{ 306{
285 xfs_da_args_t args; 307 struct xfs_da_args *args;
286 int rval; 308 int rval;
287 int v; /* type-checking value */ 309 int v; /* type-checking value */
288 310
289 ASSERT(S_ISDIR(dp->i_d.di_mode)); 311 ASSERT(S_ISDIR(dp->i_d.di_mode));
290 XFS_STATS_INC(xs_dir_lookup); 312 XFS_STATS_INC(xs_dir_lookup);
291 313
292 memset(&args, 0, sizeof(xfs_da_args_t)); 314 /*
293 args.name = name->name; 315 * We need to use KM_NOFS here so that lockdep will not throw false
294 args.namelen = name->len; 316 * positive deadlock warnings on a non-transactional lookup path. It is
295 args.filetype = name->type; 317 * safe to recurse into inode recalim in that case, but lockdep can't
296 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 318 * easily be taught about it. Hence KM_NOFS avoids having to add more
297 args.dp = dp; 319 * lockdep Doing this avoids having to add a bunch of lockdep class
298 args.whichfork = XFS_DATA_FORK; 320 * annotations into the reclaim path for the ilock.
299 args.trans = tp; 321 */
300 args.op_flags = XFS_DA_OP_OKNOENT; 322 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
323 args->name = name->name;
324 args->namelen = name->len;
325 args->filetype = name->type;
326 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
327 args->dp = dp;
328 args->whichfork = XFS_DATA_FORK;
329 args->trans = tp;
330 args->op_flags = XFS_DA_OP_OKNOENT;
301 if (ci_name) 331 if (ci_name)
302 args.op_flags |= XFS_DA_OP_CILOOKUP; 332 args->op_flags |= XFS_DA_OP_CILOOKUP;
303 333
304 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 334 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
305 rval = xfs_dir2_sf_lookup(&args); 335 rval = xfs_dir2_sf_lookup(args);
306 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 336 goto out_check_rval;
307 return rval; 337 }
308 else if (v) 338
309 rval = xfs_dir2_block_lookup(&args); 339 rval = xfs_dir2_isblock(tp, dp, &v);
310 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 340 if (rval)
311 return rval; 341 goto out_free;
312 else if (v) 342 if (v) {
313 rval = xfs_dir2_leaf_lookup(&args); 343 rval = xfs_dir2_block_lookup(args);
344 goto out_check_rval;
345 }
346
347 rval = xfs_dir2_isleaf(tp, dp, &v);
348 if (rval)
349 goto out_free;
350 if (v)
351 rval = xfs_dir2_leaf_lookup(args);
314 else 352 else
315 rval = xfs_dir2_node_lookup(&args); 353 rval = xfs_dir2_node_lookup(args);
354
355out_check_rval:
316 if (rval == EEXIST) 356 if (rval == EEXIST)
317 rval = 0; 357 rval = 0;
318 if (!rval) { 358 if (!rval) {
319 *inum = args.inumber; 359 *inum = args->inumber;
320 if (ci_name) { 360 if (ci_name) {
321 ci_name->name = args.value; 361 ci_name->name = args->value;
322 ci_name->len = args.valuelen; 362 ci_name->len = args->valuelen;
323 } 363 }
324 } 364 }
365out_free:
366 kmem_free(args);
325 return rval; 367 return rval;
326} 368}
327 369
@@ -338,38 +380,51 @@ xfs_dir_removename(
338 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 380 xfs_bmap_free_t *flist, /* bmap's freeblock list */
339 xfs_extlen_t total) /* bmap's total block count */ 381 xfs_extlen_t total) /* bmap's total block count */
340{ 382{
341 xfs_da_args_t args; 383 struct xfs_da_args *args;
342 int rval; 384 int rval;
343 int v; /* type-checking value */ 385 int v; /* type-checking value */
344 386
345 ASSERT(S_ISDIR(dp->i_d.di_mode)); 387 ASSERT(S_ISDIR(dp->i_d.di_mode));
346 XFS_STATS_INC(xs_dir_remove); 388 XFS_STATS_INC(xs_dir_remove);
347 389
348 memset(&args, 0, sizeof(xfs_da_args_t)); 390 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
349 args.name = name->name; 391 if (!args)
350 args.namelen = name->len; 392 return ENOMEM;
351 args.filetype = name->type; 393
352 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 394 args->name = name->name;
353 args.inumber = ino; 395 args->namelen = name->len;
354 args.dp = dp; 396 args->filetype = name->type;
355 args.firstblock = first; 397 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
356 args.flist = flist; 398 args->inumber = ino;
357 args.total = total; 399 args->dp = dp;
358 args.whichfork = XFS_DATA_FORK; 400 args->firstblock = first;
359 args.trans = tp; 401 args->flist = flist;
360 402 args->total = total;
361 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 403 args->whichfork = XFS_DATA_FORK;
362 rval = xfs_dir2_sf_removename(&args); 404 args->trans = tp;
363 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 405
364 return rval; 406 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
365 else if (v) 407 rval = xfs_dir2_sf_removename(args);
366 rval = xfs_dir2_block_removename(&args); 408 goto out_free;
367 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 409 }
368 return rval; 410
369 else if (v) 411 rval = xfs_dir2_isblock(tp, dp, &v);
370 rval = xfs_dir2_leaf_removename(&args); 412 if (rval)
413 goto out_free;
414 if (v) {
415 rval = xfs_dir2_block_removename(args);
416 goto out_free;
417 }
418
419 rval = xfs_dir2_isleaf(tp, dp, &v);
420 if (rval)
421 goto out_free;
422 if (v)
423 rval = xfs_dir2_leaf_removename(args);
371 else 424 else
372 rval = xfs_dir2_node_removename(&args); 425 rval = xfs_dir2_node_removename(args);
426out_free:
427 kmem_free(args);
373 return rval; 428 return rval;
374} 429}
375 430
@@ -386,40 +441,54 @@ xfs_dir_replace(
386 xfs_bmap_free_t *flist, /* bmap's freeblock list */ 441 xfs_bmap_free_t *flist, /* bmap's freeblock list */
387 xfs_extlen_t total) /* bmap's total block count */ 442 xfs_extlen_t total) /* bmap's total block count */
388{ 443{
389 xfs_da_args_t args; 444 struct xfs_da_args *args;
390 int rval; 445 int rval;
391 int v; /* type-checking value */ 446 int v; /* type-checking value */
392 447
393 ASSERT(S_ISDIR(dp->i_d.di_mode)); 448 ASSERT(S_ISDIR(dp->i_d.di_mode));
394 449
395 if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) 450 rval = xfs_dir_ino_validate(tp->t_mountp, inum);
451 if (rval)
396 return rval; 452 return rval;
397 453
398 memset(&args, 0, sizeof(xfs_da_args_t)); 454 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
399 args.name = name->name; 455 if (!args)
400 args.namelen = name->len; 456 return ENOMEM;
401 args.filetype = name->type; 457
402 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 458 args->name = name->name;
403 args.inumber = inum; 459 args->namelen = name->len;
404 args.dp = dp; 460 args->filetype = name->type;
405 args.firstblock = first; 461 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
406 args.flist = flist; 462 args->inumber = inum;
407 args.total = total; 463 args->dp = dp;
408 args.whichfork = XFS_DATA_FORK; 464 args->firstblock = first;
409 args.trans = tp; 465 args->flist = flist;
410 466 args->total = total;
411 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 467 args->whichfork = XFS_DATA_FORK;
412 rval = xfs_dir2_sf_replace(&args); 468 args->trans = tp;
413 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 469
414 return rval; 470 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
415 else if (v) 471 rval = xfs_dir2_sf_replace(args);
416 rval = xfs_dir2_block_replace(&args); 472 goto out_free;
417 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 473 }
418 return rval; 474
419 else if (v) 475 rval = xfs_dir2_isblock(tp, dp, &v);
420 rval = xfs_dir2_leaf_replace(&args); 476 if (rval)
477 goto out_free;
478 if (v) {
479 rval = xfs_dir2_block_replace(args);
480 goto out_free;
481 }
482
483 rval = xfs_dir2_isleaf(tp, dp, &v);
484 if (rval)
485 goto out_free;
486 if (v)
487 rval = xfs_dir2_leaf_replace(args);
421 else 488 else
422 rval = xfs_dir2_node_replace(&args); 489 rval = xfs_dir2_node_replace(args);
490out_free:
491 kmem_free(args);
423 return rval; 492 return rval;
424} 493}
425 494
@@ -434,7 +503,7 @@ xfs_dir_canenter(
434 struct xfs_name *name, /* name of entry to add */ 503 struct xfs_name *name, /* name of entry to add */
435 uint resblks) 504 uint resblks)
436{ 505{
437 xfs_da_args_t args; 506 struct xfs_da_args *args;
438 int rval; 507 int rval;
439 int v; /* type-checking value */ 508 int v; /* type-checking value */
440 509
@@ -443,29 +512,42 @@ xfs_dir_canenter(
443 512
444 ASSERT(S_ISDIR(dp->i_d.di_mode)); 513 ASSERT(S_ISDIR(dp->i_d.di_mode));
445 514
446 memset(&args, 0, sizeof(xfs_da_args_t)); 515 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
447 args.name = name->name; 516 if (!args)
448 args.namelen = name->len; 517 return ENOMEM;
449 args.filetype = name->type; 518
450 args.hashval = dp->i_mount->m_dirnameops->hashname(name); 519 args->name = name->name;
451 args.dp = dp; 520 args->namelen = name->len;
452 args.whichfork = XFS_DATA_FORK; 521 args->filetype = name->type;
453 args.trans = tp; 522 args->hashval = dp->i_mount->m_dirnameops->hashname(name);
454 args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | 523 args->dp = dp;
524 args->whichfork = XFS_DATA_FORK;
525 args->trans = tp;
526 args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |
455 XFS_DA_OP_OKNOENT; 527 XFS_DA_OP_OKNOENT;
456 528
457 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) 529 if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
458 rval = xfs_dir2_sf_addname(&args); 530 rval = xfs_dir2_sf_addname(args);
459 else if ((rval = xfs_dir2_isblock(tp, dp, &v))) 531 goto out_free;
460 return rval; 532 }
461 else if (v) 533
462 rval = xfs_dir2_block_addname(&args); 534 rval = xfs_dir2_isblock(tp, dp, &v);
463 else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) 535 if (rval)
464 return rval; 536 goto out_free;
465 else if (v) 537 if (v) {
466 rval = xfs_dir2_leaf_addname(&args); 538 rval = xfs_dir2_block_addname(args);
539 goto out_free;
540 }
541
542 rval = xfs_dir2_isleaf(tp, dp, &v);
543 if (rval)
544 goto out_free;
545 if (v)
546 rval = xfs_dir2_leaf_addname(args);
467 else 547 else
468 rval = xfs_dir2_node_addname(&args); 548 rval = xfs_dir2_node_addname(args);
549out_free:
550 kmem_free(args);
469 return rval; 551 return rval;
470} 552}
471 553
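
The four xfs_dir_* wrappers above all converge on the same shape: the xfs_da_args structure moves from the stack to a kmem_zalloc() allocation, and every exit path funnels through an out_free label so the memory is always released. A condensed sketch of that pattern; xfs_dir2_sf_example_op() and xfs_dir2_node_example_op() are hypothetical stand-ins for whichever shortform/block/leaf/node operation ends up selected:

/* Heap-allocated xfs_da_args with a single cleanup path, mirroring the
 * conversions in the hunks above. */
int
xfs_dir2_example(
        struct xfs_trans        *tp,
        struct xfs_inode        *dp,
        struct xfs_name         *name)
{
        struct xfs_da_args      *args;
        int                     rval;

        args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
        if (!args)
                return ENOMEM;          /* positive errno, as above */

        args->name = name->name;
        args->namelen = name->len;
        args->hashval = dp->i_mount->m_dirnameops->hashname(name);
        args->dp = dp;
        args->whichfork = XFS_DATA_FORK;
        args->trans = tp;

        if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
                rval = xfs_dir2_sf_example_op(args);    /* hypothetical */
                goto out_free;
        }
        rval = xfs_dir2_node_example_op(args);          /* hypothetical */
out_free:
        kmem_free(args);
        return rval;
}
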
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 90cdbf4b5f19..4f6a38cb83a4 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -89,13 +89,14 @@ xfs_dir3_block_read_verify(
89{ 89{
90 struct xfs_mount *mp = bp->b_target->bt_mount; 90 struct xfs_mount *mp = bp->b_target->bt_mount;
91 91
92 if ((xfs_sb_version_hascrc(&mp->m_sb) && 92 if (xfs_sb_version_hascrc(&mp->m_sb) &&
93 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 93 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
94 XFS_DIR3_DATA_CRC_OFF)) || 94 xfs_buf_ioerror(bp, EFSBADCRC);
95 !xfs_dir3_block_verify(bp)) { 95 else if (!xfs_dir3_block_verify(bp))
96 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
97 xfs_buf_ioerror(bp, EFSCORRUPTED); 96 xfs_buf_ioerror(bp, EFSCORRUPTED);
98 } 97
98 if (bp->b_error)
99 xfs_verifier_error(bp);
99} 100}
100 101
101static void 102static void
@@ -107,8 +108,8 @@ xfs_dir3_block_write_verify(
107 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 108 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
108 109
109 if (!xfs_dir3_block_verify(bp)) { 110 if (!xfs_dir3_block_verify(bp)) {
110 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
111 xfs_buf_ioerror(bp, EFSCORRUPTED); 111 xfs_buf_ioerror(bp, EFSCORRUPTED);
112 xfs_verifier_error(bp);
112 return; 113 return;
113 } 114 }
114 115
@@ -118,7 +119,7 @@ xfs_dir3_block_write_verify(
118 if (bip) 119 if (bip)
119 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); 120 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
120 121
121 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); 122 xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
122} 123}
123 124
124const struct xfs_buf_ops xfs_dir3_block_buf_ops = { 125const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 70acff4ee173..afa4ad523f3f 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -241,7 +241,6 @@ static void
241xfs_dir3_data_reada_verify( 241xfs_dir3_data_reada_verify(
242 struct xfs_buf *bp) 242 struct xfs_buf *bp)
243{ 243{
244 struct xfs_mount *mp = bp->b_target->bt_mount;
245 struct xfs_dir2_data_hdr *hdr = bp->b_addr; 244 struct xfs_dir2_data_hdr *hdr = bp->b_addr;
246 245
247 switch (hdr->magic) { 246 switch (hdr->magic) {
@@ -255,8 +254,8 @@ xfs_dir3_data_reada_verify(
255 xfs_dir3_data_verify(bp); 254 xfs_dir3_data_verify(bp);
256 return; 255 return;
257 default: 256 default:
258 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);
259 xfs_buf_ioerror(bp, EFSCORRUPTED); 257 xfs_buf_ioerror(bp, EFSCORRUPTED);
258 xfs_verifier_error(bp);
260 break; 259 break;
261 } 260 }
262} 261}
@@ -267,13 +266,14 @@ xfs_dir3_data_read_verify(
267{ 266{
268 struct xfs_mount *mp = bp->b_target->bt_mount; 267 struct xfs_mount *mp = bp->b_target->bt_mount;
269 268
270 if ((xfs_sb_version_hascrc(&mp->m_sb) && 269 if (xfs_sb_version_hascrc(&mp->m_sb) &&
271 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 270 !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF))
272 XFS_DIR3_DATA_CRC_OFF)) || 271 xfs_buf_ioerror(bp, EFSBADCRC);
273 !xfs_dir3_data_verify(bp)) { 272 else if (!xfs_dir3_data_verify(bp))
274 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
275 xfs_buf_ioerror(bp, EFSCORRUPTED); 273 xfs_buf_ioerror(bp, EFSCORRUPTED);
276 } 274
275 if (bp->b_error)
276 xfs_verifier_error(bp);
277} 277}
278 278
279static void 279static void
@@ -285,8 +285,8 @@ xfs_dir3_data_write_verify(
285 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 285 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
286 286
287 if (!xfs_dir3_data_verify(bp)) { 287 if (!xfs_dir3_data_verify(bp)) {
288 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
289 xfs_buf_ioerror(bp, EFSCORRUPTED); 288 xfs_buf_ioerror(bp, EFSCORRUPTED);
289 xfs_verifier_error(bp);
290 return; 290 return;
291 } 291 }
292 292
@@ -296,7 +296,7 @@ xfs_dir3_data_write_verify(
296 if (bip) 296 if (bip)
297 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); 297 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
298 298
299 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); 299 xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);
300} 300}
301 301
302const struct xfs_buf_ops xfs_dir3_data_buf_ops = { 302const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ae47ec6e16c4..d36e97df1187 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -179,13 +179,14 @@ __read_verify(
179{ 179{
180 struct xfs_mount *mp = bp->b_target->bt_mount; 180 struct xfs_mount *mp = bp->b_target->bt_mount;
181 181
182 if ((xfs_sb_version_hascrc(&mp->m_sb) && 182 if (xfs_sb_version_hascrc(&mp->m_sb) &&
183 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 183 !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF))
184 XFS_DIR3_LEAF_CRC_OFF)) || 184 xfs_buf_ioerror(bp, EFSBADCRC);
185 !xfs_dir3_leaf_verify(bp, magic)) { 185 else if (!xfs_dir3_leaf_verify(bp, magic))
186 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
187 xfs_buf_ioerror(bp, EFSCORRUPTED); 186 xfs_buf_ioerror(bp, EFSCORRUPTED);
188 } 187
188 if (bp->b_error)
189 xfs_verifier_error(bp);
189} 190}
190 191
191static void 192static void
@@ -198,8 +199,8 @@ __write_verify(
198 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; 199 struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;
199 200
200 if (!xfs_dir3_leaf_verify(bp, magic)) { 201 if (!xfs_dir3_leaf_verify(bp, magic)) {
201 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
202 xfs_buf_ioerror(bp, EFSCORRUPTED); 202 xfs_buf_ioerror(bp, EFSCORRUPTED);
203 xfs_verifier_error(bp);
203 return; 204 return;
204 } 205 }
205 206
@@ -209,7 +210,7 @@ __write_verify(
209 if (bip) 210 if (bip)
210 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); 211 hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn);
211 212
212 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); 213 xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);
213} 214}
214 215
215static void 216static void
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index 48c7d18f68c3..cb434d732681 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -115,13 +115,14 @@ xfs_dir3_free_read_verify(
115{ 115{
116 struct xfs_mount *mp = bp->b_target->bt_mount; 116 struct xfs_mount *mp = bp->b_target->bt_mount;
117 117
118 if ((xfs_sb_version_hascrc(&mp->m_sb) && 118 if (xfs_sb_version_hascrc(&mp->m_sb) &&
119 !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 119 !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF))
120 XFS_DIR3_FREE_CRC_OFF)) || 120 xfs_buf_ioerror(bp, EFSBADCRC);
121 !xfs_dir3_free_verify(bp)) { 121 else if (!xfs_dir3_free_verify(bp))
122 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
123 xfs_buf_ioerror(bp, EFSCORRUPTED); 122 xfs_buf_ioerror(bp, EFSCORRUPTED);
124 } 123
124 if (bp->b_error)
125 xfs_verifier_error(bp);
125} 126}
126 127
127static void 128static void
@@ -133,8 +134,8 @@ xfs_dir3_free_write_verify(
133 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; 134 struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr;
134 135
135 if (!xfs_dir3_free_verify(bp)) { 136 if (!xfs_dir3_free_verify(bp)) {
136 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
137 xfs_buf_ioerror(bp, EFSCORRUPTED); 137 xfs_buf_ioerror(bp, EFSCORRUPTED);
138 xfs_verifier_error(bp);
138 return; 139 return;
139 } 140 }
140 141
@@ -144,7 +145,7 @@ xfs_dir3_free_write_verify(
144 if (bip) 145 if (bip)
145 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); 146 hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn);
146 147
147 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); 148 xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);
148} 149}
149 150
150const struct xfs_buf_ops xfs_dir3_free_buf_ops = { 151const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 7aeb4c895b32..868b19f096bf 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -615,7 +615,7 @@ xfs_qm_dqread(
615 615
616 if (flags & XFS_QMOPT_DQALLOC) { 616 if (flags & XFS_QMOPT_DQALLOC) {
617 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); 617 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
618 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm, 618 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,
619 XFS_QM_DQALLOC_SPACE_RES(mp), 0); 619 XFS_QM_DQALLOC_SPACE_RES(mp), 0);
620 if (error) 620 if (error)
621 goto error1; 621 goto error1;
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c
index d401457d2f25..610da8177737 100644
--- a/fs/xfs/xfs_dquot_buf.c
+++ b/fs/xfs/xfs_dquot_buf.c
@@ -257,10 +257,13 @@ xfs_dquot_buf_read_verify(
257{ 257{
258 struct xfs_mount *mp = bp->b_target->bt_mount; 258 struct xfs_mount *mp = bp->b_target->bt_mount;
259 259
260 if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { 260 if (!xfs_dquot_buf_verify_crc(mp, bp))
261 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 261 xfs_buf_ioerror(bp, EFSBADCRC);
262 else if (!xfs_dquot_buf_verify(mp, bp))
262 xfs_buf_ioerror(bp, EFSCORRUPTED); 263 xfs_buf_ioerror(bp, EFSCORRUPTED);
263 } 264
265 if (bp->b_error)
266 xfs_verifier_error(bp);
264} 267}
265 268
266/* 269/*
@@ -275,8 +278,8 @@ xfs_dquot_buf_write_verify(
275 struct xfs_mount *mp = bp->b_target->bt_mount; 278 struct xfs_mount *mp = bp->b_target->bt_mount;
276 279
277 if (!xfs_dquot_buf_verify(mp, bp)) { 280 if (!xfs_dquot_buf_verify(mp, bp)) {
278 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
279 xfs_buf_ioerror(bp, EFSCORRUPTED); 281 xfs_buf_ioerror(bp, EFSCORRUPTED);
282 xfs_verifier_error(bp);
280 return; 283 return;
281 } 284 }
282} 285}
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 9995b807d627..edac5b057d28 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -156,7 +156,7 @@ xfs_error_report(
156{ 156{
157 if (level <= xfs_error_level) { 157 if (level <= xfs_error_level) {
158 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 158 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
159 "Internal error %s at line %d of file %s. Caller 0x%p", 159 "Internal error %s at line %d of file %s. Caller %pF",
160 tag, linenum, filename, ra); 160 tag, linenum, filename, ra);
161 161
162 xfs_stack_trace(); 162 xfs_stack_trace();
@@ -178,3 +178,28 @@ xfs_corruption_error(
178 xfs_error_report(tag, level, mp, filename, linenum, ra); 178 xfs_error_report(tag, level, mp, filename, linenum, ra);
179 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); 179 xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
180} 180}
181
182/*
183 * Warnings specifically for verifier errors. Differentiate CRC vs. invalid
184 * values, and omit the stack trace unless the error level is tuned high.
185 */
186void
187xfs_verifier_error(
188 struct xfs_buf *bp)
189{
190 struct xfs_mount *mp = bp->b_target->bt_mount;
191
192 xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
193 bp->b_error == EFSBADCRC ? "CRC error" : "corruption",
194 __return_address, bp->b_bn);
195
196 xfs_alert(mp, "Unmount and run xfs_repair");
197
198 if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
199 xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:");
200 xfs_hex_dump(xfs_buf_offset(bp, 0), 64);
201 }
202
203 if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
204 xfs_stack_trace();
205}
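
For reference, every read verifier converted in this series ends up with the same two-step shape: record the failure class on the buffer with xfs_buf_ioerror() (EFSBADCRC for a checksum mismatch, EFSCORRUPTED for bad contents), then let the new helper decide how to report it. A condensed sketch; xfs_foo_verify() and XFS_FOO_CRC_OFF are hypothetical stand-ins for a structure-specific check and CRC field offset:

static void
xfs_foo_read_verify(
        struct xfs_buf          *bp)
{
        struct xfs_mount        *mp = bp->b_target->bt_mount;

        /* CRC failure and structural corruption get distinct error codes... */
        if (xfs_sb_version_hascrc(&mp->m_sb) &&
            !xfs_buf_verify_cksum(bp, XFS_FOO_CRC_OFF))
                xfs_buf_ioerror(bp, EFSBADCRC);
        else if (!xfs_foo_verify(bp))
                xfs_buf_ioerror(bp, EFSCORRUPTED);

        /* ...but a single helper now owns the reporting policy. */
        if (bp->b_error)
                xfs_verifier_error(bp);
}
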
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 079a367f44ee..c1c57d4a4b5d 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -34,6 +34,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
34extern void xfs_corruption_error(const char *tag, int level, 34extern void xfs_corruption_error(const char *tag, int level,
35 struct xfs_mount *mp, void *p, const char *filename, 35 struct xfs_mount *mp, void *p, const char *filename,
36 int linenum, inst_t *ra); 36 int linenum, inst_t *ra);
37extern void xfs_verifier_error(struct xfs_buf *bp);
37 38
38#define XFS_ERROR_REPORT(e, lvl, mp) \ 39#define XFS_ERROR_REPORT(e, lvl, mp) \
39 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) 40 xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 64b48eade91d..f7abff8c16ca 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -823,7 +823,8 @@ xfs_file_fallocate(
823 823
824 if (!S_ISREG(inode->i_mode)) 824 if (!S_ISREG(inode->i_mode))
825 return -EINVAL; 825 return -EINVAL;
826 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) 826 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
827 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
827 return -EOPNOTSUPP; 828 return -EOPNOTSUPP;
828 829
829 xfs_ilock(ip, XFS_IOLOCK_EXCL); 830 xfs_ilock(ip, XFS_IOLOCK_EXCL);
@@ -831,6 +832,20 @@ xfs_file_fallocate(
831 error = xfs_free_file_space(ip, offset, len); 832 error = xfs_free_file_space(ip, offset, len);
832 if (error) 833 if (error)
833 goto out_unlock; 834 goto out_unlock;
835 } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
836 unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
837
838 if (offset & blksize_mask || len & blksize_mask) {
839 error = -EINVAL;
840 goto out_unlock;
841 }
842
843 ASSERT(offset + len < i_size_read(inode));
844 new_size = i_size_read(inode) - len;
845
846 error = xfs_collapse_file_space(ip, offset, len);
847 if (error)
848 goto out_unlock;
834 } else { 849 } else {
835 if (!(mode & FALLOC_FL_KEEP_SIZE) && 850 if (!(mode & FALLOC_FL_KEEP_SIZE) &&
836 offset + len > i_size_read(inode)) { 851 offset + len > i_size_read(inode)) {
@@ -840,8 +855,11 @@ xfs_file_fallocate(
840 goto out_unlock; 855 goto out_unlock;
841 } 856 }
842 857
843 error = xfs_alloc_file_space(ip, offset, len, 858 if (mode & FALLOC_FL_ZERO_RANGE)
844 XFS_BMAPI_PREALLOC); 859 error = xfs_zero_file_space(ip, offset, len);
860 else
861 error = xfs_alloc_file_space(ip, offset, len,
862 XFS_BMAPI_PREALLOC);
845 if (error) 863 if (error)
846 goto out_unlock; 864 goto out_unlock;
847 } 865 }
@@ -859,7 +877,7 @@ xfs_file_fallocate(
859 if (ip->i_d.di_mode & S_IXGRP) 877 if (ip->i_d.di_mode & S_IXGRP)
860 ip->i_d.di_mode &= ~S_ISGID; 878 ip->i_d.di_mode &= ~S_ISGID;
861 879
862 if (!(mode & FALLOC_FL_PUNCH_HOLE)) 880 if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
863 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; 881 ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
864 882
865 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 883 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
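
From userspace the two new xfs_file_fallocate() branches are reached through fallocate(2). A minimal sketch, assuming a kernel and glibc new enough to expose both flags; as the checks above require, FALLOC_FL_COLLAPSE_RANGE needs a block-aligned offset and length and a range that ends before EOF:

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
        if (argc != 2) {
                fprintf(stderr, "usage: %s <file>\n", argv[0]);
                return 1;
        }

        int fd = open(argv[1], O_RDWR);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Zero 1 MiB at offset 1 MiB; blocks stay allocated but read back as zeroes. */
        if (fallocate(fd, FALLOC_FL_ZERO_RANGE, 1 << 20, 1 << 20) < 0)
                perror("FALLOC_FL_ZERO_RANGE");

        /* Remove the same 1 MiB range and shift the rest of the file down. */
        if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 1 << 20, 1 << 20) < 0)
                perror("FALLOC_FL_COLLAPSE_RANGE");

        close(fd);
        return 0;
}
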
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h
index b6ab5a3cfa12..9898f31d05d8 100644
--- a/fs/xfs/xfs_format.h
+++ b/fs/xfs/xfs_format.h
@@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr {
145 __be64 sl_lsn; 145 __be64 sl_lsn;
146}; 146};
147 147
148#define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc)
149
148/* 150/*
149 * The maximum pathlen is 1024 bytes. Since the minimum file system 151 * The maximum pathlen is 1024 bytes. Since the minimum file system
150 * blocksize is 512 bytes, we can get a max of 3 extents back from 152 * blocksize is 512 bytes, we can get a max of 3 extents back from
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 5d7f105a1c82..8f711db61a0c 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -363,6 +363,18 @@ xfs_ialloc_ag_alloc(
363 args.minleft = args.mp->m_in_maxlevels - 1; 363 args.minleft = args.mp->m_in_maxlevels - 1;
364 if ((error = xfs_alloc_vextent(&args))) 364 if ((error = xfs_alloc_vextent(&args)))
365 return error; 365 return error;
366
367 /*
368 * This request might have dirtied the transaction if the AG can
369 * satisfy the request, but the exact block was not available.
370 * If the allocation did fail, subsequent requests will relax
371 * the exact agbno requirement and increase the alignment
372 * instead. It is critical that the total size of the request
373 * (len + alignment + slop) does not increase from this point
374 * on, so reset minalignslop to ensure it is not included in
375 * subsequent requests.
376 */
377 args.minalignslop = 0;
366 } else 378 } else
367 args.fsbno = NULLFSBLOCK; 379 args.fsbno = NULLFSBLOCK;
368 380
@@ -1568,18 +1580,17 @@ xfs_agi_read_verify(
1568 struct xfs_buf *bp) 1580 struct xfs_buf *bp)
1569{ 1581{
1570 struct xfs_mount *mp = bp->b_target->bt_mount; 1582 struct xfs_mount *mp = bp->b_target->bt_mount;
1571 int agi_ok = 1;
1572
1573 if (xfs_sb_version_hascrc(&mp->m_sb))
1574 agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length),
1575 offsetof(struct xfs_agi, agi_crc));
1576 agi_ok = agi_ok && xfs_agi_verify(bp);
1577 1583
1578 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1584 if (xfs_sb_version_hascrc(&mp->m_sb) &&
1579 XFS_RANDOM_IALLOC_READ_AGI))) { 1585 !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
1580 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); 1586 xfs_buf_ioerror(bp, EFSBADCRC);
1587 else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp,
1588 XFS_ERRTAG_IALLOC_READ_AGI,
1589 XFS_RANDOM_IALLOC_READ_AGI))
1581 xfs_buf_ioerror(bp, EFSCORRUPTED); 1590 xfs_buf_ioerror(bp, EFSCORRUPTED);
1582 } 1591
1592 if (bp->b_error)
1593 xfs_verifier_error(bp);
1583} 1594}
1584 1595
1585static void 1596static void
@@ -1590,8 +1601,8 @@ xfs_agi_write_verify(
1590 struct xfs_buf_log_item *bip = bp->b_fspriv; 1601 struct xfs_buf_log_item *bip = bp->b_fspriv;
1591 1602
1592 if (!xfs_agi_verify(bp)) { 1603 if (!xfs_agi_verify(bp)) {
1593 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
1594 xfs_buf_ioerror(bp, EFSCORRUPTED); 1604 xfs_buf_ioerror(bp, EFSCORRUPTED);
1605 xfs_verifier_error(bp);
1595 return; 1606 return;
1596 } 1607 }
1597 1608
@@ -1600,8 +1611,7 @@ xfs_agi_write_verify(
1600 1611
1601 if (bip) 1612 if (bip)
1602 XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); 1613 XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
1603 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 1614 xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
1604 offsetof(struct xfs_agi, agi_crc));
1605} 1615}
1606 1616
1607const struct xfs_buf_ops xfs_agi_buf_ops = { 1617const struct xfs_buf_ops xfs_agi_buf_ops = {
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index c8fa5bbb36de..7e309b11e87d 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -243,12 +243,14 @@ static void
243xfs_inobt_read_verify( 243xfs_inobt_read_verify(
244 struct xfs_buf *bp) 244 struct xfs_buf *bp)
245{ 245{
246 if (!(xfs_btree_sblock_verify_crc(bp) && 246 if (!xfs_btree_sblock_verify_crc(bp))
247 xfs_inobt_verify(bp))) { 247 xfs_buf_ioerror(bp, EFSBADCRC);
248 trace_xfs_btree_corrupt(bp, _RET_IP_); 248 else if (!xfs_inobt_verify(bp))
249 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
250 bp->b_target->bt_mount, bp->b_addr);
251 xfs_buf_ioerror(bp, EFSCORRUPTED); 249 xfs_buf_ioerror(bp, EFSCORRUPTED);
250
251 if (bp->b_error) {
252 trace_xfs_btree_corrupt(bp, _RET_IP_);
253 xfs_verifier_error(bp);
252 } 254 }
253} 255}
254 256
@@ -258,9 +260,9 @@ xfs_inobt_write_verify(
258{ 260{
259 if (!xfs_inobt_verify(bp)) { 261 if (!xfs_inobt_verify(bp)) {
260 trace_xfs_btree_corrupt(bp, _RET_IP_); 262 trace_xfs_btree_corrupt(bp, _RET_IP_);
261 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
262 bp->b_target->bt_mount, bp->b_addr);
263 xfs_buf_ioerror(bp, EFSCORRUPTED); 263 xfs_buf_ioerror(bp, EFSCORRUPTED);
264 xfs_verifier_error(bp);
265 return;
264 } 266 }
265 xfs_btree_sblock_calc_crc(bp); 267 xfs_btree_sblock_calc_crc(bp);
266 268
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 3a137e9f9a7d..5e7a38fa6ee6 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -42,7 +42,6 @@
42#include "xfs_bmap_util.h" 42#include "xfs_bmap_util.h"
43#include "xfs_error.h" 43#include "xfs_error.h"
44#include "xfs_quota.h" 44#include "xfs_quota.h"
45#include "xfs_dinode.h"
46#include "xfs_filestream.h" 45#include "xfs_filestream.h"
47#include "xfs_cksum.h" 46#include "xfs_cksum.h"
48#include "xfs_trace.h" 47#include "xfs_trace.h"
@@ -62,6 +61,8 @@ kmem_zone_t *xfs_inode_zone;
62 61
63STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); 62STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
64 63
64STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
65
65/* 66/*
66 * helper function to extract extent size hint from inode 67 * helper function to extract extent size hint from inode
67 */ 68 */
@@ -1115,7 +1116,7 @@ xfs_bumplink(
1115{ 1116{
1116 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1117 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1117 1118
1118 ASSERT(ip->i_d.di_nlink > 0); 1119 ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
1119 ip->i_d.di_nlink++; 1120 ip->i_d.di_nlink++;
1120 inc_nlink(VFS_I(ip)); 1121 inc_nlink(VFS_I(ip));
1121 if ((ip->i_d.di_version == 1) && 1122 if ((ip->i_d.di_version == 1) &&
@@ -1165,10 +1166,7 @@ xfs_create(
1165 if (XFS_FORCED_SHUTDOWN(mp)) 1166 if (XFS_FORCED_SHUTDOWN(mp))
1166 return XFS_ERROR(EIO); 1167 return XFS_ERROR(EIO);
1167 1168
1168 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 1169 prid = xfs_get_initial_prid(dp);
1169 prid = xfs_get_projid(dp);
1170 else
1171 prid = XFS_PROJID_DEFAULT;
1172 1170
1173 /* 1171 /*
1174 * Make sure that we have allocated dquot(s) on disk. 1172 * Make sure that we have allocated dquot(s) on disk.
@@ -1333,6 +1331,113 @@ xfs_create(
1333} 1331}
1334 1332
1335int 1333int
1334xfs_create_tmpfile(
1335 struct xfs_inode *dp,
1336 struct dentry *dentry,
1337 umode_t mode)
1338{
1339 struct xfs_mount *mp = dp->i_mount;
1340 struct xfs_inode *ip = NULL;
1341 struct xfs_trans *tp = NULL;
1342 int error;
1343 uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
1344 prid_t prid;
1345 struct xfs_dquot *udqp = NULL;
1346 struct xfs_dquot *gdqp = NULL;
1347 struct xfs_dquot *pdqp = NULL;
1348 struct xfs_trans_res *tres;
1349 uint resblks;
1350
1351 if (XFS_FORCED_SHUTDOWN(mp))
1352 return XFS_ERROR(EIO);
1353
1354 prid = xfs_get_initial_prid(dp);
1355
1356 /*
1357 * Make sure that we have allocated dquot(s) on disk.
1358 */
1359 error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()),
1360 xfs_kgid_to_gid(current_fsgid()), prid,
1361 XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
1362 &udqp, &gdqp, &pdqp);
1363 if (error)
1364 return error;
1365
1366 resblks = XFS_IALLOC_SPACE_RES(mp);
1367 tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);
1368
1369 tres = &M_RES(mp)->tr_create_tmpfile;
1370 error = xfs_trans_reserve(tp, tres, resblks, 0);
1371 if (error == ENOSPC) {
1372 /* No space at all so try a "no-allocation" reservation */
1373 resblks = 0;
1374 error = xfs_trans_reserve(tp, tres, 0, 0);
1375 }
1376 if (error) {
1377 cancel_flags = 0;
1378 goto out_trans_cancel;
1379 }
1380
1381 error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp,
1382 pdqp, resblks, 1, 0);
1383 if (error)
1384 goto out_trans_cancel;
1385
1386 error = xfs_dir_ialloc(&tp, dp, mode, 1, 0,
1387 prid, resblks > 0, &ip, NULL);
1388 if (error) {
1389 if (error == ENOSPC)
1390 goto out_trans_cancel;
1391 goto out_trans_abort;
1392 }
1393
1394 if (mp->m_flags & XFS_MOUNT_WSYNC)
1395 xfs_trans_set_sync(tp);
1396
1397 /*
1398 * Attach the dquot(s) to the inodes and modify them incore.
1399 * These ids of the inode couldn't have changed since the new
1400 * inode has been locked ever since it was created.
1401 */
1402 xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
1403
1404 ip->i_d.di_nlink--;
1405 d_tmpfile(dentry, VFS_I(ip));
1406 error = xfs_iunlink(tp, ip);
1407 if (error)
1408 goto out_trans_abort;
1409
1410 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
1411 if (error)
1412 goto out_release_inode;
1413
1414 xfs_qm_dqrele(udqp);
1415 xfs_qm_dqrele(gdqp);
1416 xfs_qm_dqrele(pdqp);
1417
1418 return 0;
1419
1420 out_trans_abort:
1421 cancel_flags |= XFS_TRANS_ABORT;
1422 out_trans_cancel:
1423 xfs_trans_cancel(tp, cancel_flags);
1424 out_release_inode:
1425 /*
1426 * Wait until after the current transaction is aborted to
1427 * release the inode. This prevents recursive transactions
1428 * and deadlocks from xfs_inactive.
1429 */
1430 if (ip)
1431 IRELE(ip);
1432
1433 xfs_qm_dqrele(udqp);
1434 xfs_qm_dqrele(gdqp);
1435 xfs_qm_dqrele(pdqp);
1436
1437 return error;
1438}
1439
1440int
1336xfs_link( 1441xfs_link(
1337 xfs_inode_t *tdp, 1442 xfs_inode_t *tdp,
1338 xfs_inode_t *sip, 1443 xfs_inode_t *sip,
@@ -1397,6 +1502,12 @@ xfs_link(
1397 1502
1398 xfs_bmap_init(&free_list, &first_block); 1503 xfs_bmap_init(&free_list, &first_block);
1399 1504
1505 if (sip->i_d.di_nlink == 0) {
1506 error = xfs_iunlink_remove(tp, sip);
1507 if (error)
1508 goto abort_return;
1509 }
1510
1400 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, 1511 error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,
1401 &first_block, &free_list, resblks); 1512 &first_block, &free_list, resblks);
1402 if (error) 1513 if (error)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 65e2350f449c..396cc1fafd0d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -20,6 +20,7 @@
20 20
21#include "xfs_inode_buf.h" 21#include "xfs_inode_buf.h"
22#include "xfs_inode_fork.h" 22#include "xfs_inode_fork.h"
23#include "xfs_dinode.h"
23 24
24/* 25/*
25 * Kernel only inode definitions 26 * Kernel only inode definitions
@@ -192,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip,
192 ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); 193 ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);
193} 194}
194 195
196static inline prid_t
197xfs_get_initial_prid(struct xfs_inode *dp)
198{
199 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
200 return xfs_get_projid(dp);
201
202 return XFS_PROJID_DEFAULT;
203}
204
195/* 205/*
196 * In-core inode flags. 206 * In-core inode flags.
197 */ 207 */
@@ -323,6 +333,8 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
323 struct xfs_inode **ipp, struct xfs_name *ci_name); 333 struct xfs_inode **ipp, struct xfs_name *ci_name);
324int xfs_create(struct xfs_inode *dp, struct xfs_name *name, 334int xfs_create(struct xfs_inode *dp, struct xfs_name *name,
325 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); 335 umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp);
336int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry,
337 umode_t mode);
326int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, 338int xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
327 struct xfs_inode *ip); 339 struct xfs_inode *ip);
328int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, 340int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c
index 4fc9f39dd89e..24e993996bdc 100644
--- a/fs/xfs/xfs_inode_buf.c
+++ b/fs/xfs/xfs_inode_buf.c
@@ -102,8 +102,7 @@ xfs_inode_buf_verify(
102 } 102 }
103 103
104 xfs_buf_ioerror(bp, EFSCORRUPTED); 104 xfs_buf_ioerror(bp, EFSCORRUPTED);
105 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, 105 xfs_verifier_error(bp);
106 mp, dip);
107#ifdef DEBUG 106#ifdef DEBUG
108 xfs_alert(mp, 107 xfs_alert(mp,
109 "bad inode magic/vsn daddr %lld #%d (magic=%x)", 108 "bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -306,7 +305,7 @@ xfs_dinode_verify(
306 if (!xfs_sb_version_hascrc(&mp->m_sb)) 305 if (!xfs_sb_version_hascrc(&mp->m_sb))
307 return false; 306 return false;
308 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 307 if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize,
309 offsetof(struct xfs_dinode, di_crc))) 308 XFS_DINODE_CRC_OFF))
310 return false; 309 return false;
311 if (be64_to_cpu(dip->di_ino) != ip->i_ino) 310 if (be64_to_cpu(dip->di_ino) != ip->i_ino)
312 return false; 311 return false;
@@ -327,7 +326,7 @@ xfs_dinode_calc_crc(
327 326
328 ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); 327 ASSERT(xfs_sb_version_hascrc(&mp->m_sb));
329 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, 328 crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize,
330 offsetof(struct xfs_dinode, di_crc)); 329 XFS_DINODE_CRC_OFF);
331 dip->di_crc = xfs_end_cksum(crc); 330 dip->di_crc = xfs_end_cksum(crc);
332} 331}
333 332
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 22d1cbea283d..3b80ebae05f5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -128,7 +128,6 @@ xfs_iomap_write_direct(
128 xfs_fsblock_t firstfsb; 128 xfs_fsblock_t firstfsb;
129 xfs_extlen_t extsz, temp; 129 xfs_extlen_t extsz, temp;
130 int nimaps; 130 int nimaps;
131 int bmapi_flag;
132 int quota_flag; 131 int quota_flag;
133 int rt; 132 int rt;
134 xfs_trans_t *tp; 133 xfs_trans_t *tp;
@@ -200,18 +199,15 @@ xfs_iomap_write_direct(
200 199
201 xfs_trans_ijoin(tp, ip, 0); 200 xfs_trans_ijoin(tp, ip, 0);
202 201
203 bmapi_flag = 0;
204 if (offset < XFS_ISIZE(ip) || extsz)
205 bmapi_flag |= XFS_BMAPI_PREALLOC;
206
207 /* 202 /*
208 * From this point onwards we overwrite the imap pointer that the 203 * From this point onwards we overwrite the imap pointer that the
209 * caller gave to us. 204 * caller gave to us.
210 */ 205 */
211 xfs_bmap_init(&free_list, &firstfsb); 206 xfs_bmap_init(&free_list, &firstfsb);
212 nimaps = 1; 207 nimaps = 1;
213 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, 208 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
214 &firstfsb, 0, imap, &nimaps, &free_list); 209 XFS_BMAPI_PREALLOC, &firstfsb, 0,
210 imap, &nimaps, &free_list);
215 if (error) 211 if (error)
216 goto out_bmap_cancel; 212 goto out_bmap_cancel;
217 213
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9ddfb8190ca1..89b07e43ca28 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -39,6 +39,7 @@
39#include "xfs_da_btree.h" 39#include "xfs_da_btree.h"
40#include "xfs_dir2_priv.h" 40#include "xfs_dir2_priv.h"
41#include "xfs_dinode.h" 41#include "xfs_dinode.h"
42#include "xfs_trans_space.h"
42 43
43#include <linux/capability.h> 44#include <linux/capability.h>
44#include <linux/xattr.h> 45#include <linux/xattr.h>
@@ -48,6 +49,18 @@
48#include <linux/fiemap.h> 49#include <linux/fiemap.h>
49#include <linux/slab.h> 50#include <linux/slab.h>
50 51
52/*
53 * Directories have different lock order w.r.t. mmap_sem compared to regular
54 * files. This is due to readdir potentially triggering page faults on a user
55 * buffer inside filldir(), and this happens with the ilock on the directory
56 * held. For regular files, the lock order is the other way around - the
57 * mmap_sem is taken during the page fault, and then we lock the ilock to do
58 * block mapping. Hence we need a different class for the directory ilock so
59 * that lockdep can tell them apart.
60 */
61static struct lock_class_key xfs_nondir_ilock_class;
62static struct lock_class_key xfs_dir_ilock_class;
63
51static int 64static int
52xfs_initxattrs( 65xfs_initxattrs(
53 struct inode *inode, 66 struct inode *inode,
@@ -1034,6 +1047,19 @@ xfs_vn_fiemap(
1034 return 0; 1047 return 0;
1035} 1048}
1036 1049
1050STATIC int
1051xfs_vn_tmpfile(
1052 struct inode *dir,
1053 struct dentry *dentry,
1054 umode_t mode)
1055{
1056 int error;
1057
1058 error = xfs_create_tmpfile(XFS_I(dir), dentry, mode);
1059
1060 return -error;
1061}
1062
1037static const struct inode_operations xfs_inode_operations = { 1063static const struct inode_operations xfs_inode_operations = {
1038 .get_acl = xfs_get_acl, 1064 .get_acl = xfs_get_acl,
1039 .set_acl = xfs_set_acl, 1065 .set_acl = xfs_set_acl,
@@ -1072,6 +1098,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
1072 .removexattr = generic_removexattr, 1098 .removexattr = generic_removexattr,
1073 .listxattr = xfs_vn_listxattr, 1099 .listxattr = xfs_vn_listxattr,
1074 .update_time = xfs_vn_update_time, 1100 .update_time = xfs_vn_update_time,
1101 .tmpfile = xfs_vn_tmpfile,
1075}; 1102};
1076 1103
1077static const struct inode_operations xfs_dir_ci_inode_operations = { 1104static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1099,6 +1126,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
1099 .removexattr = generic_removexattr, 1126 .removexattr = generic_removexattr,
1100 .listxattr = xfs_vn_listxattr, 1127 .listxattr = xfs_vn_listxattr,
1101 .update_time = xfs_vn_update_time, 1128 .update_time = xfs_vn_update_time,
1129 .tmpfile = xfs_vn_tmpfile,
1102}; 1130};
1103 1131
1104static const struct inode_operations xfs_symlink_inode_operations = { 1132static const struct inode_operations xfs_symlink_inode_operations = {
@@ -1191,6 +1219,7 @@ xfs_setup_inode(
1191 xfs_diflags_to_iflags(inode, ip); 1219 xfs_diflags_to_iflags(inode, ip);
1192 1220
1193 ip->d_ops = ip->i_mount->m_nondir_inode_ops; 1221 ip->d_ops = ip->i_mount->m_nondir_inode_ops;
1222 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
1194 switch (inode->i_mode & S_IFMT) { 1223 switch (inode->i_mode & S_IFMT) {
1195 case S_IFREG: 1224 case S_IFREG:
1196 inode->i_op = &xfs_inode_operations; 1225 inode->i_op = &xfs_inode_operations;
@@ -1198,6 +1227,7 @@ xfs_setup_inode(
1198 inode->i_mapping->a_ops = &xfs_address_space_operations; 1227 inode->i_mapping->a_ops = &xfs_address_space_operations;
1199 break; 1228 break;
1200 case S_IFDIR: 1229 case S_IFDIR:
1230 lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
1201 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) 1231 if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
1202 inode->i_op = &xfs_dir_ci_inode_operations; 1232 inode->i_op = &xfs_dir_ci_inode_operations;
1203 else 1233 else
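
The new .tmpfile operation is what backs O_TMPFILE on XFS: xfs_create_tmpfile() creates the inode directly on the unlinked list, and giving it a name later goes through xfs_link(), which is why xfs_iunlink_remove() is now called there when di_nlink is zero. A minimal userspace sketch; /mnt/xfs is a hypothetical XFS mount point:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        /* Unnamed file: it lives on the AGI unlinked list until linked or closed. */
        int fd = open("/mnt/xfs", O_TMPFILE | O_RDWR, 0600);
        if (fd < 0) {
                perror("O_TMPFILE");
                return 1;
        }

        if (write(fd, "data\n", 5) != 5)
                perror("write");

        /* Materialise the file under a name; this exercises the new
         * xfs_iunlink_remove() call in xfs_link(). */
        char path[64];
        snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
        if (linkat(AT_FDCWD, path, AT_FDCWD, "/mnt/xfs/now-visible",
                   AT_SYMLINK_FOLLOW) < 0)
                perror("linkat");

        close(fd);
        return 0;
}
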
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index f9bb590acc0e..825249d2dfc1 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -119,6 +119,7 @@ typedef __uint64_t __psunsigned_t;
119#include "xfs_iops.h" 119#include "xfs_iops.h"
120#include "xfs_aops.h" 120#include "xfs_aops.h"
121#include "xfs_super.h" 121#include "xfs_super.h"
122#include "xfs_cksum.h"
122#include "xfs_buf.h" 123#include "xfs_buf.h"
123#include "xfs_message.h" 124#include "xfs_message.h"
124 125
@@ -178,6 +179,7 @@ typedef __uint64_t __psunsigned_t;
178#define ENOATTR ENODATA /* Attribute not found */ 179#define ENOATTR ENODATA /* Attribute not found */
179#define EWRONGFS EINVAL /* Mount with wrong filesystem type */ 180#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
180#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ 181#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
182#define EFSBADCRC EBADMSG /* Bad CRC detected */
181 183
182#define SYNCHRONIZE() barrier() 184#define SYNCHRONIZE() barrier()
183#define __return_address __builtin_return_address(0) 185#define __return_address __builtin_return_address(0)
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index b0f4ef77fa70..2c4004475e71 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -175,7 +175,7 @@ void xlog_iodone(struct xfs_buf *);
175struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); 175struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
176void xfs_log_ticket_put(struct xlog_ticket *ticket); 176void xfs_log_ticket_put(struct xlog_ticket *ticket);
177 177
178int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, 178void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
179 xfs_lsn_t *commit_lsn, int flags); 179 xfs_lsn_t *commit_lsn, int flags);
180bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); 180bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
181 181
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 4ef6fdbced78..7e5455391176 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -499,13 +499,6 @@ xlog_cil_push(
499 cil->xc_ctx = new_ctx; 499 cil->xc_ctx = new_ctx;
500 500
501 /* 501 /*
502 * mirror the new sequence into the cil structure so that we can do
503 * unlocked checks against the current sequence in log forces without
504 * risking deferencing a freed context pointer.
505 */
506 cil->xc_current_sequence = new_ctx->sequence;
507
508 /*
509 * The switch is now done, so we can drop the context lock and move out 502 * The switch is now done, so we can drop the context lock and move out
510 * of a shared context. We can't just go straight to the commit record, 503 * of a shared context. We can't just go straight to the commit record,
511 * though - we need to synchronise with previous and future commits so 504 * though - we need to synchronise with previous and future commits so
@@ -523,8 +516,15 @@ xlog_cil_push(
523 * Hence we need to add this context to the committing context list so 516 * Hence we need to add this context to the committing context list so
524 * that higher sequences will wait for us to write out a commit record 517 * that higher sequences will wait for us to write out a commit record
525 * before they do. 518 * before they do.
519 *
520 * xfs_log_force_lsn requires us to mirror the new sequence into the cil
521 * structure atomically with the addition of this sequence to the
522 * committing list. This also ensures that we can do unlocked checks
523 * against the current sequence in log forces without risking
 524 * dereferencing a freed context pointer.
526 */ 525 */
527 spin_lock(&cil->xc_push_lock); 526 spin_lock(&cil->xc_push_lock);
527 cil->xc_current_sequence = new_ctx->sequence;
528 list_add(&ctx->committing, &cil->xc_committing); 528 list_add(&ctx->committing, &cil->xc_committing);
529 spin_unlock(&cil->xc_push_lock); 529 spin_unlock(&cil->xc_push_lock);
530 up_write(&cil->xc_ctx_lock); 530 up_write(&cil->xc_ctx_lock);
@@ -662,8 +662,14 @@ xlog_cil_push_background(
662 662
663} 663}
664 664
665/*
666 * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
667 * number that is passed. When it returns, the work will be queued for
668 * @push_seq, but it won't be completed. The caller is expected to do any
669 * waiting for push_seq to complete if it is required.
670 */
665static void 671static void
666xlog_cil_push_foreground( 672xlog_cil_push_now(
667 struct xlog *log, 673 struct xlog *log,
668 xfs_lsn_t push_seq) 674 xfs_lsn_t push_seq)
669{ 675{
@@ -688,10 +694,8 @@ xlog_cil_push_foreground(
688 } 694 }
689 695
690 cil->xc_push_seq = push_seq; 696 cil->xc_push_seq = push_seq;
697 queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
691 spin_unlock(&cil->xc_push_lock); 698 spin_unlock(&cil->xc_push_lock);
692
693 /* do the push now */
694 xlog_cil_push(log);
695} 699}
696 700
697bool 701bool
@@ -721,7 +725,7 @@ xlog_cil_empty(
721 * background commit, returns without it held once background commits are 725 * background commit, returns without it held once background commits are
722 * allowed again. 726 * allowed again.
723 */ 727 */
724int 728void
725xfs_log_commit_cil( 729xfs_log_commit_cil(
726 struct xfs_mount *mp, 730 struct xfs_mount *mp,
727 struct xfs_trans *tp, 731 struct xfs_trans *tp,
@@ -767,7 +771,6 @@ xfs_log_commit_cil(
767 xlog_cil_push_background(log); 771 xlog_cil_push_background(log);
768 772
769 up_read(&cil->xc_ctx_lock); 773 up_read(&cil->xc_ctx_lock);
770 return 0;
771} 774}
772 775
773/* 776/*
@@ -796,7 +799,8 @@ xlog_cil_force_lsn(
796 * xlog_cil_push() handles racing pushes for the same sequence, 799 * xlog_cil_push() handles racing pushes for the same sequence,
797 * so no need to deal with it here. 800 * so no need to deal with it here.
798 */ 801 */
799 xlog_cil_push_foreground(log, sequence); 802restart:
803 xlog_cil_push_now(log, sequence);
800 804
801 /* 805 /*
802 * See if we can find a previous sequence still committing. 806 * See if we can find a previous sequence still committing.
@@ -804,7 +808,6 @@ xlog_cil_force_lsn(
804 * before allowing the force of push_seq to go ahead. Hence block 808 * before allowing the force of push_seq to go ahead. Hence block
805 * on commits for those as well. 809 * on commits for those as well.
806 */ 810 */
807restart:
808 spin_lock(&cil->xc_push_lock); 811 spin_lock(&cil->xc_push_lock);
809 list_for_each_entry(ctx, &cil->xc_committing, committing) { 812 list_for_each_entry(ctx, &cil->xc_committing, committing) {
810 if (ctx->sequence > sequence) 813 if (ctx->sequence > sequence)
@@ -822,6 +825,28 @@ restart:
822 /* found it! */ 825 /* found it! */
823 commit_lsn = ctx->commit_lsn; 826 commit_lsn = ctx->commit_lsn;
824 } 827 }
828
829 /*
830 * The call to xlog_cil_push_now() executes the push in the background.
 831 * Hence by the time we have got here, our sequence may not have been
832 * pushed yet. This is true if the current sequence still matches the
833 * push sequence after the above wait loop and the CIL still contains
834 * dirty objects.
835 *
836 * When the push occurs, it will empty the CIL and
 837 * atomically increment the current sequence past the push sequence and
838 * move it into the committing list. Of course, if the CIL is clean at
839 * the time of the push, it won't have pushed the CIL at all, so in that
840 * case we should try the push for this sequence again from the start
841 * just in case.
842 */
843
844 if (sequence == cil->xc_current_sequence &&
845 !list_empty(&cil->xc_cil)) {
846 spin_unlock(&cil->xc_push_lock);
847 goto restart;
848 }
849
825 spin_unlock(&cil->xc_push_lock); 850 spin_unlock(&cil->xc_push_lock);
826 return commit_lsn; 851 return commit_lsn;
827} 852}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index f96c05669a9e..993cb19e7d39 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -314,6 +314,9 @@ reread:
314 error = bp->b_error; 314 error = bp->b_error;
315 if (loud) 315 if (loud)
316 xfs_warn(mp, "SB validate failed with error %d.", error); 316 xfs_warn(mp, "SB validate failed with error %d.", error);
317 /* bad CRC means corrupted metadata */
318 if (error == EFSBADCRC)
319 error = EFSCORRUPTED;
317 goto release_buf; 320 goto release_buf;
318 } 321 }
319 322
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index a6a76b2b6a85..ec5ca65c6211 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -842,7 +842,7 @@ xfs_growfs_rt_alloc(
842 /* 842 /*
843 * Reserve space & log for one extent added to the file. 843 * Reserve space & log for one extent added to the file.
844 */ 844 */
845 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, 845 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc,
846 resblks, 0); 846 resblks, 0);
847 if (error) 847 if (error)
848 goto error_cancel; 848 goto error_cancel;
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c
index 1e116794bb66..0c0e41bbe4e3 100644
--- a/fs/xfs/xfs_sb.c
+++ b/fs/xfs/xfs_sb.c
@@ -288,6 +288,7 @@ xfs_mount_validate_sb(
288 sbp->sb_inodelog < XFS_DINODE_MIN_LOG || 288 sbp->sb_inodelog < XFS_DINODE_MIN_LOG ||
289 sbp->sb_inodelog > XFS_DINODE_MAX_LOG || 289 sbp->sb_inodelog > XFS_DINODE_MAX_LOG ||
290 sbp->sb_inodesize != (1 << sbp->sb_inodelog) || 290 sbp->sb_inodesize != (1 << sbp->sb_inodelog) ||
291 sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||
291 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 292 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
292 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 293 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
293 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 294 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
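
The new sb_inopblock check is plain arithmetic: inodes per block must match the block size divided by the inode size (both are powers of two, so the howmany() ceiling division is exact on a valid superblock). A small standalone illustration with hypothetical geometry:

#include <stdio.h>

/* howmany() as used by the superblock check: ceiling division. */
#define howmany(x, y)   (((x) + ((y) - 1)) / (y))

int main(void)
{
        unsigned int blocksize = 4096;  /* hypothetical sb_blocksize */
        unsigned int inodesize = 256;   /* hypothetical sb_inodesize */

        /* A superblock claiming anything else for sb_inopblock is now rejected. */
        printf("expected sb_inopblock = %u\n", howmany(blocksize, inodesize));  /* 16 */
        return 0;
}
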
@@ -610,12 +611,11 @@ xfs_sb_read_verify(
610 XFS_SB_VERSION_5) || 611 XFS_SB_VERSION_5) ||
611 dsb->sb_crc != 0)) { 612 dsb->sb_crc != 0)) {
612 613
613 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 614 if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) {
614 offsetof(struct xfs_sb, sb_crc))) {
615 /* Only fail bad secondaries on a known V5 filesystem */ 615 /* Only fail bad secondaries on a known V5 filesystem */
616 if (bp->b_bn == XFS_SB_DADDR || 616 if (bp->b_bn == XFS_SB_DADDR ||
617 xfs_sb_version_hascrc(&mp->m_sb)) { 617 xfs_sb_version_hascrc(&mp->m_sb)) {
618 error = EFSCORRUPTED; 618 error = EFSBADCRC;
619 goto out_error; 619 goto out_error;
620 } 620 }
621 } 621 }
@@ -624,10 +624,9 @@ xfs_sb_read_verify(
624 624
625out_error: 625out_error:
626 if (error) { 626 if (error) {
627 if (error == EFSCORRUPTED)
628 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
629 mp, bp->b_addr);
630 xfs_buf_ioerror(bp, error); 627 xfs_buf_ioerror(bp, error);
628 if (error == EFSCORRUPTED || error == EFSBADCRC)
629 xfs_verifier_error(bp);
631 } 630 }
632} 631}
633 632
@@ -662,9 +661,8 @@ xfs_sb_write_verify(
662 661
663 error = xfs_sb_verify(bp, false); 662 error = xfs_sb_verify(bp, false);
664 if (error) { 663 if (error) {
665 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
666 mp, bp->b_addr);
667 xfs_buf_ioerror(bp, error); 664 xfs_buf_ioerror(bp, error);
665 xfs_verifier_error(bp);
668 return; 666 return;
669 } 667 }
670 668
@@ -674,8 +672,7 @@ xfs_sb_write_verify(
674 if (bip) 672 if (bip)
675 XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); 673 XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
676 674
677 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 675 xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);
678 offsetof(struct xfs_sb, sb_crc));
679} 676}
680 677
681const struct xfs_buf_ops xfs_sb_buf_ops = { 678const struct xfs_buf_ops xfs_sb_buf_ops = {
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 35061d4b614c..f7b2fe77c5a5 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -182,6 +182,8 @@ typedef struct xfs_sb {
182 /* must be padded to 64 bit alignment */ 182 /* must be padded to 64 bit alignment */
183} xfs_sb_t; 183} xfs_sb_t;
184 184
185#define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc)
186
185/* 187/*
186 * Superblock - on disk version. Must match the in core version above. 188 * Superblock - on disk version. Must match the in core version above.
187 * Must be padded to 64 bit alignment. 189 * Must be padded to 64 bit alignment.
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h
index 8c5035a13df1..4484e5151395 100644
--- a/fs/xfs/xfs_shared.h
+++ b/fs/xfs/xfs_shared.h
@@ -104,7 +104,8 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
104#define XFS_TRANS_SB_COUNT 41 104#define XFS_TRANS_SB_COUNT 41
105#define XFS_TRANS_CHECKPOINT 42 105#define XFS_TRANS_CHECKPOINT 42
106#define XFS_TRANS_ICREATE 43 106#define XFS_TRANS_ICREATE 43
107#define XFS_TRANS_TYPE_MAX 43 107#define XFS_TRANS_CREATE_TMPFILE 44
108#define XFS_TRANS_TYPE_MAX 44
108/* new transaction types need to be reflected in xfs_logprint(8) */ 109/* new transaction types need to be reflected in xfs_logprint(8) */
109 110
110#define XFS_TRANS_TYPES \ 111#define XFS_TRANS_TYPES \
@@ -112,6 +113,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops;
112 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ 113 { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \
113 { XFS_TRANS_INACTIVE, "INACTIVE" }, \ 114 { XFS_TRANS_INACTIVE, "INACTIVE" }, \
114 { XFS_TRANS_CREATE, "CREATE" }, \ 115 { XFS_TRANS_CREATE, "CREATE" }, \
116 { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \
115 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ 117 { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \
116 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ 118 { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \
117 { XFS_TRANS_REMOVE, "REMOVE" }, \ 119 { XFS_TRANS_REMOVE, "REMOVE" }, \
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 14e58f2c96bd..52979aa90986 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -80,6 +80,10 @@ xfs_readlink_bmap(
80 if (error) { 80 if (error) {
81 xfs_buf_ioerror_alert(bp, __func__); 81 xfs_buf_ioerror_alert(bp, __func__);
82 xfs_buf_relse(bp); 82 xfs_buf_relse(bp);
83
84 /* bad CRC means corrupted metadata */
85 if (error == EFSBADCRC)
86 error = EFSCORRUPTED;
83 goto out; 87 goto out;
84 } 88 }
85 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); 89 byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -208,10 +212,7 @@ xfs_symlink(
208 return XFS_ERROR(ENAMETOOLONG); 212 return XFS_ERROR(ENAMETOOLONG);
209 213
210 udqp = gdqp = NULL; 214 udqp = gdqp = NULL;
211 if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) 215 prid = xfs_get_initial_prid(dp);
212 prid = xfs_get_projid(dp);
213 else
214 prid = XFS_PROJID_DEFAULT;
215 216
216 /* 217 /*
217 * Make sure that we have allocated dquot(s) on disk. 218 * Make sure that we have allocated dquot(s) on disk.
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c
index bf59a2b45f8c..9b32052ff65e 100644
--- a/fs/xfs/xfs_symlink_remote.c
+++ b/fs/xfs/xfs_symlink_remote.c
@@ -133,12 +133,13 @@ xfs_symlink_read_verify(
133 if (!xfs_sb_version_hascrc(&mp->m_sb)) 133 if (!xfs_sb_version_hascrc(&mp->m_sb))
134 return; 134 return;
135 135
136 if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), 136 if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF))
137 offsetof(struct xfs_dsymlink_hdr, sl_crc)) || 137 xfs_buf_ioerror(bp, EFSBADCRC);
138 !xfs_symlink_verify(bp)) { 138 else if (!xfs_symlink_verify(bp))
139 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
140 xfs_buf_ioerror(bp, EFSCORRUPTED); 139 xfs_buf_ioerror(bp, EFSCORRUPTED);
141 } 140
141 if (bp->b_error)
142 xfs_verifier_error(bp);
142} 143}
143 144
144static void 145static void
@@ -153,8 +154,8 @@ xfs_symlink_write_verify(
153 return; 154 return;
154 155
155 if (!xfs_symlink_verify(bp)) { 156 if (!xfs_symlink_verify(bp)) {
156 XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);
157 xfs_buf_ioerror(bp, EFSCORRUPTED); 157 xfs_buf_ioerror(bp, EFSCORRUPTED);
158 xfs_verifier_error(bp);
158 return; 159 return;
159 } 160 }
160 161
@@ -162,8 +163,7 @@ xfs_symlink_write_verify(
162 struct xfs_dsymlink_hdr *dsl = bp->b_addr; 163 struct xfs_dsymlink_hdr *dsl = bp->b_addr;
163 dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); 164 dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);
164 } 165 }
165 xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), 166 xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);
166 offsetof(struct xfs_dsymlink_hdr, sl_crc));
167} 167}
168 168
169const struct xfs_buf_ops xfs_symlink_buf_ops = { 169const struct xfs_buf_ops xfs_symlink_buf_ops = {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 425dfa45b9a0..a4ae41c179a8 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink);
603DEFINE_INODE_EVENT(xfs_inactive_symlink); 603DEFINE_INODE_EVENT(xfs_inactive_symlink);
604DEFINE_INODE_EVENT(xfs_alloc_file_space); 604DEFINE_INODE_EVENT(xfs_alloc_file_space);
605DEFINE_INODE_EVENT(xfs_free_file_space); 605DEFINE_INODE_EVENT(xfs_free_file_space);
606DEFINE_INODE_EVENT(xfs_collapse_file_space);
606DEFINE_INODE_EVENT(xfs_readdir); 607DEFINE_INODE_EVENT(xfs_readdir);
607#ifdef CONFIG_XFS_POSIX_ACL 608#ifdef CONFIG_XFS_POSIX_ACL
608DEFINE_INODE_EVENT(xfs_get_acl); 609DEFINE_INODE_EVENT(xfs_get_acl);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c812c5c060de..54a57326d85b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -887,12 +887,7 @@ xfs_trans_commit(
887 xfs_trans_apply_sb_deltas(tp); 887 xfs_trans_apply_sb_deltas(tp);
888 xfs_trans_apply_dquot_deltas(tp); 888 xfs_trans_apply_dquot_deltas(tp);
889 889
890 error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags); 890 xfs_log_commit_cil(mp, tp, &commit_lsn, flags);
891 if (error == ENOMEM) {
892 xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
893 error = XFS_ERROR(EIO);
894 goto out_unreserve;
895 }
896 891
897 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 892 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
898 xfs_trans_free(tp); 893 xfs_trans_free(tp);
@@ -902,10 +897,7 @@ xfs_trans_commit(
902 * log out now and wait for it. 897 * log out now and wait for it.
903 */ 898 */
904 if (sync) { 899 if (sync) {
905 if (!error) { 900 error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
906 error = _xfs_log_force_lsn(mp, commit_lsn,
907 XFS_LOG_SYNC, NULL);
908 }
909 XFS_STATS_INC(xs_trans_sync); 901 XFS_STATS_INC(xs_trans_sync);
910 } else { 902 } else {
911 XFS_STATS_INC(xs_trans_async); 903 XFS_STATS_INC(xs_trans_async);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 647b6f1d8923..b8eef0549f3f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -275,6 +275,10 @@ xfs_trans_read_buf_map(
275 XFS_BUF_UNDONE(bp); 275 XFS_BUF_UNDONE(bp);
276 xfs_buf_stale(bp); 276 xfs_buf_stale(bp);
277 xfs_buf_relse(bp); 277 xfs_buf_relse(bp);
278
279 /* bad CRC means corrupted metadata */
280 if (error == EFSBADCRC)
281 error = EFSCORRUPTED;
278 return error; 282 return error;
279 } 283 }
280#ifdef DEBUG 284#ifdef DEBUG
@@ -338,6 +342,9 @@ xfs_trans_read_buf_map(
338 if (tp->t_flags & XFS_TRANS_DIRTY) 342 if (tp->t_flags & XFS_TRANS_DIRTY)
339 xfs_force_shutdown(tp->t_mountp, 343 xfs_force_shutdown(tp->t_mountp,
340 SHUTDOWN_META_IO_ERROR); 344 SHUTDOWN_META_IO_ERROR);
345 /* bad CRC means corrupted metadata */
346 if (error == EFSBADCRC)
347 error = EFSCORRUPTED;
341 return error; 348 return error;
342 } 349 }
343 } 350 }
@@ -375,6 +382,10 @@ xfs_trans_read_buf_map(
375 if (tp->t_flags & XFS_TRANS_DIRTY) 382 if (tp->t_flags & XFS_TRANS_DIRTY)
376 xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); 383 xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
377 xfs_buf_relse(bp); 384 xfs_buf_relse(bp);
385
386 /* bad CRC means corrupted metadata */
387 if (error == EFSBADCRC)
388 error = EFSCORRUPTED;
378 return error; 389 return error;
379 } 390 }
380#ifdef DEBUG 391#ifdef DEBUG
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c
index 2ffd3e331b49..ae368165244d 100644
--- a/fs/xfs/xfs_trans_resv.c
+++ b/fs/xfs/xfs_trans_resv.c
@@ -81,20 +81,28 @@ xfs_calc_buf_res(
  * on disk. Hence we need an inode reservation function that calculates all this
  * correctly. So, we log:
  *
- * - log op headers for object
+ * - 4 log op headers for object
+ *    - for the ilf, the inode core and 2 forks
  * - inode log format object
- * - the entire inode contents (core + 2 forks)
- * - two bmap btree block headers
+ * - the inode core
+ * - two inode forks containing bmap btree root blocks.
+ *    - the btree data contained by both forks will fit into the inode size,
+ *      hence when combined with the inode core above, we have a total of the
+ *      actual inode size.
+ *    - the BMBT headers need to be accounted separately, as they are
+ *      additional to the records and pointers that fit inside the inode
+ *      forks.
  */
 STATIC uint
 xfs_calc_inode_res(
 	struct xfs_mount	*mp,
 	uint			ninodes)
 {
-	return ninodes * (sizeof(struct xlog_op_header) +
-			  sizeof(struct xfs_inode_log_format) +
-			  mp->m_sb.sb_inodesize +
-			  2 * XFS_BMBT_BLOCK_LEN(mp));
+	return ninodes *
+		(4 * sizeof(struct xlog_op_header) +
+		 sizeof(struct xfs_inode_log_format) +
+		 mp->m_sb.sb_inodesize +
+		 2 * XFS_BMBT_BLOCK_LEN(mp));
 }
 
 /*
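
To see what the reworked per-inode term amounts to, here is a hedged back-of-the-envelope calculation in plain C. The sizes below are stand-in assumptions (in the kernel they come from sizeof() on the log structures, sb_inodesize and XFS_BMBT_BLOCK_LEN(mp)), so only the shape of the formula, not the numbers, is authoritative.

#include <stdio.h>

/* Stand-in sizes, chosen only to make the arithmetic concrete. */
#define OP_HDR_LEN	12	/* assumed log op header size */
#define ILF_LEN		56	/* assumed inode log format size */
#define INODE_SIZE	256	/* assumed sb_inodesize */
#define BMBT_HDR_LEN	72	/* assumed bmap btree block header size */

/* Mirrors the reworked xfs_calc_inode_res(): 4 op headers, the inode
 * log format item, the full on-disk inode (core plus both forks), and
 * two BMBT block headers that are not covered by the inode size. */
static unsigned int calc_inode_res(unsigned int ninodes)
{
	return ninodes * (4 * OP_HDR_LEN + ILF_LEN + INODE_SIZE +
			  2 * BMBT_HDR_LEN);
}

int main(void)
{
	/* e.g. the two inodes touched by a link-style transaction */
	printf("%u bytes\n", calc_inode_res(2));
	return 0;
}
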
@@ -204,6 +212,19 @@ xfs_calc_rename_reservation(
 }
 
 /*
+ * For removing an inode from unlinked list at first, we can modify:
+ *    the agi hash list and counters: sector size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ */
+STATIC uint
+xfs_calc_iunlink_remove_reservation(
+	struct xfs_mount	*mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+	       max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+}
+
+/*
  * For creating a link to an inode:
  *    the parent directory inode: inode size
  *    the linked inode: inode size
@@ -220,6 +241,7 @@ xfs_calc_link_reservation(
 	struct xfs_mount	*mp)
 {
 	return XFS_DQUOT_LOGRES(mp) +
+		xfs_calc_iunlink_remove_reservation(mp) +
 		MAX((xfs_calc_inode_res(mp, 2) +
 		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 				      XFS_FSB_TO_B(mp, 1))),
@@ -229,6 +251,18 @@ xfs_calc_link_reservation(
 }
 
 /*
+ * For adding an inode to unlinked list we can modify:
+ *    the agi hash list: sector size
+ *    the unlinked inode: inode size
+ */
+STATIC uint
+xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+		xfs_calc_inode_res(mp, 1);
+}
+
+/*
  * For removing a directory entry we can modify:
  *    the parent directory inode: inode size
  *    the removed inode: inode size
@@ -245,10 +279,11 @@ xfs_calc_remove_reservation(
 	struct xfs_mount	*mp)
 {
 	return XFS_DQUOT_LOGRES(mp) +
-		MAX((xfs_calc_inode_res(mp, 2) +
+		xfs_calc_iunlink_add_reservation(mp) +
+		MAX((xfs_calc_inode_res(mp, 1) +
 		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+		    (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
 		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
 				      XFS_FSB_TO_B(mp, 1))));
 }
@@ -343,6 +378,20 @@ xfs_calc_create_reservation(
 
 }
 
+STATIC uint
+xfs_calc_create_tmpfile_reservation(
+	struct xfs_mount	*mp)
+{
+	uint	res = XFS_DQUOT_LOGRES(mp);
+
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		res += xfs_calc_icreate_resv_alloc(mp);
+	else
+		res += xfs_calc_create_resv_alloc(mp);
+
+	return res + xfs_calc_iunlink_add_reservation(mp);
+}
+
 /*
  * Making a new directory is the same as creating a new file.
  */
@@ -383,9 +432,9 @@ xfs_calc_ifree_reservation(
 {
 	return XFS_DQUOT_LOGRES(mp) +
 		xfs_calc_inode_res(mp, 1) +
-		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 		xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-		max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) +
+		xfs_calc_iunlink_remove_reservation(mp) +
 		xfs_calc_buf_res(1, 0) +
 		xfs_calc_buf_res(2 + mp->m_ialloc_blks +
 				 mp->m_in_maxlevels, 0) +
@@ -644,15 +693,14 @@ xfs_calc_qm_setqlim_reservation(
 
 /*
  * Allocating quota on disk if needed.
- * the write transaction log space: M_RES(mp)->tr_write.tr_logres
+ * the write transaction log space for quota file extent allocation
  * the unit of quota allocation: one system block size
  */
 STATIC uint
 xfs_calc_qm_dqalloc_reservation(
 	struct xfs_mount	*mp)
 {
-	ASSERT(M_RES(mp)->tr_write.tr_logres);
-	return M_RES(mp)->tr_write.tr_logres +
+	return xfs_calc_write_reservation(mp) +
 		xfs_calc_buf_res(1,
 			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
 }
@@ -729,6 +777,11 @@ xfs_trans_resv_calc(
 	resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
 	resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
+	resp->tr_create_tmpfile.tr_logres =
+			xfs_calc_create_tmpfile_reservation(mp);
+	resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
+	resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
 	resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
 	resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
 	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -784,7 +837,6 @@ xfs_trans_resv_calc(
 	/* The following transaction are logged in logical format */
 	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
 	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
-	resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp);
 	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
 	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
 	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
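
Once xfs_trans_resv_calc() has filled in the new entry, a tmpfile creation would reserve log space against it like any other transaction type. The sketch below is illustrative only: the helper name is hypothetical, XFS_TRANS_CREATE_TMPFILE is assumed (the transaction type is added elsewhere in the series and is not shown in these hunks), and the xfs_trans_reserve() call uses the post-3.13 signature that takes a struct xfs_trans_res pointer plus block and rt-extent counts.

/* Hypothetical helper, for illustration of the reservation only. */
STATIC int
xfs_example_create_tmpfile_trans(
	struct xfs_mount	*mp,
	uint			resblks)
{
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE);	/* assumed type */
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create_tmpfile,
				  resblks, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	/*
	 * ... inode allocation, placement on the AGI unlinked list, and
	 * xfs_trans_commit() would follow here ...
	 */
	return 0;
}
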
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/xfs_trans_resv.h
index de7de9aaad8a..1097d14cd583 100644
--- a/fs/xfs/xfs_trans_resv.h
+++ b/fs/xfs/xfs_trans_resv.h
@@ -38,11 +38,11 @@ struct xfs_trans_resv {
 	struct xfs_trans_res	tr_remove;	/* unlink trans */
 	struct xfs_trans_res	tr_symlink;	/* symlink trans */
 	struct xfs_trans_res	tr_create;	/* create trans */
+	struct xfs_trans_res	tr_create_tmpfile; /* create O_TMPFILE trans */
 	struct xfs_trans_res	tr_mkdir;	/* mkdir trans */
 	struct xfs_trans_res	tr_ifree;	/* inode free trans */
 	struct xfs_trans_res	tr_ichange;	/* inode update trans */
 	struct xfs_trans_res	tr_growdata;	/* fs data section grow trans */
-	struct xfs_trans_res	tr_swrite;	/* sync write inode trans */
 	struct xfs_trans_res	tr_addafork;	/* add inode attr fork trans */
 	struct xfs_trans_res	tr_writeid;	/* write setuid/setgid file */
 	struct xfs_trans_res	tr_attrinval;	/* attr fork buffer
@@ -100,6 +100,7 @@ struct xfs_trans_resv {
 #define	XFS_ITRUNCATE_LOG_COUNT		2
 #define	XFS_INACTIVE_LOG_COUNT		2
 #define	XFS_CREATE_LOG_COUNT		2
+#define	XFS_CREATE_TMPFILE_LOG_COUNT	2
 #define	XFS_MKDIR_LOG_COUNT		3
 #define	XFS_SYMLINK_LOG_COUNT		3
 #define	XFS_REMOVE_LOG_COUNT		2
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ea80f1cdff06..81048f9bc783 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2550,6 +2550,9 @@ enum {
 
 	/* filesystem does not support filling holes */
 	DIO_SKIP_HOLES	= 0x02,
+
+	/* filesystem can handle aio writes beyond i_size */
+	DIO_ASYNC_EXTEND = 0x04,
 };
 
 void dio_end_io(struct bio *bio, int error);
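
DIO_ASYNC_EXTEND lets a filesystem tell the generic direct I/O code that it can safely complete AIO writes which extend i_size, so such writes no longer need to be forced synchronous. A hedged sketch of a ->direct_IO() method opting in follows; the myfs_* names are hypothetical, the callbacks are assumed to be defined elsewhere, and the iovec-based __blockdev_direct_IO() prototype is assumed to match the 3.14-era one in fs/direct-io.c.

/* Hypothetical filesystem glue, declared via the generic typedefs so the
 * exact callback signatures are left to fs.h; definitions not shown. */
static get_block_t	myfs_get_block;
static dio_iodone_t	myfs_end_io;

static ssize_t
myfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
	       loff_t offset, unsigned long nr_segs)
{
	struct inode	*inode = file_inode(iocb->ki_filp);

	/*
	 * DIO_ASYNC_EXTEND: this filesystem updates i_size from its own
	 * I/O completion handler, so extending AIO writes need not wait.
	 */
	return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev,
				    iov, offset, nr_segs, myfs_get_block,
				    myfs_end_io, NULL, DIO_ASYNC_EXTEND);
}
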
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 990c4ccf8b61..d1197ae3723c 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -5,5 +5,40 @@
 #define FALLOC_FL_PUNCH_HOLE	0x02 /* de-allocates range */
 #define FALLOC_FL_NO_HIDE_STALE	0x04 /* reserved codepoint */
 
+/*
+ * FALLOC_FL_COLLAPSE_RANGE is used to remove a range of a file
+ * without leaving a hole in the file. The contents of the file beyond
+ * the range being removed is appended to the start offset of the range
+ * being removed (i.e. the hole that was punched is "collapsed"),
+ * resulting in a file layout that looks like the range that was
+ * removed never existed. As such collapsing a range of a file changes
+ * the size of the file, reducing it by the same length of the range
+ * that has been removed by the operation.
+ *
+ * Different filesystems may implement different limitations on the
+ * granularity of the operation. Most will limit operations to
+ * filesystem block size boundaries, but this boundary may be larger or
+ * smaller depending on the filesystem and/or the configuration of the
+ * filesystem or file.
+ *
+ * Attempting to collapse a range that crosses the end of the file is
+ * considered an illegal operation - just use ftruncate(2) if you need
+ * to collapse a range that crosses EOF.
+ */
+#define FALLOC_FL_COLLAPSE_RANGE	0x08
+
+/*
+ * FALLOC_FL_ZERO_RANGE is used to convert a range of file to zeros preferably
+ * without issuing data IO. Blocks should be preallocated for the regions that
+ * span holes in the file, and the entire range is preferably converted to
+ * unwritten extents - even though the file system may choose to zero out the
+ * extent or do whatever else results in reading zeros from the range
+ * while the range remains allocated for the file.
+ *
+ * This can also be used to preallocate blocks past EOF in the same way as
+ * with fallocate. Flag FALLOC_FL_KEEP_SIZE should cause the inode
+ * size to remain the same.
+ */
+#define FALLOC_FL_ZERO_RANGE		0x10
 
 #endif	/* _UAPI_FALLOC_H_ */
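
Both new flags are consumed through the ordinary fallocate(2) syscall. A small user-space example follows, assuming a filesystem that supports both flags and a file whose size is a multiple of a 4 KiB block size; the offsets and lengths are illustrative only.

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Drop one 4 KiB block at offset 4096 and shift the rest of the
	 * file down, shrinking it by 4096 bytes. Offset and length must
	 * be block aligned and the range must not reach EOF. */
	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, 4096, 4096) < 0)
		perror("FALLOC_FL_COLLAPSE_RANGE");

	/* Convert the first 8 KiB to zeros, preferably as unwritten
	 * extents, without changing the file size. */
	if (fallocate(fd, FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
		      0, 8192) < 0)
		perror("FALLOC_FL_ZERO_RANGE");

	close(fd);
	return 0;
}
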