author		Dave Chinner <david@fromorbit.com>	2016-06-20 20:10:38 -0400
committer	Dave Chinner <david@fromorbit.com>	2016-06-20 20:10:38 -0400
commit		9b7fad20760b8f47730f0353459dd39a89c415b9 (patch)
tree		a0465f6d4bb6dceaf3a6412e1c30b31096fdf712
parent		07931b7be70916055b882c6a379a3016f5772681 (diff)
parent		3c2bdc912a1cc050db7e858aabe564cb382c9c30 (diff)

Merge branch 'xfs-4.8-iomap-write' into for-next
-rw-r--r--	fs/xfs/Kconfig		  1
-rw-r--r--	fs/xfs/xfs_aops.c	283
-rw-r--r--	fs/xfs/xfs_bmap_util.c	343
-rw-r--r--	fs/xfs/xfs_file.c	193
-rw-r--r--	fs/xfs/xfs_inode.h	  3
-rw-r--r--	fs/xfs/xfs_iomap.c	171
-rw-r--r--	fs/xfs/xfs_iomap.h	  7
-rw-r--r--	fs/xfs/xfs_iops.c	113
-rw-r--r--	fs/xfs/xfs_pnfs.c	 26
-rw-r--r--	fs/xfs/xfs_trace.h	  3
10 files changed, 367 insertions(+), 776 deletions(-)
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 5d47b4df61ea..35faf128f36d 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -4,6 +4,7 @@ config XFS_FS
 	depends on (64BIT || LBDAF)
 	select EXPORTFS
 	select LIBCRC32C
+	select FS_IOMAP
 	help
 	  XFS is a high performance journaling filesystem which originated
 	  on the SGI IRIX platform. It is completely multi-threaded, can
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4c463b99fe57..80714ebd54c0 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1143,6 +1143,8 @@ __xfs_get_blocks(
 	ssize_t			size;
 	int			new = 0;
 
+	BUG_ON(create && !direct);
+
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
 
@@ -1150,22 +1152,14 @@ __xfs_get_blocks(
 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
 	size = bh_result->b_size;
 
-	if (!create && direct && offset >= i_size_read(inode))
+	if (!create && offset >= i_size_read(inode))
 		return 0;
 
 	/*
 	 * Direct I/O is usually done on preallocated files, so try getting
-	 * a block mapping without an exclusive lock first. For buffered
-	 * writes we already have the exclusive iolock anyway, so avoiding
-	 * a lock roundtrip here by taking the ilock exclusive from the
-	 * beginning is a useful micro optimization.
+	 * a block mapping without an exclusive lock first.
 	 */
-	if (create && !direct) {
-		lockmode = XFS_ILOCK_EXCL;
-		xfs_ilock(ip, lockmode);
-	} else {
-		lockmode = xfs_ilock_data_map_shared(ip);
-	}
+	lockmode = xfs_ilock_data_map_shared(ip);
 
 	ASSERT(offset <= mp->m_super->s_maxbytes);
 	if (offset + size > mp->m_super->s_maxbytes)
@@ -1184,37 +1178,19 @@ __xfs_get_blocks(
 	     (imap.br_startblock == HOLESTARTBLOCK ||
 	      imap.br_startblock == DELAYSTARTBLOCK) ||
 	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
-		if (direct || xfs_get_extsz_hint(ip)) {
-			/*
-			 * xfs_iomap_write_direct() expects the shared lock. It
-			 * is unlocked on return.
-			 */
-			if (lockmode == XFS_ILOCK_EXCL)
-				xfs_ilock_demote(ip, lockmode);
-
-			error = xfs_iomap_write_direct(ip, offset, size,
-						       &imap, nimaps);
-			if (error)
-				return error;
-			new = 1;
+		/*
+		 * xfs_iomap_write_direct() expects the shared lock. It
+		 * is unlocked on return.
+		 */
+		if (lockmode == XFS_ILOCK_EXCL)
+			xfs_ilock_demote(ip, lockmode);
 
-		} else {
-			/*
-			 * Delalloc reservations do not require a transaction,
-			 * we can go on without dropping the lock here. If we
-			 * are allocating a new delalloc block, make sure that
-			 * we set the new flag so that we mark the buffer new so
-			 * that we know that it is newly allocated if the write
-			 * fails.
-			 */
-			if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
-				new = 1;
-			error = xfs_iomap_write_delay(ip, offset, size, &imap);
-			if (error)
-				goto out_unlock;
+		error = xfs_iomap_write_direct(ip, offset, size,
+					       &imap, nimaps);
+		if (error)
+			return error;
+		new = 1;
 
-			xfs_iunlock(ip, lockmode);
-		}
 		trace_xfs_get_blocks_alloc(ip, offset, size,
 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
 				: XFS_IO_DELALLOC, &imap);
@@ -1235,9 +1211,7 @@ __xfs_get_blocks(
 	}
 
 	/* trim mapping down to size requested */
-	if (direct || size > (1 << inode->i_blkbits))
-		xfs_map_trim_size(inode, iblock, bh_result,
-				  &imap, offset, size);
+	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
 
 	/*
 	 * For unwritten extents do not report a disk address in the buffered
@@ -1250,7 +1224,7 @@ __xfs_get_blocks(
 	if (ISUNWRITTEN(&imap))
 		set_buffer_unwritten(bh_result);
 	/* direct IO needs special help */
-	if (create && direct) {
+	if (create) {
 		if (dax_fault)
 			ASSERT(!ISUNWRITTEN(&imap));
 		else
@@ -1279,14 +1253,7 @@ __xfs_get_blocks(
 	     (new || ISUNWRITTEN(&imap))))
 		set_buffer_new(bh_result);
 
-	if (imap.br_startblock == DELAYSTARTBLOCK) {
-		BUG_ON(direct);
-		if (create) {
-			set_buffer_uptodate(bh_result);
-			set_buffer_mapped(bh_result);
-			set_buffer_delay(bh_result);
-		}
-	}
+	BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
 
 	return 0;
 
@@ -1427,216 +1394,6 @@ xfs_vm_direct_IO(
 			xfs_get_blocks_direct, endio, NULL, flags);
 }
 
-/*
- * Punch out the delalloc blocks we have already allocated.
- *
- * Don't bother with xfs_setattr given that nothing can have made it to disk yet
- * as the page is still locked at this point.
- */
-STATIC void
-xfs_vm_kill_delalloc_range(
-	struct inode		*inode,
-	loff_t			start,
-	loff_t			end)
-{
-	struct xfs_inode	*ip = XFS_I(inode);
-	xfs_fileoff_t		start_fsb;
-	xfs_fileoff_t		end_fsb;
-	int			error;
-
-	start_fsb = XFS_B_TO_FSB(ip->i_mount, start);
-	end_fsb = XFS_B_TO_FSB(ip->i_mount, end);
-	if (end_fsb <= start_fsb)
-		return;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-						end_fsb - start_fsb);
-	if (error) {
-		/* something screwed, just bail */
-		if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-			xfs_alert(ip->i_mount,
-		"xfs_vm_write_failed: unable to clean up ino %lld",
-					ip->i_ino);
-		}
-	}
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-}
-
-STATIC void
-xfs_vm_write_failed(
-	struct inode		*inode,
-	struct page		*page,
-	loff_t			pos,
-	unsigned		len)
-{
-	loff_t			block_offset;
-	loff_t			block_start;
-	loff_t			block_end;
-	loff_t			from = pos & (PAGE_SIZE - 1);
-	loff_t			to = from + len;
-	struct buffer_head	*bh, *head;
-	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
-
-	/*
-	 * The request pos offset might be 32 or 64 bit, this is all fine
-	 * on 64-bit platform.  However, for 64-bit pos request on 32-bit
-	 * platform, the high 32-bit will be masked off if we evaluate the
-	 * block_offset via (pos & PAGE_MASK) because the PAGE_MASK is
-	 * 0xfffff000 as an unsigned long, hence the result is incorrect
-	 * which could cause the following ASSERT failed in most cases.
-	 * In order to avoid this, we can evaluate the block_offset of the
-	 * start of the page by using shifts rather than masks the mismatch
-	 * problem.
-	 */
-	block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
-
-	ASSERT(block_offset + from == pos);
-
-	head = page_buffers(page);
-	block_start = 0;
-	for (bh = head; bh != head || !block_start;
-	     bh = bh->b_this_page, block_start = block_end,
-				   block_offset += bh->b_size) {
-		block_end = block_start + bh->b_size;
-
-		/* skip buffers before the write */
-		if (block_end <= from)
-			continue;
-
-		/* if the buffer is after the write, we're done */
-		if (block_start >= to)
-			break;
-
-		/*
-		 * Process delalloc and unwritten buffers beyond EOF. We can
-		 * encounter unwritten buffers in the event that a file has
-		 * post-EOF unwritten extents and an extending write happens to
-		 * fail (e.g., an unaligned write that also involves a delalloc
-		 * to the same page).
-		 */
-		if (!buffer_delay(bh) && !buffer_unwritten(bh))
-			continue;
-
-		if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
-		    block_offset < i_size_read(inode))
-			continue;
-
-		if (buffer_delay(bh))
-			xfs_vm_kill_delalloc_range(inode, block_offset,
-						   block_offset + bh->b_size);
-
-		/*
-		 * This buffer does not contain data anymore. make sure anyone
-		 * who finds it knows that for certain.
-		 */
-		clear_buffer_delay(bh);
-		clear_buffer_uptodate(bh);
-		clear_buffer_mapped(bh);
-		clear_buffer_new(bh);
-		clear_buffer_dirty(bh);
-		clear_buffer_unwritten(bh);
-	}
-
-}
-
-/*
- * This used to call block_write_begin(), but it unlocks and releases the page
- * on error, and we need that page to be able to punch stale delalloc blocks out
- * on failure. hence we copy-n-waste it here and call xfs_vm_write_failed() at
- * the appropriate point.
- */
-STATIC int
-xfs_vm_write_begin(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		flags,
-	struct page		**pagep,
-	void			**fsdata)
-{
-	pgoff_t			index = pos >> PAGE_SHIFT;
-	struct page		*page;
-	int			status;
-	struct xfs_mount	*mp = XFS_I(mapping->host)->i_mount;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	page = grab_cache_page_write_begin(mapping, index, flags);
-	if (!page)
-		return -ENOMEM;
-
-	status = __block_write_begin(page, pos, len, xfs_get_blocks);
-	if (xfs_mp_fail_writes(mp))
-		status = -EIO;
-	if (unlikely(status)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-
-		xfs_vm_write_failed(inode, page, pos, len);
-		unlock_page(page);
-
-		/*
-		 * If the write is beyond EOF, we only want to kill blocks
-		 * allocated in this write, not blocks that were previously
-		 * written successfully.
-		 */
-		if (xfs_mp_fail_writes(mp))
-			isize = 0;
-		if (pos + len > isize) {
-			ssize_t start = max_t(ssize_t, pos, isize);
-
-			truncate_pagecache_range(inode, start, pos + len);
-		}
-
-		put_page(page);
-		page = NULL;
-	}
-
-	*pagep = page;
-	return status;
-}
-
-/*
- * On failure, we only need to kill delalloc blocks beyond EOF in the range of
- * this specific write because they will never be written. Previous writes
- * beyond EOF where block allocation succeeded do not need to be trashed, so
- * only new blocks from this write should be trashed. For blocks within
- * EOF, generic_write_end() zeros them so they are safe to leave alone and be
- * written with all the other valid data.
- */
-STATIC int
-xfs_vm_write_end(
-	struct file		*file,
-	struct address_space	*mapping,
-	loff_t			pos,
-	unsigned		len,
-	unsigned		copied,
-	struct page		*page,
-	void			*fsdata)
-{
-	int			ret;
-
-	ASSERT(len <= PAGE_SIZE);
-
-	ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
-	if (unlikely(ret < len)) {
-		struct inode	*inode = mapping->host;
-		size_t		isize = i_size_read(inode);
-		loff_t		to = pos + len;
-
-		if (to > isize) {
-			/* only kill blocks in this write beyond EOF */
-			if (pos > isize)
-				isize = pos;
-			xfs_vm_kill_delalloc_range(inode, isize, to);
-			truncate_pagecache_range(inode, isize, to);
-		}
-	}
-	return ret;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1747,8 +1504,6 @@ const struct address_space_operations xfs_address_space_operations = {
 	.set_page_dirty		= xfs_vm_set_page_dirty,
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
-	.write_begin		= xfs_vm_write_begin,
-	.write_end		= xfs_vm_write_end,
 	.bmap			= xfs_vm_bmap,
 	.direct_IO		= xfs_vm_direct_IO,
 	.migratepage		= buffer_migrate_page,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 28c42fb0c12a..91bee2db3207 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1087,99 +1087,120 @@ error1: /* Just cancel transaction */
 	return error;
 }
 
-/*
- * Zero file bytes between startoff and endoff inclusive.
- * The iolock is held exclusive and no blocks are buffered.
- *
- * This function is used by xfs_free_file_space() to zero
- * partial blocks when the range to free is not block aligned.
- * When unreserving space with boundaries that are not block
- * aligned we round up the start and round down the end
- * boundaries and then use this function to zero the parts of
- * the blocks that got dropped during the rounding.
- */
-STATIC int
-xfs_zero_remaining_bytes(
-	xfs_inode_t		*ip,
-	xfs_off_t		startoff,
-	xfs_off_t		endoff)
+static int
+xfs_unmap_extent(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		startoffset_fsb,
+	xfs_filblks_t		len_fsb,
+	int			*done)
 {
-	xfs_bmbt_irec_t		imap;
-	xfs_fileoff_t		offset_fsb;
-	xfs_off_t		lastoffset;
-	xfs_off_t		offset;
-	xfs_buf_t		*bp;
-	xfs_mount_t		*mp = ip->i_mount;
-	int			nimap;
-	int			error = 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	struct xfs_bmap_free	free_list;
+	xfs_fsblock_t		firstfsb;
+	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
+	int			error;
 
-	/*
-	 * Avoid doing I/O beyond eof - it's not necessary
-	 * since nothing can read beyond eof.  The space will
-	 * be zeroed when the file is extended anyway.
-	 */
-	if (startoff >= XFS_ISIZE(ip))
-		return 0;
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
+	if (error) {
+		ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
+		return error;
+	}
 
-	if (endoff > XFS_ISIZE(ip))
-		endoff = XFS_ISIZE(ip);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot,
+			ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS);
+	if (error)
+		goto out_trans_cancel;
 
-	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
-		uint lock_mode;
+	xfs_trans_ijoin(tp, ip, 0);
 
-		offset_fsb = XFS_B_TO_FSBT(mp, offset);
-		nimap = 1;
+	xfs_bmap_init(&free_list, &firstfsb);
+	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
+			&free_list, done);
+	if (error)
+		goto out_bmap_cancel;
 
-		lock_mode = xfs_ilock_data_map_shared(ip);
-		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
-		xfs_iunlock(ip, lock_mode);
+	error = xfs_bmap_finish(&tp, &free_list, NULL);
+	if (error)
+		goto out_bmap_cancel;
 
-		if (error || nimap < 1)
-			break;
-		ASSERT(imap.br_blockcount >= 1);
-		ASSERT(imap.br_startoff == offset_fsb);
-		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+	error = xfs_trans_commit(tp);
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
 
-		if (imap.br_startblock == HOLESTARTBLOCK ||
-		    imap.br_state == XFS_EXT_UNWRITTEN) {
-			/* skip the entire extent */
-			lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff +
-						  imap.br_blockcount) - 1;
-			continue;
-		}
+out_bmap_cancel:
+	xfs_bmap_cancel(&free_list);
+out_trans_cancel:
+	xfs_trans_cancel(tp);
+	goto out_unlock;
+}
 
-		lastoffset = XFS_FSB_TO_B(mp, imap.br_startoff + 1) - 1;
-		if (lastoffset > endoff)
-			lastoffset = endoff;
+static int
+xfs_adjust_extent_unmap_boundaries(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		*startoffset_fsb,
+	xfs_fileoff_t		*endoffset_fsb)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	int			nimap, error;
+	xfs_extlen_t		mod = 0;
 
-		/* DAX can just zero the backing device directly */
-		if (IS_DAX(VFS_I(ip))) {
-			error = dax_zero_page_range(VFS_I(ip), offset,
-						    lastoffset - offset + 1,
-						    xfs_get_blocks_direct);
-			if (error)
-				return error;
-			continue;
-		}
+	nimap = 1;
+	error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
+	if (error)
+		return error;
 
-		error = xfs_buf_read_uncached(XFS_IS_REALTIME_INODE(ip) ?
-				mp->m_rtdev_targp : mp->m_ddev_targp,
-				xfs_fsb_to_db(ip, imap.br_startblock),
-				BTOBB(mp->m_sb.sb_blocksize),
-				0, &bp, NULL);
-		if (error)
-			return error;
+	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+		xfs_daddr_t	block;
 
-		memset(bp->b_addr +
-				(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
-				0, lastoffset - offset + 1);
+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+		block = imap.br_startblock;
+		mod = do_div(block, mp->m_sb.sb_rextsize);
+		if (mod)
+			*startoffset_fsb += mp->m_sb.sb_rextsize - mod;
+	}
 
-		error = xfs_bwrite(bp);
-		xfs_buf_relse(bp);
-		if (error)
-			return error;
+	nimap = 1;
+	error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
+	if (error)
+		return error;
+
+	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
+		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
+		mod++;
+		if (mod && mod != mp->m_sb.sb_rextsize)
+			*endoffset_fsb -= mod;
 	}
-	return error;
+
+	return 0;
+}
+
+static int
+xfs_flush_unmap_range(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	xfs_off_t		len)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	struct inode		*inode = VFS_I(ip);
+	xfs_off_t		rounding, start, end;
+	int			error;
+
+	/* wait for the completion of any pending DIOs */
+	inode_dio_wait(inode);
+
+	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
+	start = round_down(offset, rounding);
+	end = round_up(offset + len, rounding) - 1;
+
+	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (error)
+		return error;
+	truncate_pagecache_range(inode, start, end);
+	return 0;
 }
 
 int
@@ -1188,24 +1209,10 @@ xfs_free_file_space(
 	xfs_off_t		offset,
 	xfs_off_t		len)
 {
-	int			done;
-	xfs_fileoff_t		endoffset_fsb;
-	int			error;
-	xfs_fsblock_t		firstfsb;
-	xfs_bmap_free_t		free_list;
-	xfs_bmbt_irec_t		imap;
-	xfs_off_t		ioffset;
-	xfs_off_t		iendoffset;
-	xfs_extlen_t		mod=0;
-	xfs_mount_t		*mp;
-	int			nimap;
-	uint			resblks;
-	xfs_off_t		rounding;
-	int			rt;
+	struct xfs_mount	*mp = ip->i_mount;
 	xfs_fileoff_t		startoffset_fsb;
-	xfs_trans_t		*tp;
-
-	mp = ip->i_mount;
+	xfs_fileoff_t		endoffset_fsb;
+	int			done = 0, error;
 
 	trace_xfs_free_file_space(ip);
 
@@ -1213,135 +1220,45 @@ xfs_free_file_space(
 	if (error)
 		return error;
 
-	error = 0;
 	if (len <= 0)	/* if nothing being freed */
-		return error;
-	rt = XFS_IS_REALTIME_INODE(ip);
-	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
-	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
-
-	/* wait for the completion of any pending DIOs */
-	inode_dio_wait(VFS_I(ip));
+		return 0;
 
-	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
-	ioffset = round_down(offset, rounding);
-	iendoffset = round_up(offset + len, rounding) - 1;
-	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
-					     iendoffset);
+	error = xfs_flush_unmap_range(ip, offset, len);
 	if (error)
-		goto out;
-	truncate_pagecache_range(VFS_I(ip), ioffset, iendoffset);
+		return error;
+
+	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
+	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
 
 	/*
-	 * Need to zero the stuff we're not freeing, on disk.
-	 * If it's a realtime file & can't use unwritten extents then we
-	 * actually need to zero the extent edges.  Otherwise xfs_bunmapi
-	 * will take care of it for us.
+	 * Need to zero the stuff we're not freeing, on disk.  If it's a RT file
+	 * and we can't use unwritten extents then we actually need to ensure
+	 * to zero the whole extent, otherwise we just need to take of block
+	 * boundaries, and xfs_bunmapi will handle the rest.
 	 */
-	if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
-		nimap = 1;
-		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
-					&imap, &nimap, 0);
+	if (XFS_IS_REALTIME_INODE(ip) &&
+	    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
+		error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
+				&endoffset_fsb);
 		if (error)
-			goto out;
-		ASSERT(nimap == 0 || nimap == 1);
-		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-			xfs_daddr_t	block;
-
-			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-			block = imap.br_startblock;
-			mod = do_div(block, mp->m_sb.sb_rextsize);
-			if (mod)
-				startoffset_fsb += mp->m_sb.sb_rextsize - mod;
-		}
-		nimap = 1;
-		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
-					&imap, &nimap, 0);
-		if (error)
-			goto out;
-		ASSERT(nimap == 0 || nimap == 1);
-		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
-			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-			mod++;
-			if (mod && (mod != mp->m_sb.sb_rextsize))
-				endoffset_fsb -= mod;
-		}
-	}
-	if ((done = (endoffset_fsb <= startoffset_fsb)))
-		/*
-		 * One contiguous piece to clear
-		 */
-		error = xfs_zero_remaining_bytes(ip, offset, offset + len - 1);
-	else {
-		/*
-		 * Some full blocks, possibly two pieces to clear
-		 */
-		if (offset < XFS_FSB_TO_B(mp, startoffset_fsb))
-			error = xfs_zero_remaining_bytes(ip, offset,
-				XFS_FSB_TO_B(mp, startoffset_fsb) - 1);
-		if (!error &&
-		    XFS_FSB_TO_B(mp, endoffset_fsb) < offset + len)
-			error = xfs_zero_remaining_bytes(ip,
-				XFS_FSB_TO_B(mp, endoffset_fsb),
-				offset + len - 1);
+			return error;
 	}
 
-	/*
-	 * free file space until done or until there is an error
-	 */
-	resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
-	while (!error && !done) {
-
-		/*
-		 * allocate and setup the transaction. Allow this
-		 * transaction to dip into the reserve blocks to ensure
-		 * the freeing of the space succeeds at ENOSPC.
-		 */
-		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0,
-				&tp);
-		if (error) {
-			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
-			break;
+	if (endoffset_fsb > startoffset_fsb) {
+		while (!done) {
+			error = xfs_unmap_extent(ip, startoffset_fsb,
+					endoffset_fsb - startoffset_fsb, &done);
+			if (error)
+				return error;
 		}
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_trans_reserve_quota(tp, mp,
-				ip->i_udquot, ip->i_gdquot, ip->i_pdquot,
-				resblks, 0, XFS_QMOPT_RES_REGBLKS);
-		if (error)
-			goto error1;
-
-		xfs_trans_ijoin(tp, ip, 0);
-
-		/*
-		 * issue the bunmapi() call to free the blocks
-		 */
-		xfs_bmap_init(&free_list, &firstfsb);
-		error = xfs_bunmapi(tp, ip, startoffset_fsb,
-				  endoffset_fsb - startoffset_fsb,
-				  0, 2, &firstfsb, &free_list, &done);
-		if (error)
-			goto error0;
-
-		/*
-		 * complete the transaction
-		 */
-		error = xfs_bmap_finish(&tp, &free_list, NULL);
-		if (error)
-			goto error0;
-
-		error = xfs_trans_commit(tp);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 
- out:
-	return error;
-
- error0:
-	xfs_bmap_cancel(&free_list);
- error1:
-	xfs_trans_cancel(tp);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	goto out;
+	/*
+	 * Now that we've unmap all full blocks we'll have to zero out any
+	 * partial block at the beginning and/or end.  xfs_zero_range is
+	 * smart enough to skip any holes, including those we just created.
+	 */
+	return xfs_zero_range(ip, offset, len, NULL);
 }
 
 /*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 47fc63295422..713991c22781 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -37,6 +37,7 @@
 #include "xfs_log.h"
 #include "xfs_icache.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/dcache.h>
 #include <linux/falloc.h>
@@ -80,61 +81,17 @@ xfs_rw_ilock_demote(
 }
 
 /*
- * xfs_iozero clears the specified range supplied via the page cache (except in
- * the DAX case). Writes through the page cache will allocate blocks over holes,
- * though the callers usually map the holes first and avoid them. If a block is
- * not completely zeroed, then it will be read from disk before being partially
- * zeroed.
- *
- * In the DAX case, we can just directly write to the underlying pages. This
- * will not allocate blocks, but will avoid holes and unwritten extents and so
- * not do unnecessary work.
+ * Clear the specified ranges to zero through either the pagecache or DAX.
+ * Holes and unwritten extents will be left as-is as they already are zeroed.
  */
 int
-xfs_iozero(
-	struct xfs_inode	*ip,	/* inode			*/
-	loff_t			pos,	/* offset in file		*/
-	size_t			count)	/* size of data to zero		*/
+xfs_zero_range(
+	struct xfs_inode	*ip,
+	xfs_off_t		pos,
+	xfs_off_t		count,
+	bool			*did_zero)
 {
-	struct page		*page;
-	struct address_space	*mapping;
-	int			status = 0;
-
-
-	mapping = VFS_I(ip)->i_mapping;
-	do {
-		unsigned offset, bytes;
-		void *fsdata;
-
-		offset = (pos & (PAGE_SIZE -1)); /* Within page */
-		bytes = PAGE_SIZE - offset;
-		if (bytes > count)
-			bytes = count;
-
-		if (IS_DAX(VFS_I(ip))) {
-			status = dax_zero_page_range(VFS_I(ip), pos, bytes,
-						     xfs_get_blocks_direct);
-			if (status)
-				break;
-		} else {
-			status = pagecache_write_begin(NULL, mapping, pos, bytes,
-						AOP_FLAG_UNINTERRUPTIBLE,
-						&page, &fsdata);
-			if (status)
-				break;
-
-			zero_user(page, offset, bytes);
-
-			status = pagecache_write_end(NULL, mapping, pos, bytes,
-						bytes, page, fsdata);
-			WARN_ON(status <= 0); /* can't return less than zero! */
-			status = 0;
-		}
-		pos += bytes;
-		count -= bytes;
-	} while (count);
-
-	return status;
+	return iomap_zero_range(VFS_I(ip), pos, count, NULL, &xfs_iomap_ops);
 }
 
 int
@@ -424,49 +381,6 @@ out:
 }
 
 /*
- * This routine is called to handle zeroing any space in the last block of the
- * file that is beyond the EOF.  We do this since the size is being increased
- * without writing anything to that block and we don't want to read the
- * garbage on the disk.
- */
-STATIC int				/* error (positive) */
-xfs_zero_last_block(
-	struct xfs_inode	*ip,
-	xfs_fsize_t		offset,
-	xfs_fsize_t		isize,
-	bool			*did_zeroing)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		last_fsb = XFS_B_TO_FSBT(mp, isize);
-	int			zero_offset = XFS_B_FSB_OFFSET(mp, isize);
-	int			zero_len;
-	int			nimaps = 1;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	error = xfs_bmapi_read(ip, last_fsb, 1, &imap, &nimaps, 0);
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	if (error)
-		return error;
-
-	ASSERT(nimaps > 0);
-
-	/*
-	 * If the block underlying isize is just a hole, then there
-	 * is nothing to zero.
-	 */
-	if (imap.br_startblock == HOLESTARTBLOCK)
-		return 0;
-
-	zero_len = mp->m_sb.sb_blocksize - zero_offset;
-	if (isize + zero_len > offset)
-		zero_len = offset - isize;
-	*did_zeroing = true;
-	return xfs_iozero(ip, isize, zero_len);
-}
-
-/*
  * Zero any on disk space between the current EOF and the new, larger EOF.
  *
  * This handles the normal case of zeroing the remainder of the last block in
@@ -484,94 +398,11 @@ xfs_zero_eof(
 	xfs_fsize_t		isize,		/* current inode size */
 	bool			*did_zeroing)
 {
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		start_zero_fsb;
-	xfs_fileoff_t		end_zero_fsb;
-	xfs_fileoff_t		zero_count_fsb;
-	xfs_fileoff_t		last_fsb;
-	xfs_fileoff_t		zero_off;
-	xfs_fsize_t		zero_len;
-	int			nimaps;
-	int			error = 0;
-	struct xfs_bmbt_irec	imap;
-
 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 	ASSERT(offset > isize);
 
 	trace_xfs_zero_eof(ip, isize, offset - isize);
-
-	/*
-	 * First handle zeroing the block on which isize resides.
-	 *
-	 * We only zero a part of that block so it is handled specially.
-	 */
-	if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
-		error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
-		if (error)
-			return error;
-	}
-
-	/*
-	 * Calculate the range between the new size and the old where blocks
-	 * needing to be zeroed may exist.
-	 *
-	 * To get the block where the last byte in the file currently resides,
-	 * we need to subtract one from the size and truncate back to a block
-	 * boundary.  We subtract 1 in case the size is exactly on a block
-	 * boundary.
-	 */
-	last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
-	start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
-	end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
-	ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
-	if (last_fsb == end_zero_fsb) {
-		/*
-		 * The size was only incremented on its last block.
-		 * We took care of that above, so just return.
-		 */
-		return 0;
-	}
-
-	ASSERT(start_zero_fsb <= end_zero_fsb);
-	while (start_zero_fsb <= end_zero_fsb) {
-		nimaps = 1;
-		zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
-
-		xfs_ilock(ip, XFS_ILOCK_EXCL);
-		error = xfs_bmapi_read(ip, start_zero_fsb, zero_count_fsb,
-					  &imap, &nimaps, 0);
-		xfs_iunlock(ip, XFS_ILOCK_EXCL);
-		if (error)
-			return error;
-
-		ASSERT(nimaps > 0);
-
-		if (imap.br_state == XFS_EXT_UNWRITTEN ||
-		    imap.br_startblock == HOLESTARTBLOCK) {
-			start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-			ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-			continue;
-		}
-
-		/*
-		 * There are blocks we need to zero.
-		 */
-		zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
-		zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
-		if ((zero_off + zero_len) > offset)
-			zero_len = offset - zero_off;
-
-		error = xfs_iozero(ip, zero_off, zero_len);
-		if (error)
-			return error;
-
-		*did_zeroing = true;
-		start_zero_fsb = imap.br_startoff + imap.br_blockcount;
-		ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-	}
-
-	return 0;
+	return xfs_zero_range(ip, isize, offset - isize, did_zeroing);
 }
 
 /*
@@ -841,7 +672,7 @@ xfs_file_buffered_aio_write(
 write_retry:
 	trace_xfs_file_buffered_write(ip, iov_iter_count(from),
 				      iocb->ki_pos, 0);
-	ret = generic_perform_write(file, from, iocb->ki_pos);
+	ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
 	if (likely(ret >= 0))
 		iocb->ki_pos += ret;
 
@@ -1553,7 +1384,7 @@ xfs_filemap_page_mkwrite(
 	if (IS_DAX(inode)) {
 		ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault);
 	} else {
-		ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
+		ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
 		ret = block_page_mkwrite_return(ret);
 	}
 
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 99d75223ff2e..0c19d3d05a91 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -427,7 +427,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
 			enum xfs_prealloc_flags flags);
 int	xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
 		     xfs_fsize_t isize, bool *did_zeroing);
-int	xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+int	xfs_zero_range(struct xfs_inode *ip, xfs_off_t pos, xfs_off_t count,
+		bool *did_zero);
 loff_t	__xfs_seek_hole_data(struct inode *inode, loff_t start,
 			     loff_t eof, int whence);
 
433 434
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 58391355a44d..620fc9120444 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -15,6 +15,7 @@
  * along with this program; if not, write the Free Software Foundation,
  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
  */
+#include <linux/iomap.h>
 #include "xfs.h"
 #include "xfs_fs.h"
 #include "xfs_shared.h"
@@ -940,3 +941,173 @@ error_on_bmapi_transaction:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 }
+
+void
+xfs_bmbt_to_iomap(
+	struct xfs_inode	*ip,
+	struct iomap		*iomap,
+	struct xfs_bmbt_irec	*imap)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+
+	if (imap->br_startblock == HOLESTARTBLOCK) {
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_DELALLOC;
+	} else {
+		iomap->blkno = xfs_fsb_to_db(ip, imap->br_startblock);
+		if (imap->br_state == XFS_EXT_UNWRITTEN)
+			iomap->type = IOMAP_UNWRITTEN;
+		else
+			iomap->type = IOMAP_MAPPED;
+	}
+	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
+	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
+	iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
+}
+
+static inline bool imap_needs_alloc(struct xfs_bmbt_irec *imap, int nimaps)
+{
+	return !nimaps ||
+		imap->br_startblock == HOLESTARTBLOCK ||
+		imap->br_startblock == DELAYSTARTBLOCK;
+}
+
+static int
+xfs_file_iomap_begin(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_bmbt_irec	imap;
+	xfs_fileoff_t		offset_fsb, end_fsb;
+	int			nimaps = 1, error = 0;
+
+	if (XFS_FORCED_SHUTDOWN(mp))
+		return -EIO;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	ASSERT(offset <= mp->m_super->s_maxbytes);
+	if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes)
+		length = mp->m_super->s_maxbytes - offset;
+	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+			       &nimaps, XFS_BMAPI_ENTIRE);
+	if (error) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		return error;
+	}
+
+	if ((flags & IOMAP_WRITE) && imap_needs_alloc(&imap, nimaps)) {
+		/*
+		 * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+		 * pages to keep the chunks of work done where somewhat symmetric
+		 * with the work writeback does. This is a completely arbitrary
+		 * number pulled out of thin air as a best guess for initial
+		 * testing.
+		 *
+		 * Note that the values needs to be less than 32-bits wide until
+		 * the lower level functions are updated.
+		 */
+		length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+		if (xfs_get_extsz_hint(ip)) {
+			/*
+			 * xfs_iomap_write_direct() expects the shared lock. It
+			 * is unlocked on return.
+			 */
+			xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
+			error = xfs_iomap_write_direct(ip, offset, length, &imap,
+						       nimaps);
+		} else {
+			error = xfs_iomap_write_delay(ip, offset, length, &imap);
+			xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		}
+
+		if (error)
+			return error;
+
+		trace_xfs_iomap_alloc(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else if (nimaps) {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_found(ip, offset, length, 0, &imap);
+		xfs_bmbt_to_iomap(ip, iomap, &imap);
+	} else {
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		trace_xfs_iomap_not_found(ip, offset, length, 0, &imap);
+		iomap->blkno = IOMAP_NULL_BLOCK;
+		iomap->type = IOMAP_HOLE;
+		iomap->offset = offset;
+		iomap->length = length;
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end_delalloc(
+	struct xfs_inode	*ip,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written)
+{
+	struct xfs_mount	*mp = ip->i_mount;
+	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		end_fsb;
+	int			error = 0;
+
+	start_fsb = XFS_B_TO_FSB(mp, offset + written);
+	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+
+	/*
+	 * Trim back delalloc blocks if we didn't manage to write the whole
+	 * range reserved.
+	 *
+	 * We don't need to care about racing delalloc as we hold i_mutex
+	 * across the reserve/allocate/unreserve calls. If there are delalloc
+	 * blocks in the range, they are ours.
+	 */
+	if (start_fsb < end_fsb) {
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+					       end_fsb - start_fsb);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+		if (error && !XFS_FORCED_SHUTDOWN(mp)) {
+			xfs_alert(mp, "%s: unable to clean up ino %lld",
+				__func__, ip->i_ino);
+			return error;
+		}
+	}
+
+	return 0;
+}
+
+static int
+xfs_file_iomap_end(
+	struct inode		*inode,
+	loff_t			offset,
+	loff_t			length,
+	ssize_t			written,
+	unsigned		flags,
+	struct iomap		*iomap)
+{
+	if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
+		return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
+				length, written);
+	return 0;
+}
+
+struct iomap_ops xfs_iomap_ops = {
+	.iomap_begin		= xfs_file_iomap_begin,
+	.iomap_end		= xfs_file_iomap_end,
+};
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 8688e663d744..e066d045e2ff 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -18,6 +18,8 @@
 #ifndef __XFS_IOMAP_H__
 #define __XFS_IOMAP_H__
 
+#include <linux/iomap.h>
+
 struct xfs_inode;
 struct xfs_bmbt_irec;
 
@@ -29,4 +31,9 @@ int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,
 			struct xfs_bmbt_irec *);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t);
 
+void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
+		struct xfs_bmbt_irec *);
+
+extern struct iomap_ops xfs_iomap_ops;
+
 #endif	/* __XFS_IOMAP_H__*/
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index c5d4eba6972e..ab820f84ed50 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -38,12 +38,13 @@
 #include "xfs_dir2.h"
 #include "xfs_trans_space.h"
 #include "xfs_pnfs.h"
+#include "xfs_iomap.h"
 
 #include <linux/capability.h>
 #include <linux/xattr.h>
 #include <linux/posix_acl.h>
 #include <linux/security.h>
-#include <linux/fiemap.h>
+#include <linux/iomap.h>
 #include <linux/slab.h>
 
 /*
@@ -801,20 +802,30 @@ xfs_setattr_size(
 		return error;
 
 	/*
+	 * Wait for all direct I/O to complete.
+	 */
+	inode_dio_wait(inode);
+
+	/*
 	 * File data changes must be complete before we start the transaction to
 	 * modify the inode.  This needs to be done before joining the inode to
 	 * the transaction because the inode cannot be unlocked once it is a
 	 * part of the transaction.
 	 *
-	 * Start with zeroing any data block beyond EOF that we may expose on
-	 * file extension.
+	 * Start with zeroing any data beyond EOF that we may expose on file
+	 * extension, or zeroing out the rest of the block on a downward
+	 * truncate.
 	 */
 	if (newsize > oldsize) {
 		error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
-		if (error)
-			return error;
+	} else {
+		error = iomap_truncate_page(inode, newsize, &did_zeroing,
+				&xfs_iomap_ops);
 	}
 
+	if (error)
+		return error;
+
 	/*
 	 * We are going to log the inode size change in this transaction so
 	 * any previous writes that are beyond the on disk EOF and the new
@@ -823,17 +834,14 @@ xfs_setattr_size(
 	 * problem. Note that this includes any block zeroing we did above;
 	 * otherwise those blocks may not be zeroed after a crash.
 	 */
-	if (newsize > ip->i_d.di_size &&
-	    (oldsize != ip->i_d.di_size || did_zeroing)) {
+	if (did_zeroing ||
+	    (newsize > ip->i_d.di_size && oldsize != ip->i_d.di_size)) {
 		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
 						      ip->i_d.di_size, newsize);
 		if (error)
 			return error;
 	}
 
-	/* Now wait for all direct I/O to complete. */
-	inode_dio_wait(inode);
-
 	/*
 	 * We've already locked out new page faults, so now we can safely remove
 	 * pages from the page cache knowing they won't get refaulted until we
@@ -851,13 +859,6 @@ xfs_setattr_size(
 	 * to hope that the caller sees ENOMEM and retries the truncate
 	 * operation.
 	 */
-	if (IS_DAX(inode))
-		error = dax_truncate_page(inode, newsize, xfs_get_blocks_direct);
-	else
-		error = block_truncate_page(inode->i_mapping, newsize,
-					    xfs_get_blocks);
-	if (error)
-		return error;
 	truncate_setsize(inode, newsize);
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
@@ -998,51 +999,6 @@ xfs_vn_update_time(
 	return xfs_trans_commit(tp);
 }
 
-#define XFS_FIEMAP_FLAGS	(FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
-	void			**arg,
-	struct getbmapx		*bmv,
-	int			*full)
-{
-	int			error;
-	struct fiemap_extent_info *fieinfo = *arg;
-	u32			fiemap_flags = 0;
-	u64			logical, physical, length;
-
-	/* Do nothing for a hole */
-	if (bmv->bmv_block == -1LL)
-		return 0;
-
-	logical = BBTOB(bmv->bmv_offset);
-	physical = BBTOB(bmv->bmv_block);
-	length = BBTOB(bmv->bmv_length);
-
-	if (bmv->bmv_oflags & BMV_OF_PREALLOC)
-		fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
-	else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
-		fiemap_flags |= (FIEMAP_EXTENT_DELALLOC |
-				 FIEMAP_EXTENT_UNKNOWN);
-		physical = 0;	/* no block yet */
-	}
-	if (bmv->bmv_oflags & BMV_OF_LAST)
-		fiemap_flags |= FIEMAP_EXTENT_LAST;
-
-	error = fiemap_fill_next_extent(fieinfo, logical, physical,
-					length, fiemap_flags);
-	if (error > 0) {
-		error = 0;
-		*full = 1;	/* user array now full */
-	}
-
-	return error;
-}
-
 STATIC int
 xfs_vn_fiemap(
 	struct inode		*inode,
@@ -1050,38 +1006,13 @@ xfs_vn_fiemap(
 	u64			start,
 	u64			length)
 {
-	xfs_inode_t		*ip = XFS_I(inode);
-	struct getbmapx		bm;
 	int			error;
 
-	error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
-	if (error)
-		return error;
-
-	/* Set up bmap header for xfs internal routine */
-	bm.bmv_offset = BTOBBT(start);
-	/* Special case for whole file */
-	if (length == FIEMAP_MAX_OFFSET)
-		bm.bmv_length = -1LL;
-	else
-		bm.bmv_length = BTOBB(start + length) - bm.bmv_offset;
-
-	/* We add one because in getbmap world count includes the header */
-	bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
-					fieinfo->fi_extents_max + 1;
-	bm.bmv_count = min_t(__s32, bm.bmv_count,
-			     (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
-	bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
-	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
-		bm.bmv_iflags |= BMV_IF_ATTRFORK;
-	if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
-		bm.bmv_iflags |= BMV_IF_DELALLOC;
-
-	error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
-	if (error)
-		return error;
+	xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED);
+	error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops);
+	xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
 
-	return 0;
+	return error;
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index db3c7df52e30..0f14b2e4bf6c 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -80,32 +80,6 @@ xfs_fs_get_uuid(
 	return 0;
 }
 
-static void
-xfs_bmbt_to_iomap(
-	struct xfs_inode	*ip,
-	struct iomap		*iomap,
-	struct xfs_bmbt_irec	*imap)
-{
-	struct xfs_mount	*mp = ip->i_mount;
-
-	if (imap->br_startblock == HOLESTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
-		iomap->type = IOMAP_HOLE;
-	} else if (imap->br_startblock == DELAYSTARTBLOCK) {
-		iomap->blkno = IOMAP_NULL_BLOCK;
-		iomap->type = IOMAP_DELALLOC;
-	} else {
-		iomap->blkno =
-			XFS_FSB_TO_DADDR(ip->i_mount, imap->br_startblock);
-		if (imap->br_state == XFS_EXT_UNWRITTEN)
-			iomap->type = IOMAP_UNWRITTEN;
-		else
-			iomap->type = IOMAP_MAPPED;
-	}
-	iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
-	iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
-}
-
 /*
  * Get a layout for the pNFS client.
  */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index ea94ee0fe5ea..bb24ce7b0280 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1295,6 +1295,9 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
 DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
+DEFINE_IOMAP_EVENT(xfs_iomap_alloc);
+DEFINE_IOMAP_EVENT(xfs_iomap_found);
+DEFINE_IOMAP_EVENT(xfs_iomap_not_found);
 
 DECLARE_EVENT_CLASS(xfs_simple_io_class,
 	TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),