summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-11 23:18:48 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-11 23:18:48 -0500
commit5d50ac70fe98518dbf620bfba8184254663125eb (patch)
treebc32329b72516ece58142444bbfd520d27a7f6ca /fs
parent31c1febd7a45229edb3e5d86f354e3c1df543cbb (diff)
parent4e14e49a91e18098fd8ef30743972e0c3cb727c1 (diff)
Merge tag 'xfs-for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs updates from Dave Chinner: "There is nothing really major here - the only significant addition is the per-mount operation statistics infrastructure. Otherwises there's various ACL, xattr, DAX, AIO and logging fixes, and a smattering of small cleanups and fixes elsewhere. Summary: - per-mount operational statistics in sysfs - fixes for concurrent aio append write submission - various logging fixes - detection of zeroed logs and invalid log sequence numbers on v5 filesystems - memory allocation failure message improvements - a bunch of xattr/ACL fixes - fdatasync optimisation - miscellaneous other fixes and cleanups" * tag 'xfs-for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (39 commits) xfs: give all workqueues rescuer threads xfs: fix log recovery op header validation assert xfs: Fix error path in xfs_get_acl xfs: optimise away log forces on timestamp updates for fdatasync xfs: don't leak uuid table on rmmod xfs: invalidate cached acl if set via ioctl xfs: Plug memory leak in xfs_attrmulti_attr_set xfs: Validate the length of on-disk ACLs xfs: invalidate cached acl if set directly via xattr xfs: xfs_filemap_pmd_fault treats read faults as write faults xfs: add ->pfn_mkwrite support for DAX xfs: DAX does not use IO completion callbacks xfs: Don't use unwritten extents for DAX xfs: introduce BMAPI_ZERO for allocating zeroed extents xfs: fix inode size update overflow in xfs_map_direct() xfs: clear PF_NOFREEZE for xfsaild kthread xfs: fix an error code in xfs_fs_fill_super() xfs: stats are no longer dependent on CONFIG_PROC_FS xfs: simplify /proc teardown & error handling xfs: per-filesystem stats counter implementation ...
Diffstat (limited to 'fs')
-rw-r--r--fs/dax.c5
-rw-r--r--fs/xfs/Makefile2
-rw-r--r--fs/xfs/kmem.c10
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c30
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h8
-rw-r--r--fs/xfs/libxfs/xfs_attr.c6
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c3
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c65
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h13
-rw-r--r--fs/xfs/libxfs/xfs_btree.c21
-rw-r--r--fs/xfs/libxfs/xfs_btree.h39
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c6
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c3
-rw-r--r--fs/xfs/libxfs/xfs_format.h18
-rw-r--r--fs/xfs/libxfs/xfs_fs.h10
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c10
-rw-r--r--fs/xfs/libxfs/xfs_sb.c10
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c7
-rw-r--r--fs/xfs/xfs_acl.c14
-rw-r--r--fs/xfs/xfs_acl.h4
-rw-r--r--fs/xfs/xfs_aops.c119
-rw-r--r--fs/xfs/xfs_aops.h3
-rw-r--r--fs/xfs/xfs_attr_list.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c38
-rw-r--r--fs/xfs/xfs_buf.c21
-rw-r--r--fs/xfs/xfs_dir2_readdir.c2
-rw-r--r--fs/xfs/xfs_dquot.c14
-rw-r--r--fs/xfs/xfs_file.c114
-rw-r--r--fs/xfs/xfs_icache.c18
-rw-r--r--fs/xfs/xfs_inode.c8
-rw-r--r--fs/xfs/xfs_inode_item.c1
-rw-r--r--fs/xfs/xfs_inode_item.h1
-rw-r--r--fs/xfs/xfs_ioctl.c23
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c70
-rw-r--r--fs/xfs/xfs_iops.c4
-rw-r--r--fs/xfs/xfs_linux.h7
-rw-r--r--fs/xfs/xfs_log.c93
-rw-r--r--fs/xfs/xfs_log.h1
-rw-r--r--fs/xfs/xfs_log_priv.h51
-rw-r--r--fs/xfs/xfs_log_recover.c14
-rw-r--r--fs/xfs/xfs_message.c7
-rw-r--r--fs/xfs/xfs_mount.c21
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_pnfs.c5
-rw-r--r--fs/xfs/xfs_qm.c14
-rw-r--r--fs/xfs/xfs_stats.c93
-rw-r--r--fs/xfs/xfs_stats.h36
-rw-r--r--fs/xfs/xfs_super.c57
-rw-r--r--fs/xfs/xfs_sysctl.c15
-rw-r--r--fs/xfs/xfs_sysfs.c185
-rw-r--r--fs/xfs/xfs_sysfs.h1
-rw-r--r--fs/xfs/xfs_trace.h2
-rw-r--r--fs/xfs/xfs_trans.c6
-rw-r--r--fs/xfs/xfs_trans_ail.c13
-rw-r--r--fs/xfs/xfs_trans_inode.c9
-rw-r--r--fs/xfs/xfs_xattr.c31
62 files changed, 994 insertions, 411 deletions
diff --git a/fs/dax.c b/fs/dax.c
index a86d3cc2b389..131fd35ae39d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -29,6 +29,11 @@
29#include <linux/uio.h> 29#include <linux/uio.h>
30#include <linux/vmstat.h> 30#include <linux/vmstat.h>
31 31
32/*
33 * dax_clear_blocks() is called from within transaction context from XFS,
34 * and hence this means the stack from this point must follow GFP_NOFS
35 * semantics for all operations.
36 */
32int dax_clear_blocks(struct inode *inode, sector_t block, long size) 37int dax_clear_blocks(struct inode *inode, sector_t block, long size)
33{ 38{
34 struct block_device *bdev = inode->i_sb->s_bdev; 39 struct block_device *bdev = inode->i_sb->s_bdev;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index a096841bd06c..f64639176670 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -84,6 +84,7 @@ xfs-y += xfs_aops.o \
84 xfs_message.o \ 84 xfs_message.o \
85 xfs_mount.o \ 85 xfs_mount.o \
86 xfs_mru_cache.o \ 86 xfs_mru_cache.o \
87 xfs_stats.o \
87 xfs_super.o \ 88 xfs_super.o \
88 xfs_symlink.o \ 89 xfs_symlink.o \
89 xfs_sysfs.o \ 90 xfs_sysfs.o \
@@ -118,7 +119,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
118xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o 119xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
119 120
120xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 121xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
121xfs-$(CONFIG_PROC_FS) += xfs_stats.o
122xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o 122xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
123xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o 123xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
124xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o 124xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index a7a3a63bb360..686ba6fb20dd 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -55,8 +55,9 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
55 return ptr; 55 return ptr;
56 if (!(++retries % 100)) 56 if (!(++retries % 100))
57 xfs_err(NULL, 57 xfs_err(NULL,
58 "possible memory allocation deadlock in %s (mode:0x%x)", 58 "%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
59 __func__, lflags); 59 current->comm, current->pid,
60 (unsigned int)size, __func__, lflags);
60 congestion_wait(BLK_RW_ASYNC, HZ/50); 61 congestion_wait(BLK_RW_ASYNC, HZ/50);
61 } while (1); 62 } while (1);
62} 63}
@@ -120,8 +121,9 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags)
120 return ptr; 121 return ptr;
121 if (!(++retries % 100)) 122 if (!(++retries % 100))
122 xfs_err(NULL, 123 xfs_err(NULL,
123 "possible memory allocation deadlock in %s (mode:0x%x)", 124 "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
124 __func__, lflags); 125 current->comm, current->pid,
126 __func__, lflags);
125 congestion_wait(BLK_RW_ASYNC, HZ/50); 127 congestion_wait(BLK_RW_ASYNC, HZ/50);
126 } while (1); 128 } while (1);
127} 129}
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index ffad7f20342f..3479294c1d58 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -482,7 +482,9 @@ xfs_agfl_verify(
482 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 482 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
483 return false; 483 return false;
484 } 484 }
485 return true; 485
486 return xfs_log_check_lsn(mp,
487 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
486} 488}
487 489
488static void 490static void
@@ -651,8 +653,8 @@ xfs_alloc_ag_vextent(
651 -((long)(args->len))); 653 -((long)(args->len)));
652 } 654 }
653 655
654 XFS_STATS_INC(xs_allocx); 656 XFS_STATS_INC(args->mp, xs_allocx);
655 XFS_STATS_ADD(xs_allocb, args->len); 657 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
656 return error; 658 return error;
657} 659}
658 660
@@ -1808,8 +1810,8 @@ xfs_free_ag_extent(
1808 1810
1809 if (!isfl) 1811 if (!isfl)
1810 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); 1812 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1811 XFS_STATS_INC(xs_freex); 1813 XFS_STATS_INC(mp, xs_freex);
1812 XFS_STATS_ADD(xs_freeb, len); 1814 XFS_STATS_ADD(mp, xs_freeb, len);
1813 1815
1814 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1816 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1815 1817
@@ -2259,9 +2261,13 @@ xfs_agf_verify(
2259 { 2261 {
2260 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2262 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
2261 2263
2262 if (xfs_sb_version_hascrc(&mp->m_sb) && 2264 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2263 !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2265 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2264 return false; 2266 return false;
2267 if (!xfs_log_check_lsn(mp,
2268 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2269 return false;
2270 }
2265 2271
2266 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2272 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2267 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2273 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
@@ -2503,7 +2509,7 @@ xfs_alloc_vextent(
2503 * Try near allocation first, then anywhere-in-ag after 2509 * Try near allocation first, then anywhere-in-ag after
2504 * the first a.g. fails. 2510 * the first a.g. fails.
2505 */ 2511 */
2506 if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && 2512 if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
2507 (mp->m_flags & XFS_MOUNT_32BITINODES)) { 2513 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2508 args->fsbno = XFS_AGB_TO_FSB(mp, 2514 args->fsbno = XFS_AGB_TO_FSB(mp,
2509 ((mp->m_agfrotor / rotorstep) % 2515 ((mp->m_agfrotor / rotorstep) %
@@ -2634,6 +2640,14 @@ xfs_alloc_vextent(
2634 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), 2640 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2635 args->len); 2641 args->len);
2636#endif 2642#endif
2643
2644 /* Zero the extent if we were asked to do so */
2645 if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
2646 error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2647 if (error)
2648 goto error0;
2649 }
2650
2637 } 2651 }
2638 xfs_perag_put(args->pag); 2652 xfs_perag_put(args->pag);
2639 return 0; 2653 return 0;
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index ca1c8168373a..0ecde4d5cac8 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -101,6 +101,7 @@ typedef struct xfs_alloc_arg {
101 struct xfs_mount *mp; /* file system mount point */ 101 struct xfs_mount *mp; /* file system mount point */
102 struct xfs_buf *agbp; /* buffer for a.g. freelist header */ 102 struct xfs_buf *agbp; /* buffer for a.g. freelist header */
103 struct xfs_perag *pag; /* per-ag struct for this agno */ 103 struct xfs_perag *pag; /* per-ag struct for this agno */
104 struct xfs_inode *ip; /* for userdata zeroing method */
104 xfs_fsblock_t fsbno; /* file system block number */ 105 xfs_fsblock_t fsbno; /* file system block number */
105 xfs_agnumber_t agno; /* allocation group number */ 106 xfs_agnumber_t agno; /* allocation group number */
106 xfs_agblock_t agbno; /* allocation group-relative block # */ 107 xfs_agblock_t agbno; /* allocation group-relative block # */
@@ -120,15 +121,16 @@ typedef struct xfs_alloc_arg {
120 char wasdel; /* set if allocation was prev delayed */ 121 char wasdel; /* set if allocation was prev delayed */
121 char wasfromfl; /* set if allocation is from freelist */ 122 char wasfromfl; /* set if allocation is from freelist */
122 char isfl; /* set if is freelist blocks - !acctg */ 123 char isfl; /* set if is freelist blocks - !acctg */
123 char userdata; /* set if this is user data */ 124 char userdata; /* mask defining userdata treatment */
124 xfs_fsblock_t firstblock; /* io first block allocated */ 125 xfs_fsblock_t firstblock; /* io first block allocated */
125} xfs_alloc_arg_t; 126} xfs_alloc_arg_t;
126 127
127/* 128/*
128 * Defines for userdata 129 * Defines for userdata
129 */ 130 */
130#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ 131#define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/
131#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ 132#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */
133#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */
132 134
133xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, 135xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
134 struct xfs_perag *pag, xfs_extlen_t need); 136 struct xfs_perag *pag, xfs_extlen_t need);
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index ff065578969f..f949818fa1c7 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -125,7 +125,7 @@ xfs_attr_get(
125 uint lock_mode; 125 uint lock_mode;
126 int error; 126 int error;
127 127
128 XFS_STATS_INC(xs_attr_get); 128 XFS_STATS_INC(ip->i_mount, xs_attr_get);
129 129
130 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 130 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
131 return -EIO; 131 return -EIO;
@@ -209,7 +209,7 @@ xfs_attr_set(
209 int rsvd = (flags & ATTR_ROOT) != 0; 209 int rsvd = (flags & ATTR_ROOT) != 0;
210 int error, err2, committed, local; 210 int error, err2, committed, local;
211 211
212 XFS_STATS_INC(xs_attr_set); 212 XFS_STATS_INC(mp, xs_attr_set);
213 213
214 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 214 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
215 return -EIO; 215 return -EIO;
@@ -412,7 +412,7 @@ xfs_attr_remove(
412 xfs_fsblock_t firstblock; 412 xfs_fsblock_t firstblock;
413 int error; 413 int error;
414 414
415 XFS_STATS_INC(xs_attr_remove); 415 XFS_STATS_INC(mp, xs_attr_remove);
416 416
417 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 417 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
418 return -EIO; 418 return -EIO;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 33df52d97ec7..aa187f7ba2dd 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -41,6 +41,7 @@
41#include "xfs_buf_item.h" 41#include "xfs_buf_item.h"
42#include "xfs_cksum.h" 42#include "xfs_cksum.h"
43#include "xfs_dir2.h" 43#include "xfs_dir2.h"
44#include "xfs_log.h"
44 45
45 46
46/* 47/*
@@ -266,6 +267,8 @@ xfs_attr3_leaf_verify(
266 return false; 267 return false;
267 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 268 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
268 return false; 269 return false;
270 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
271 return false;
269 } else { 272 } else {
270 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) 273 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
271 return false; 274 return false;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index f38f9bd81557..5ab95ffa4ae9 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -107,7 +107,7 @@ xfs_attr3_rmt_verify(
107 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 107 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
108 return false; 108 return false;
109 if (be32_to_cpu(rmt->rm_offset) + 109 if (be32_to_cpu(rmt->rm_offset) +
110 be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) 110 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
111 return false; 111 return false;
112 if (rmt->rm_owner == 0) 112 if (rmt->rm_owner == 0)
113 return false; 113 return false;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 8e2010d53b07..119c2422aac7 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -948,14 +948,16 @@ xfs_bmap_local_to_extents(
948 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 948 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
949 949
950 /* 950 /*
951 * Initialise the block and copy the data 951 * Initialize the block, copy the data and log the remote buffer.
952 * 952 *
953 * Note: init_fn must set the buffer log item type correctly! 953 * The callout is responsible for logging because the remote format
954 * might differ from the local format and thus we don't know how much to
955 * log here. Note that init_fn must also set the buffer log item type
956 * correctly.
954 */ 957 */
955 init_fn(tp, bp, ip, ifp); 958 init_fn(tp, bp, ip, ifp);
956 959
957 /* account for the change in fork size and log everything */ 960 /* account for the change in fork size */
958 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
959 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 961 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
960 xfs_bmap_local_to_extents_empty(ip, whichfork); 962 xfs_bmap_local_to_extents_empty(ip, whichfork);
961 flags |= XFS_ILOG_CORE; 963 flags |= XFS_ILOG_CORE;
@@ -1435,7 +1437,7 @@ xfs_bmap_search_extents(
1435 xfs_ifork_t *ifp; /* inode fork pointer */ 1437 xfs_ifork_t *ifp; /* inode fork pointer */
1436 xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 1438 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1437 1439
1438 XFS_STATS_INC(xs_look_exlist); 1440 XFS_STATS_INC(ip->i_mount, xs_look_exlist);
1439 ifp = XFS_IFORK_PTR(ip, fork); 1441 ifp = XFS_IFORK_PTR(ip, fork);
1440 1442
1441 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); 1443 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
@@ -1732,7 +1734,7 @@ xfs_bmap_add_extent_delay_real(
1732 ASSERT(!bma->cur || 1734 ASSERT(!bma->cur ||
1733 (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 1735 (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
1734 1736
1735 XFS_STATS_INC(xs_add_exlist); 1737 XFS_STATS_INC(mp, xs_add_exlist);
1736 1738
1737#define LEFT r[0] 1739#define LEFT r[0]
1738#define RIGHT r[1] 1740#define RIGHT r[1]
@@ -2286,7 +2288,7 @@ xfs_bmap_add_extent_unwritten_real(
2286 ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); 2288 ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
2287 ASSERT(!isnullstartblock(new->br_startblock)); 2289 ASSERT(!isnullstartblock(new->br_startblock));
2288 2290
2289 XFS_STATS_INC(xs_add_exlist); 2291 XFS_STATS_INC(mp, xs_add_exlist);
2290 2292
2291#define LEFT r[0] 2293#define LEFT r[0]
2292#define RIGHT r[1] 2294#define RIGHT r[1]
@@ -2946,7 +2948,7 @@ xfs_bmap_add_extent_hole_real(
2946 ASSERT(!bma->cur || 2948 ASSERT(!bma->cur ||
2947 !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 2949 !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2948 2950
2949 XFS_STATS_INC(xs_add_exlist); 2951 XFS_STATS_INC(mp, xs_add_exlist);
2950 2952
2951 state = 0; 2953 state = 0;
2952 if (whichfork == XFS_ATTR_FORK) 2954 if (whichfork == XFS_ATTR_FORK)
@@ -3800,8 +3802,13 @@ xfs_bmap_btalloc(
3800 args.wasdel = ap->wasdel; 3802 args.wasdel = ap->wasdel;
3801 args.isfl = 0; 3803 args.isfl = 0;
3802 args.userdata = ap->userdata; 3804 args.userdata = ap->userdata;
3803 if ((error = xfs_alloc_vextent(&args))) 3805 if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
3806 args.ip = ap->ip;
3807
3808 error = xfs_alloc_vextent(&args);
3809 if (error)
3804 return error; 3810 return error;
3811
3805 if (tryagain && args.fsbno == NULLFSBLOCK) { 3812 if (tryagain && args.fsbno == NULLFSBLOCK) {
3806 /* 3813 /*
3807 * Exact allocation failed. Now try with alignment 3814 * Exact allocation failed. Now try with alignment
@@ -4036,7 +4043,7 @@ xfs_bmapi_read(
4036 if (XFS_FORCED_SHUTDOWN(mp)) 4043 if (XFS_FORCED_SHUTDOWN(mp))
4037 return -EIO; 4044 return -EIO;
4038 4045
4039 XFS_STATS_INC(xs_blk_mapr); 4046 XFS_STATS_INC(mp, xs_blk_mapr);
4040 4047
4041 ifp = XFS_IFORK_PTR(ip, whichfork); 4048 ifp = XFS_IFORK_PTR(ip, whichfork);
4042 4049
@@ -4221,7 +4228,7 @@ xfs_bmapi_delay(
4221 if (XFS_FORCED_SHUTDOWN(mp)) 4228 if (XFS_FORCED_SHUTDOWN(mp))
4222 return -EIO; 4229 return -EIO;
4223 4230
4224 XFS_STATS_INC(xs_blk_mapw); 4231 XFS_STATS_INC(mp, xs_blk_mapw);
4225 4232
4226 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 4233 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4227 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); 4234 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
@@ -4300,11 +4307,14 @@ xfs_bmapi_allocate(
4300 4307
4301 /* 4308 /*
4302 * Indicate if this is the first user data in the file, or just any 4309 * Indicate if this is the first user data in the file, or just any
4303 * user data. 4310 * user data. And if it is userdata, indicate whether it needs to
4311 * be initialised to zero during allocation.
4304 */ 4312 */
4305 if (!(bma->flags & XFS_BMAPI_METADATA)) { 4313 if (!(bma->flags & XFS_BMAPI_METADATA)) {
4306 bma->userdata = (bma->offset == 0) ? 4314 bma->userdata = (bma->offset == 0) ?
4307 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; 4315 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
4316 if (bma->flags & XFS_BMAPI_ZERO)
4317 bma->userdata |= XFS_ALLOC_USERDATA_ZERO;
4308 } 4318 }
4309 4319
4310 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; 4320 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
@@ -4419,6 +4429,17 @@ xfs_bmapi_convert_unwritten(
4419 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4429 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4420 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 4430 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4421 4431
4432 /*
4433 * Before insertion into the bmbt, zero the range being converted
4434 * if required.
4435 */
4436 if (flags & XFS_BMAPI_ZERO) {
4437 error = xfs_zero_extent(bma->ip, mval->br_startblock,
4438 mval->br_blockcount);
4439 if (error)
4440 return error;
4441 }
4442
4422 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, 4443 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
4423 &bma->cur, mval, bma->firstblock, bma->flist, 4444 &bma->cur, mval, bma->firstblock, bma->flist,
4424 &tmp_logflags); 4445 &tmp_logflags);
@@ -4512,6 +4533,18 @@ xfs_bmapi_write(
4512 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); 4533 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
4513 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4534 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4514 4535
4536 /* zeroing is for currently only for data extents, not metadata */
4537 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4538 (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4539 /*
4540 * we can allocate unwritten extents or pre-zero allocated blocks,
4541 * but it makes no sense to do both at once. This would result in
4542 * zeroing the unwritten extent twice, but it still being an
4543 * unwritten extent....
4544 */
4545 ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4546 (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4547
4515 if (unlikely(XFS_TEST_ERROR( 4548 if (unlikely(XFS_TEST_ERROR(
4516 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 4549 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4517 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 4550 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
@@ -4525,7 +4558,7 @@ xfs_bmapi_write(
4525 4558
4526 ifp = XFS_IFORK_PTR(ip, whichfork); 4559 ifp = XFS_IFORK_PTR(ip, whichfork);
4527 4560
4528 XFS_STATS_INC(xs_blk_mapw); 4561 XFS_STATS_INC(mp, xs_blk_mapw);
4529 4562
4530 if (*firstblock == NULLFSBLOCK) { 4563 if (*firstblock == NULLFSBLOCK) {
4531 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) 4564 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
@@ -4718,12 +4751,12 @@ xfs_bmap_del_extent(
4718 xfs_filblks_t temp2; /* for indirect length calculations */ 4751 xfs_filblks_t temp2; /* for indirect length calculations */
4719 int state = 0; 4752 int state = 0;
4720 4753
4721 XFS_STATS_INC(xs_del_exlist); 4754 mp = ip->i_mount;
4755 XFS_STATS_INC(mp, xs_del_exlist);
4722 4756
4723 if (whichfork == XFS_ATTR_FORK) 4757 if (whichfork == XFS_ATTR_FORK)
4724 state |= BMAP_ATTRFORK; 4758 state |= BMAP_ATTRFORK;
4725 4759
4726 mp = ip->i_mount;
4727 ifp = XFS_IFORK_PTR(ip, whichfork); 4760 ifp = XFS_IFORK_PTR(ip, whichfork);
4728 ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / 4761 ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
4729 (uint)sizeof(xfs_bmbt_rec_t))); 4762 (uint)sizeof(xfs_bmbt_rec_t)));
@@ -5070,7 +5103,7 @@ xfs_bunmapi(
5070 *done = 1; 5103 *done = 1;
5071 return 0; 5104 return 0;
5072 } 5105 }
5073 XFS_STATS_INC(xs_blk_unmap); 5106 XFS_STATS_INC(mp, xs_blk_unmap);
5074 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 5107 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5075 start = bno; 5108 start = bno;
5076 bno = start + len - 1; 5109 bno = start + len - 1;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 6aaa0c1c7200..a160f8a5a3fc 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -52,9 +52,9 @@ struct xfs_bmalloca {
52 xfs_extlen_t minleft; /* amount must be left after alloc */ 52 xfs_extlen_t minleft; /* amount must be left after alloc */
53 bool eof; /* set if allocating past last extent */ 53 bool eof; /* set if allocating past last extent */
54 bool wasdel; /* replacing a delayed allocation */ 54 bool wasdel; /* replacing a delayed allocation */
55 bool userdata;/* set if is user data */
56 bool aeof; /* allocated space at eof */ 55 bool aeof; /* allocated space at eof */
57 bool conv; /* overwriting unwritten extents */ 56 bool conv; /* overwriting unwritten extents */
57 char userdata;/* userdata mask */
58 int flags; 58 int flags;
59}; 59};
60 60
@@ -109,6 +109,14 @@ typedef struct xfs_bmap_free
109 */ 109 */
110#define XFS_BMAPI_CONVERT 0x040 110#define XFS_BMAPI_CONVERT 0x040
111 111
112/*
113 * allocate zeroed extents - this requires all newly allocated user data extents
114 * to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set.
115 * Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found
116 * during the allocation range to zeroed written extents.
117 */
118#define XFS_BMAPI_ZERO 0x080
119
112#define XFS_BMAPI_FLAGS \ 120#define XFS_BMAPI_FLAGS \
113 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 121 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
114 { XFS_BMAPI_METADATA, "METADATA" }, \ 122 { XFS_BMAPI_METADATA, "METADATA" }, \
@@ -116,7 +124,8 @@ typedef struct xfs_bmap_free
116 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 124 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
117 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 125 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
118 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 126 { XFS_BMAPI_CONTIG, "CONTIG" }, \
119 { XFS_BMAPI_CONVERT, "CONVERT" } 127 { XFS_BMAPI_CONVERT, "CONVERT" }, \
128 { XFS_BMAPI_ZERO, "ZERO" }
120 129
121 130
122static inline int xfs_bmapi_aflag(int w) 131static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index f7d7ee7a2607..af1bbee5586e 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -32,6 +32,7 @@
32#include "xfs_trace.h" 32#include "xfs_trace.h"
33#include "xfs_cksum.h" 33#include "xfs_cksum.h"
34#include "xfs_alloc.h" 34#include "xfs_alloc.h"
35#include "xfs_log.h"
35 36
36/* 37/*
37 * Cursor allocation zone. 38 * Cursor allocation zone.
@@ -222,7 +223,7 @@ xfs_btree_check_ptr(
222 * long-form btree header. 223 * long-form btree header.
223 * 224 *
224 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 225 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
225 * it into the buffer so recovery knows what the last modifcation was that made 226 * it into the buffer so recovery knows what the last modification was that made
226 * it to disk. 227 * it to disk.
227 */ 228 */
228void 229void
@@ -243,8 +244,14 @@ bool
243xfs_btree_lblock_verify_crc( 244xfs_btree_lblock_verify_crc(
244 struct xfs_buf *bp) 245 struct xfs_buf *bp)
245{ 246{
246 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 247 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
248 struct xfs_mount *mp = bp->b_target->bt_mount;
249
250 if (xfs_sb_version_hascrc(&mp->m_sb)) {
251 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
252 return false;
247 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 253 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
254 }
248 255
249 return true; 256 return true;
250} 257}
@@ -254,7 +261,7 @@ xfs_btree_lblock_verify_crc(
254 * short-form btree header. 261 * short-form btree header.
255 * 262 *
256 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 263 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
257 * it into the buffer so recovery knows what the last modifcation was that made 264 * it into the buffer so recovery knows what the last modification was that made
258 * it to disk. 265 * it to disk.
259 */ 266 */
260void 267void
@@ -275,8 +282,14 @@ bool
275xfs_btree_sblock_verify_crc( 282xfs_btree_sblock_verify_crc(
276 struct xfs_buf *bp) 283 struct xfs_buf *bp)
277{ 284{
278 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 285 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
286 struct xfs_mount *mp = bp->b_target->bt_mount;
287
288 if (xfs_sb_version_hascrc(&mp->m_sb)) {
289 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
290 return false;
279 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 291 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
292 }
280 293
281 return true; 294 return true;
282} 295}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 8f18bab73ea5..992dec0638f3 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -84,31 +84,38 @@ union xfs_btree_rec {
84/* 84/*
85 * Generic stats interface 85 * Generic stats interface
86 */ 86 */
87#define __XFS_BTREE_STATS_INC(type, stat) \ 87#define __XFS_BTREE_STATS_INC(mp, type, stat) \
88 XFS_STATS_INC(xs_ ## type ## _2_ ## stat) 88 XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat)
89#define XFS_BTREE_STATS_INC(cur, stat) \ 89#define XFS_BTREE_STATS_INC(cur, stat) \
90do { \ 90do { \
91 struct xfs_mount *__mp = cur->bc_mp; \
91 switch (cur->bc_btnum) { \ 92 switch (cur->bc_btnum) { \
92 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \ 93 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \
93 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ 94 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \
94 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ 95 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
95 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ 96 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
96 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ 97 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
97 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 98 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
98 } \ 99 } \
99} while (0) 100} while (0)
100 101
101#define __XFS_BTREE_STATS_ADD(type, stat, val) \ 102#define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \
102 XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val) 103 XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val)
103#define XFS_BTREE_STATS_ADD(cur, stat, val) \ 104#define XFS_BTREE_STATS_ADD(cur, stat, val) \
104do { \ 105do { \
106 struct xfs_mount *__mp = cur->bc_mp; \
105 switch (cur->bc_btnum) { \ 107 switch (cur->bc_btnum) { \
106 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \ 108 case XFS_BTNUM_BNO: \
107 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ 109 __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \
108 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ 110 case XFS_BTNUM_CNT: \
109 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ 111 __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \
110 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ 112 case XFS_BTNUM_BMAP: \
111 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 113 __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \
114 case XFS_BTNUM_INO: \
115 __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
116 case XFS_BTNUM_FINO: \
117 __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
118 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
112 } \ 119 } \
113} while (0) 120} while (0)
114 121
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index be43248a5822..e89a0f8f827c 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -39,6 +39,7 @@
39#include "xfs_trace.h" 39#include "xfs_trace.h"
40#include "xfs_cksum.h" 40#include "xfs_cksum.h"
41#include "xfs_buf_item.h" 41#include "xfs_buf_item.h"
42#include "xfs_log.h"
42 43
43/* 44/*
44 * xfs_da_btree.c 45 * xfs_da_btree.c
@@ -150,6 +151,8 @@ xfs_da3_node_verify(
150 return false; 151 return false;
151 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 152 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
152 return false; 153 return false;
154 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
155 return false;
153 } else { 156 } else {
154 if (ichdr.magic != XFS_DA_NODE_MAGIC) 157 if (ichdr.magic != XFS_DA_NODE_MAGIC)
155 return false; 158 return false;
@@ -322,6 +325,7 @@ xfs_da3_node_create(
322 if (xfs_sb_version_hascrc(&mp->m_sb)) { 325 if (xfs_sb_version_hascrc(&mp->m_sb)) {
323 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 326 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
324 327
328 memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
325 ichdr.magic = XFS_DA3_NODE_MAGIC; 329 ichdr.magic = XFS_DA3_NODE_MAGIC;
326 hdr3->info.blkno = cpu_to_be64(bp->b_bn); 330 hdr3->info.blkno = cpu_to_be64(bp->b_bn);
327 hdr3->info.owner = cpu_to_be64(args->dp->i_ino); 331 hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 9de401d297e5..2fb53a5c0a74 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -271,7 +271,7 @@ xfs_dir_createname(
271 rval = xfs_dir_ino_validate(tp->t_mountp, inum); 271 rval = xfs_dir_ino_validate(tp->t_mountp, inum);
272 if (rval) 272 if (rval)
273 return rval; 273 return rval;
274 XFS_STATS_INC(xs_dir_create); 274 XFS_STATS_INC(dp->i_mount, xs_dir_create);
275 } 275 }
276 276
277 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 277 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
@@ -365,7 +365,7 @@ xfs_dir_lookup(
365 int lock_mode; 365 int lock_mode;
366 366
367 ASSERT(S_ISDIR(dp->i_d.di_mode)); 367 ASSERT(S_ISDIR(dp->i_d.di_mode));
368 XFS_STATS_INC(xs_dir_lookup); 368 XFS_STATS_INC(dp->i_mount, xs_dir_lookup);
369 369
370 /* 370 /*
371 * We need to use KM_NOFS here so that lockdep will not throw false 371 * We need to use KM_NOFS here so that lockdep will not throw false
@@ -444,7 +444,7 @@ xfs_dir_removename(
444 int v; /* type-checking value */ 444 int v; /* type-checking value */
445 445
446 ASSERT(S_ISDIR(dp->i_d.di_mode)); 446 ASSERT(S_ISDIR(dp->i_d.di_mode));
447 XFS_STATS_INC(xs_dir_remove); 447 XFS_STATS_INC(dp->i_mount, xs_dir_remove);
448 448
449 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 449 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
450 if (!args) 450 if (!args)
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 4778d1dd511a..9c10e2b8cfcb 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -34,6 +34,7 @@
34#include "xfs_error.h" 34#include "xfs_error.h"
35#include "xfs_trace.h" 35#include "xfs_trace.h"
36#include "xfs_cksum.h" 36#include "xfs_cksum.h"
37#include "xfs_log.h"
37 38
38/* 39/*
39 * Local function prototypes. 40 * Local function prototypes.
@@ -71,6 +72,8 @@ xfs_dir3_block_verify(
71 return false; 72 return false;
72 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 73 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
73 return false; 74 return false;
75 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
76 return false;
74 } else { 77 } else {
75 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) 78 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
76 return false; 79 return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 824131e71bc5..af71a84f343c 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -31,6 +31,7 @@
31#include "xfs_trans.h" 31#include "xfs_trans.h"
32#include "xfs_buf_item.h" 32#include "xfs_buf_item.h"
33#include "xfs_cksum.h" 33#include "xfs_cksum.h"
34#include "xfs_log.h"
34 35
35/* 36/*
36 * Check the consistency of the data block. 37 * Check the consistency of the data block.
@@ -224,6 +225,8 @@ xfs_dir3_data_verify(
224 return false; 225 return false;
225 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 226 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
226 return false; 227 return false;
228 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
229 return false;
227 } else { 230 } else {
228 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) 231 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
229 return false; 232 return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index f300240ebb8d..3923e1f94697 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -33,6 +33,7 @@
33#include "xfs_trans.h" 33#include "xfs_trans.h"
34#include "xfs_buf_item.h" 34#include "xfs_buf_item.h"
35#include "xfs_cksum.h" 35#include "xfs_cksum.h"
36#include "xfs_log.h"
36 37
37/* 38/*
38 * Local function declarations. 39 * Local function declarations.
@@ -164,6 +165,8 @@ xfs_dir3_leaf_verify(
164 return false; 165 return false;
165 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 166 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
166 return false; 167 return false;
168 if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
169 return false;
167 } else { 170 } else {
168 if (leaf->hdr.info.magic != cpu_to_be16(magic)) 171 if (leaf->hdr.info.magic != cpu_to_be16(magic))
169 return false; 172 return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index cc28e924545b..70b0cb2fd556 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -33,6 +33,7 @@
33#include "xfs_trans.h" 33#include "xfs_trans.h"
34#include "xfs_buf_item.h" 34#include "xfs_buf_item.h"
35#include "xfs_cksum.h" 35#include "xfs_cksum.h"
36#include "xfs_log.h"
36 37
37/* 38/*
38 * Function declarations. 39 * Function declarations.
@@ -97,6 +98,8 @@ xfs_dir3_free_verify(
97 return false; 98 return false;
98 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 99 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
99 return false; 100 return false;
101 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
102 return false;
100 } else { 103 } else {
101 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) 104 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
102 return false; 105 return false;
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9590a069e556..8774498ce0ff 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -60,6 +60,14 @@ struct xfs_ifork;
60#define XFS_SB_VERSION_MOREBITSBIT 0x8000 60#define XFS_SB_VERSION_MOREBITSBIT 0x8000
61 61
62/* 62/*
63 * The size of a single extended attribute on disk is limited by
64 * the size of index values within the attribute entries themselves.
65 * These are be16 fields, so we can only support attribute data
66 * sizes up to 2^16 bytes in length.
67 */
68#define XFS_XATTR_SIZE_MAX (1 << 16)
69
70/*
63 * Supported feature bit list is just all bits in the versionnum field because 71 * Supported feature bit list is just all bits in the versionnum field because
64 * we've used them all up and understand them all. Except, of course, for the 72 * we've used them all up and understand them all. Except, of course, for the
65 * shared superblock bit, which nobody knows what it does and so is unsupported. 73 * shared superblock bit, which nobody knows what it does and so is unsupported.
@@ -1483,13 +1491,17 @@ struct xfs_acl {
1483 */ 1491 */
1484#define XFS_ACL_MAX_ENTRIES(mp) \ 1492#define XFS_ACL_MAX_ENTRIES(mp) \
1485 (xfs_sb_version_hascrc(&mp->m_sb) \ 1493 (xfs_sb_version_hascrc(&mp->m_sb) \
1486 ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ 1494 ? (XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
1487 sizeof(struct xfs_acl_entry) \ 1495 sizeof(struct xfs_acl_entry) \
1488 : 25) 1496 : 25)
1489 1497
1490#define XFS_ACL_MAX_SIZE(mp) \ 1498#define XFS_ACL_SIZE(cnt) \
1491 (sizeof(struct xfs_acl) + \ 1499 (sizeof(struct xfs_acl) + \
1492 sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) 1500 sizeof(struct xfs_acl_entry) * cnt)
1501
1502#define XFS_ACL_MAX_SIZE(mp) \
1503 XFS_ACL_SIZE(XFS_ACL_MAX_ENTRIES((mp)))
1504
1493 1505
1494/* On-disk XFS extended attribute names */ 1506/* On-disk XFS extended attribute names */
1495#define SGI_ACL_FILE "SGI_ACL_FILE" 1507#define SGI_ACL_FILE "SGI_ACL_FILE"
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 89689c6a43e2..b2b73a998d42 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -490,6 +490,16 @@ typedef struct xfs_swapext
490#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ 490#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
491 491
492/* 492/*
493 * ioctl limits
494 */
495#ifdef XATTR_LIST_MAX
496# define XFS_XATTR_LIST_MAX XATTR_LIST_MAX
497#else
498# define XFS_XATTR_LIST_MAX 65536
499#endif
500
501
502/*
493 * ioctl commands that are used by Linux filesystems 503 * ioctl commands that are used by Linux filesystems
494 */ 504 */
495#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS 505#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 54deb2d12ac6..70c1db99f6a7 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -38,6 +38,7 @@
38#include "xfs_icreate_item.h" 38#include "xfs_icreate_item.h"
39#include "xfs_icache.h" 39#include "xfs_icache.h"
40#include "xfs_trace.h" 40#include "xfs_trace.h"
41#include "xfs_log.h"
41 42
42 43
43/* 44/*
@@ -2500,9 +2501,14 @@ xfs_agi_verify(
2500 struct xfs_mount *mp = bp->b_target->bt_mount; 2501 struct xfs_mount *mp = bp->b_target->bt_mount;
2501 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 2502 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
2502 2503
2503 if (xfs_sb_version_hascrc(&mp->m_sb) && 2504 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2504 !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) 2505 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
2506 return false;
2507 if (!xfs_log_check_lsn(mp,
2508 be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn)))
2505 return false; 2509 return false;
2510 }
2511
2506 /* 2512 /*
2507 * Validate the magic number of the agi block. 2513 * Validate the magic number of the agi block.
2508 */ 2514 */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 47425140f343..a0b071d881a0 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -35,6 +35,7 @@
35#include "xfs_bmap_btree.h" 35#include "xfs_bmap_btree.h"
36#include "xfs_alloc_btree.h" 36#include "xfs_alloc_btree.h"
37#include "xfs_ialloc_btree.h" 37#include "xfs_ialloc_btree.h"
38#include "xfs_log.h"
38 39
39/* 40/*
40 * Physical superblock buffer manipulations. Shared with libxfs in userspace. 41 * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -163,6 +164,15 @@ xfs_mount_validate_sb(
163"Filesystem can not be safely mounted by this kernel."); 164"Filesystem can not be safely mounted by this kernel.");
164 return -EINVAL; 165 return -EINVAL;
165 } 166 }
167 } else if (xfs_sb_version_hascrc(sbp)) {
168 /*
169 * We can't read verify the sb LSN because the read verifier is
170 * called before the log is allocated and processed. We know the
171 * log is set up before write verifier (!check_version) calls,
172 * so just check it here.
173 */
174 if (!xfs_log_check_lsn(mp, sbp->sb_lsn))
175 return -EFSCORRUPTED;
166 } 176 }
167 177
168 if (xfs_sb_version_has_pquotino(sbp)) { 178 if (xfs_sb_version_has_pquotino(sbp)) {
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 8f8af05b3f13..cb6fd20a4d3d 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -31,6 +31,7 @@
31#include "xfs_cksum.h" 31#include "xfs_cksum.h"
32#include "xfs_trans.h" 32#include "xfs_trans.h"
33#include "xfs_buf_item.h" 33#include "xfs_buf_item.h"
34#include "xfs_log.h"
34 35
35 36
36/* 37/*
@@ -60,6 +61,7 @@ xfs_symlink_hdr_set(
60 if (!xfs_sb_version_hascrc(&mp->m_sb)) 61 if (!xfs_sb_version_hascrc(&mp->m_sb))
61 return 0; 62 return 0;
62 63
64 memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr));
63 dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC); 65 dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
64 dsl->sl_offset = cpu_to_be32(offset); 66 dsl->sl_offset = cpu_to_be32(offset);
65 dsl->sl_bytes = cpu_to_be32(size); 67 dsl->sl_bytes = cpu_to_be32(size);
@@ -116,6 +118,8 @@ xfs_symlink_verify(
116 return false; 118 return false;
117 if (dsl->sl_owner == 0) 119 if (dsl->sl_owner == 0)
118 return false; 120 return false;
121 if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)))
122 return false;
119 123
120 return true; 124 return true;
121} 125}
@@ -183,6 +187,7 @@ xfs_symlink_local_to_remote(
183 if (!xfs_sb_version_hascrc(&mp->m_sb)) { 187 if (!xfs_sb_version_hascrc(&mp->m_sb)) {
184 bp->b_ops = NULL; 188 bp->b_ops = NULL;
185 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 189 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
190 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
186 return; 191 return;
187 } 192 }
188 193
@@ -198,4 +203,6 @@ xfs_symlink_local_to_remote(
198 buf = bp->b_addr; 203 buf = bp->b_addr;
199 buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp); 204 buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
200 memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes); 205 memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
206 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
207 ifp->if_bytes - 1);
201} 208}
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 4b641676f258..6bb470fbb8e8 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -37,16 +37,19 @@
37 37
38STATIC struct posix_acl * 38STATIC struct posix_acl *
39xfs_acl_from_disk( 39xfs_acl_from_disk(
40 struct xfs_acl *aclp, 40 const struct xfs_acl *aclp,
41 int max_entries) 41 int len,
42 int max_entries)
42{ 43{
43 struct posix_acl_entry *acl_e; 44 struct posix_acl_entry *acl_e;
44 struct posix_acl *acl; 45 struct posix_acl *acl;
45 struct xfs_acl_entry *ace; 46 const struct xfs_acl_entry *ace;
46 unsigned int count, i; 47 unsigned int count, i;
47 48
49 if (len < sizeof(*aclp))
50 return ERR_PTR(-EFSCORRUPTED);
48 count = be32_to_cpu(aclp->acl_cnt); 51 count = be32_to_cpu(aclp->acl_cnt);
49 if (count > max_entries) 52 if (count > max_entries || XFS_ACL_SIZE(count) != len)
50 return ERR_PTR(-EFSCORRUPTED); 53 return ERR_PTR(-EFSCORRUPTED);
51 54
52 acl = posix_acl_alloc(count, GFP_KERNEL); 55 acl = posix_acl_alloc(count, GFP_KERNEL);
@@ -160,10 +163,11 @@ xfs_get_acl(struct inode *inode, int type)
160 */ 163 */
161 if (error == -ENOATTR) 164 if (error == -ENOATTR)
162 goto out_update_cache; 165 goto out_update_cache;
166 acl = ERR_PTR(error);
163 goto out; 167 goto out;
164 } 168 }
165 169
166 acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount)); 170 acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount));
167 if (IS_ERR(acl)) 171 if (IS_ERR(acl))
168 goto out; 172 goto out;
169 173
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 3841b07f27bf..52f8255d6bdf 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -20,7 +20,6 @@
20 20
21struct inode; 21struct inode;
22struct posix_acl; 22struct posix_acl;
23struct xfs_inode;
24 23
25#ifdef CONFIG_XFS_POSIX_ACL 24#ifdef CONFIG_XFS_POSIX_ACL
26extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); 25extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
@@ -36,4 +35,7 @@ static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
36# define posix_acl_access_exists(inode) 0 35# define posix_acl_access_exists(inode) 0
37# define posix_acl_default_exists(inode) 0 36# define posix_acl_default_exists(inode) 0
38#endif /* CONFIG_XFS_POSIX_ACL */ 37#endif /* CONFIG_XFS_POSIX_ACL */
38
39extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags);
40
39#endif /* __XFS_ACL_H__ */ 41#endif /* __XFS_ACL_H__ */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 50ab2879b9da..29e7e5dd5178 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -172,6 +172,12 @@ xfs_setfilesize_ioend(
172 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 172 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
173 __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); 173 __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
174 174
175 /* we abort the update if there was an IO error */
176 if (ioend->io_error) {
177 xfs_trans_cancel(tp);
178 return ioend->io_error;
179 }
180
175 return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); 181 return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
176} 182}
177 183
@@ -212,14 +218,17 @@ xfs_end_io(
212 ioend->io_error = -EIO; 218 ioend->io_error = -EIO;
213 goto done; 219 goto done;
214 } 220 }
215 if (ioend->io_error)
216 goto done;
217 221
218 /* 222 /*
219 * For unwritten extents we need to issue transactions to convert a 223 * For unwritten extents we need to issue transactions to convert a
220 * range to normal written extens after the data I/O has finished. 224 * range to normal written extens after the data I/O has finished.
225 * Detecting and handling completion IO errors is done individually
226 * for each case as different cleanup operations need to be performed
227 * on error.
221 */ 228 */
222 if (ioend->io_type == XFS_IO_UNWRITTEN) { 229 if (ioend->io_type == XFS_IO_UNWRITTEN) {
230 if (ioend->io_error)
231 goto done;
223 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 232 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
224 ioend->io_size); 233 ioend->io_size);
225 } else if (ioend->io_append_trans) { 234 } else if (ioend->io_append_trans) {
@@ -1250,13 +1259,28 @@ xfs_vm_releasepage(
1250 * the DIO. There is only going to be one reference to the ioend and its life 1259 * the DIO. There is only going to be one reference to the ioend and its life
1251 * cycle is constrained by the DIO completion code. hence we don't need 1260 * cycle is constrained by the DIO completion code. hence we don't need
1252 * reference counting here. 1261 * reference counting here.
1262 *
1263 * Note that for DIO, an IO to the highest supported file block offset (i.e.
1264 * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
1265 * bit variable. Hence if we see this overflow, we have to assume that the IO is
1266 * extending the file size. We won't know for sure until IO completion is run
1267 * and the actual max write offset is communicated to the IO completion
1268 * routine.
1269 *
1270 * For DAX page faults, we are preparing to never see unwritten extents here,
1271 * nor should we ever extend the inode size. Hence we will soon have nothing to
1272 * do here for this case, ensuring we don't have to provide an IO completion
1273 * callback to free an ioend that we don't actually need for a fault into the
1274 * page at offset (2^63 - 1FSB) bytes.
1253 */ 1275 */
1276
1254static void 1277static void
1255xfs_map_direct( 1278xfs_map_direct(
1256 struct inode *inode, 1279 struct inode *inode,
1257 struct buffer_head *bh_result, 1280 struct buffer_head *bh_result,
1258 struct xfs_bmbt_irec *imap, 1281 struct xfs_bmbt_irec *imap,
1259 xfs_off_t offset) 1282 xfs_off_t offset,
1283 bool dax_fault)
1260{ 1284{
1261 struct xfs_ioend *ioend; 1285 struct xfs_ioend *ioend;
1262 xfs_off_t size = bh_result->b_size; 1286 xfs_off_t size = bh_result->b_size;
@@ -1269,6 +1293,13 @@ xfs_map_direct(
1269 1293
1270 trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); 1294 trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
1271 1295
1296 if (dax_fault) {
1297 ASSERT(type == XFS_IO_OVERWRITE);
1298 trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
1299 imap);
1300 return;
1301 }
1302
1272 if (bh_result->b_private) { 1303 if (bh_result->b_private) {
1273 ioend = bh_result->b_private; 1304 ioend = bh_result->b_private;
1274 ASSERT(ioend->io_size > 0); 1305 ASSERT(ioend->io_size > 0);
@@ -1283,7 +1314,8 @@ xfs_map_direct(
1283 ioend->io_size, ioend->io_type, 1314 ioend->io_size, ioend->io_type,
1284 imap); 1315 imap);
1285 } else if (type == XFS_IO_UNWRITTEN || 1316 } else if (type == XFS_IO_UNWRITTEN ||
1286 offset + size > i_size_read(inode)) { 1317 offset + size > i_size_read(inode) ||
1318 offset + size < 0) {
1287 ioend = xfs_alloc_ioend(inode, type); 1319 ioend = xfs_alloc_ioend(inode, type);
1288 ioend->io_offset = offset; 1320 ioend->io_offset = offset;
1289 ioend->io_size = size; 1321 ioend->io_size = size;
@@ -1345,7 +1377,8 @@ __xfs_get_blocks(
1345 sector_t iblock, 1377 sector_t iblock,
1346 struct buffer_head *bh_result, 1378 struct buffer_head *bh_result,
1347 int create, 1379 int create,
1348 bool direct) 1380 bool direct,
1381 bool dax_fault)
1349{ 1382{
1350 struct xfs_inode *ip = XFS_I(inode); 1383 struct xfs_inode *ip = XFS_I(inode);
1351 struct xfs_mount *mp = ip->i_mount; 1384 struct xfs_mount *mp = ip->i_mount;
@@ -1393,18 +1426,20 @@ __xfs_get_blocks(
1393 if (error) 1426 if (error)
1394 goto out_unlock; 1427 goto out_unlock;
1395 1428
1429 /* for DAX, we convert unwritten extents directly */
1396 if (create && 1430 if (create &&
1397 (!nimaps || 1431 (!nimaps ||
1398 (imap.br_startblock == HOLESTARTBLOCK || 1432 (imap.br_startblock == HOLESTARTBLOCK ||
1399 imap.br_startblock == DELAYSTARTBLOCK))) { 1433 imap.br_startblock == DELAYSTARTBLOCK) ||
1434 (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
1400 if (direct || xfs_get_extsz_hint(ip)) { 1435 if (direct || xfs_get_extsz_hint(ip)) {
1401 /* 1436 /*
1402 * Drop the ilock in preparation for starting the block 1437 * xfs_iomap_write_direct() expects the shared lock. It
1403 * allocation transaction. It will be retaken 1438 * is unlocked on return.
1404 * exclusively inside xfs_iomap_write_direct for the
1405 * actual allocation.
1406 */ 1439 */
1407 xfs_iunlock(ip, lockmode); 1440 if (lockmode == XFS_ILOCK_EXCL)
1441 xfs_ilock_demote(ip, lockmode);
1442
1408 error = xfs_iomap_write_direct(ip, offset, size, 1443 error = xfs_iomap_write_direct(ip, offset, size,
1409 &imap, nimaps); 1444 &imap, nimaps);
1410 if (error) 1445 if (error)
@@ -1441,6 +1476,12 @@ __xfs_get_blocks(
1441 goto out_unlock; 1476 goto out_unlock;
1442 } 1477 }
1443 1478
1479 if (IS_DAX(inode) && create) {
1480 ASSERT(!ISUNWRITTEN(&imap));
1481 /* zeroing is not needed at a higher layer */
1482 new = 0;
1483 }
1484
1444 /* trim mapping down to size requested */ 1485 /* trim mapping down to size requested */
1445 if (direct || size > (1 << inode->i_blkbits)) 1486 if (direct || size > (1 << inode->i_blkbits))
1446 xfs_map_trim_size(inode, iblock, bh_result, 1487 xfs_map_trim_size(inode, iblock, bh_result,
@@ -1458,7 +1499,8 @@ __xfs_get_blocks(
1458 set_buffer_unwritten(bh_result); 1499 set_buffer_unwritten(bh_result);
1459 /* direct IO needs special help */ 1500 /* direct IO needs special help */
1460 if (create && direct) 1501 if (create && direct)
1461 xfs_map_direct(inode, bh_result, &imap, offset); 1502 xfs_map_direct(inode, bh_result, &imap, offset,
1503 dax_fault);
1462 } 1504 }
1463 1505
1464 /* 1506 /*
@@ -1505,7 +1547,7 @@ xfs_get_blocks(
1505 struct buffer_head *bh_result, 1547 struct buffer_head *bh_result,
1506 int create) 1548 int create)
1507{ 1549{
1508 return __xfs_get_blocks(inode, iblock, bh_result, create, false); 1550 return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
1509} 1551}
1510 1552
1511int 1553int
@@ -1515,7 +1557,17 @@ xfs_get_blocks_direct(
1515 struct buffer_head *bh_result, 1557 struct buffer_head *bh_result,
1516 int create) 1558 int create)
1517{ 1559{
1518 return __xfs_get_blocks(inode, iblock, bh_result, create, true); 1560 return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
1561}
1562
1563int
1564xfs_get_blocks_dax_fault(
1565 struct inode *inode,
1566 sector_t iblock,
1567 struct buffer_head *bh_result,
1568 int create)
1569{
1570 return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
1519} 1571}
1520 1572
1521static void 1573static void
@@ -1614,45 +1666,6 @@ xfs_end_io_direct_write(
1614 __xfs_end_io_direct_write(inode, ioend, offset, size); 1666 __xfs_end_io_direct_write(inode, ioend, offset, size);
1615} 1667}
1616 1668
1617/*
1618 * For DAX we need a mapping buffer callback for unwritten extent conversion
1619 * when page faults allocate blocks and then zero them. Note that in this
1620 * case the mapping indicated by the ioend may extend beyond EOF. We most
1621 * definitely do not want to extend EOF here, so we trim back the ioend size to
1622 * EOF.
1623 */
1624#ifdef CONFIG_FS_DAX
1625void
1626xfs_end_io_dax_write(
1627 struct buffer_head *bh,
1628 int uptodate)
1629{
1630 struct xfs_ioend *ioend = bh->b_private;
1631 struct inode *inode = ioend->io_inode;
1632 ssize_t size = ioend->io_size;
1633
1634 ASSERT(IS_DAX(ioend->io_inode));
1635
1636 /* if there was an error zeroing, then don't convert it */
1637 if (!uptodate)
1638 ioend->io_error = -EIO;
1639
1640 /*
1641 * Trim update to EOF, so we don't extend EOF during unwritten extent
1642 * conversion of partial EOF blocks.
1643 */
1644 spin_lock(&XFS_I(inode)->i_flags_lock);
1645 if (ioend->io_offset + size > i_size_read(inode))
1646 size = i_size_read(inode) - ioend->io_offset;
1647 spin_unlock(&XFS_I(inode)->i_flags_lock);
1648
1649 __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
1650
1651}
1652#else
1653void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
1654#endif
1655
1656static inline ssize_t 1669static inline ssize_t
1657xfs_vm_do_dio( 1670xfs_vm_do_dio(
1658 struct inode *inode, 1671 struct inode *inode,
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 86afd1ac7895..f6ffc9ae5ceb 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -58,7 +58,8 @@ int xfs_get_blocks(struct inode *inode, sector_t offset,
58 struct buffer_head *map_bh, int create); 58 struct buffer_head *map_bh, int create);
59int xfs_get_blocks_direct(struct inode *inode, sector_t offset, 59int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
60 struct buffer_head *map_bh, int create); 60 struct buffer_head *map_bh, int create);
61void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate); 61int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
62 struct buffer_head *map_bh, int create);
62 63
63extern void xfs_count_page_state(struct page *, int *, int *); 64extern void xfs_count_page_state(struct page *, int *, int *);
64 65
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 65fb37a18e92..0ef7c2ed3f8a 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -511,7 +511,7 @@ xfs_attr_list_int(
511 xfs_inode_t *dp = context->dp; 511 xfs_inode_t *dp = context->dp;
512 uint lock_mode; 512 uint lock_mode;
513 513
514 XFS_STATS_INC(xs_attr_list); 514 XFS_STATS_INC(dp->i_mount, xs_attr_list);
515 515
516 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 516 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
517 return -EIO; 517 return -EIO;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 3bf4ad0d19e4..dbae6490a79a 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -57,6 +57,35 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
57} 57}
58 58
59/* 59/*
60 * Routine to zero an extent on disk allocated to the specific inode.
61 *
62 * The VFS functions take a linearised filesystem block offset, so we have to
63 * convert the sparse xfs fsb to the right format first.
64 * VFS types are real funky, too.
65 */
66int
67xfs_zero_extent(
68 struct xfs_inode *ip,
69 xfs_fsblock_t start_fsb,
70 xfs_off_t count_fsb)
71{
72 struct xfs_mount *mp = ip->i_mount;
73 xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
74 sector_t block = XFS_BB_TO_FSBT(mp, sector);
75 ssize_t size = XFS_FSB_TO_B(mp, count_fsb);
76
77 if (IS_DAX(VFS_I(ip)))
78 return dax_clear_blocks(VFS_I(ip), block, size);
79
80 /*
81 * let the block layer decide on the fastest method of
82 * implementing the zeroing.
83 */
84 return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS);
85
86}
87
88/*
60 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi 89 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
61 * caller. Frees all the extents that need freeing, which must be done 90 * caller. Frees all the extents that need freeing, which must be done
62 * last due to locking considerations. We never free any extents in 91 * last due to locking considerations. We never free any extents in
@@ -229,6 +258,13 @@ xfs_bmap_rtalloc(
229 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 258 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
230 ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : 259 ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
231 XFS_TRANS_DQ_RTBCOUNT, (long) ralen); 260 XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
261
262 /* Zero the extent if we were asked to do so */
263 if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) {
264 error = xfs_zero_extent(ap->ip, ap->blkno, ap->length);
265 if (error)
266 return error;
267 }
232 } else { 268 } else {
233 ap->length = 0; 269 ap->length = 0;
234 } 270 }
@@ -1027,7 +1063,7 @@ xfs_alloc_file_space(
1027 xfs_bmap_init(&free_list, &firstfsb); 1063 xfs_bmap_init(&free_list, &firstfsb);
1028 error = xfs_bmapi_write(tp, ip, startoffset_fsb, 1064 error = xfs_bmapi_write(tp, ip, startoffset_fsb,
1029 allocatesize_fsb, alloc_type, &firstfsb, 1065 allocatesize_fsb, alloc_type, &firstfsb,
1030 0, imapp, &nimaps, &free_list); 1066 resblks, imapp, &nimaps, &free_list);
1031 if (error) { 1067 if (error) {
1032 goto error0; 1068 goto error0;
1033 } 1069 }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8ecffb35935b..3243cdf97f33 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -201,7 +201,7 @@ _xfs_buf_alloc(
201 atomic_set(&bp->b_pin_count, 0); 201 atomic_set(&bp->b_pin_count, 0);
202 init_waitqueue_head(&bp->b_waiters); 202 init_waitqueue_head(&bp->b_waiters);
203 203
204 XFS_STATS_INC(xb_create); 204 XFS_STATS_INC(target->bt_mount, xb_create);
205 trace_xfs_buf_init(bp, _RET_IP_); 205 trace_xfs_buf_init(bp, _RET_IP_);
206 206
207 return bp; 207 return bp;
@@ -354,15 +354,16 @@ retry:
354 */ 354 */
355 if (!(++retries % 100)) 355 if (!(++retries % 100))
356 xfs_err(NULL, 356 xfs_err(NULL,
357 "possible memory allocation deadlock in %s (mode:0x%x)", 357 "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
358 current->comm, current->pid,
358 __func__, gfp_mask); 359 __func__, gfp_mask);
359 360
360 XFS_STATS_INC(xb_page_retries); 361 XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
361 congestion_wait(BLK_RW_ASYNC, HZ/50); 362 congestion_wait(BLK_RW_ASYNC, HZ/50);
362 goto retry; 363 goto retry;
363 } 364 }
364 365
365 XFS_STATS_INC(xb_page_found); 366 XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found);
366 367
367 nbytes = min_t(size_t, size, PAGE_SIZE - offset); 368 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
368 size -= nbytes; 369 size -= nbytes;
@@ -516,7 +517,7 @@ _xfs_buf_find(
516 new_bp->b_pag = pag; 517 new_bp->b_pag = pag;
517 spin_unlock(&pag->pag_buf_lock); 518 spin_unlock(&pag->pag_buf_lock);
518 } else { 519 } else {
519 XFS_STATS_INC(xb_miss_locked); 520 XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
520 spin_unlock(&pag->pag_buf_lock); 521 spin_unlock(&pag->pag_buf_lock);
521 xfs_perag_put(pag); 522 xfs_perag_put(pag);
522 } 523 }
@@ -529,11 +530,11 @@ found:
529 if (!xfs_buf_trylock(bp)) { 530 if (!xfs_buf_trylock(bp)) {
530 if (flags & XBF_TRYLOCK) { 531 if (flags & XBF_TRYLOCK) {
531 xfs_buf_rele(bp); 532 xfs_buf_rele(bp);
532 XFS_STATS_INC(xb_busy_locked); 533 XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
533 return NULL; 534 return NULL;
534 } 535 }
535 xfs_buf_lock(bp); 536 xfs_buf_lock(bp);
536 XFS_STATS_INC(xb_get_locked_waited); 537 XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
537 } 538 }
538 539
539 /* 540 /*
@@ -549,7 +550,7 @@ found:
549 } 550 }
550 551
551 trace_xfs_buf_find(bp, flags, _RET_IP_); 552 trace_xfs_buf_find(bp, flags, _RET_IP_);
552 XFS_STATS_INC(xb_get_locked); 553 XFS_STATS_INC(btp->bt_mount, xb_get_locked);
553 return bp; 554 return bp;
554} 555}
555 556
@@ -603,7 +604,7 @@ found:
603 } 604 }
604 } 605 }
605 606
606 XFS_STATS_INC(xb_get); 607 XFS_STATS_INC(target->bt_mount, xb_get);
607 trace_xfs_buf_get(bp, flags, _RET_IP_); 608 trace_xfs_buf_get(bp, flags, _RET_IP_);
608 return bp; 609 return bp;
609} 610}
@@ -643,7 +644,7 @@ xfs_buf_read_map(
643 trace_xfs_buf_read(bp, flags, _RET_IP_); 644 trace_xfs_buf_read(bp, flags, _RET_IP_);
644 645
645 if (!XFS_BUF_ISDONE(bp)) { 646 if (!XFS_BUF_ISDONE(bp)) {
646 XFS_STATS_INC(xb_get_read); 647 XFS_STATS_INC(target->bt_mount, xb_get_read);
647 bp->b_ops = ops; 648 bp->b_ops = ops;
648 _xfs_buf_read(bp, flags); 649 _xfs_buf_read(bp, flags);
649 } else if (flags & XBF_ASYNC) { 650 } else if (flags & XBF_ASYNC) {
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index a989a9c7edb7..642d55d10075 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -666,7 +666,7 @@ xfs_readdir(
666 return -EIO; 666 return -EIO;
667 667
668 ASSERT(S_ISDIR(dp->i_d.di_mode)); 668 ASSERT(S_ISDIR(dp->i_d.di_mode));
669 XFS_STATS_INC(xs_dir_getdents); 669 XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
670 670
671 args.dp = dp; 671 args.dp = dp;
672 args.geo = dp->i_mount->m_dir_geo; 672 args.geo = dp->i_mount->m_dir_geo;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 30cb3afb67f0..7ac6c5c586cb 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -75,9 +75,9 @@ xfs_qm_dqdestroy(
75 ASSERT(list_empty(&dqp->q_lru)); 75 ASSERT(list_empty(&dqp->q_lru));
76 76
77 mutex_destroy(&dqp->q_qlock); 77 mutex_destroy(&dqp->q_qlock);
78 kmem_zone_free(xfs_qm_dqzone, dqp);
79 78
80 XFS_STATS_DEC(xs_qm_dquot); 79 XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
80 kmem_zone_free(xfs_qm_dqzone, dqp);
81} 81}
82 82
83/* 83/*
@@ -605,7 +605,7 @@ xfs_qm_dqread(
605 break; 605 break;
606 } 606 }
607 607
608 XFS_STATS_INC(xs_qm_dquot); 608 XFS_STATS_INC(mp, xs_qm_dquot);
609 609
610 trace_xfs_dqread(dqp); 610 trace_xfs_dqread(dqp);
611 611
@@ -747,12 +747,12 @@ restart:
747 mutex_unlock(&qi->qi_tree_lock); 747 mutex_unlock(&qi->qi_tree_lock);
748 748
749 trace_xfs_dqget_hit(dqp); 749 trace_xfs_dqget_hit(dqp);
750 XFS_STATS_INC(xs_qm_dqcachehits); 750 XFS_STATS_INC(mp, xs_qm_dqcachehits);
751 *O_dqpp = dqp; 751 *O_dqpp = dqp;
752 return 0; 752 return 0;
753 } 753 }
754 mutex_unlock(&qi->qi_tree_lock); 754 mutex_unlock(&qi->qi_tree_lock);
755 XFS_STATS_INC(xs_qm_dqcachemisses); 755 XFS_STATS_INC(mp, xs_qm_dqcachemisses);
756 756
757 /* 757 /*
758 * Dquot cache miss. We don't want to keep the inode lock across 758 * Dquot cache miss. We don't want to keep the inode lock across
@@ -806,7 +806,7 @@ restart:
806 mutex_unlock(&qi->qi_tree_lock); 806 mutex_unlock(&qi->qi_tree_lock);
807 trace_xfs_dqget_dup(dqp); 807 trace_xfs_dqget_dup(dqp);
808 xfs_qm_dqdestroy(dqp); 808 xfs_qm_dqdestroy(dqp);
809 XFS_STATS_INC(xs_qm_dquot_dups); 809 XFS_STATS_INC(mp, xs_qm_dquot_dups);
810 goto restart; 810 goto restart;
811 } 811 }
812 812
@@ -846,7 +846,7 @@ xfs_qm_dqput(
846 trace_xfs_dqput_free(dqp); 846 trace_xfs_dqput_free(dqp);
847 847
848 if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) 848 if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
849 XFS_STATS_INC(xs_qm_dquot_unused); 849 XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
850 } 850 }
851 xfs_dqunlock(dqp); 851 xfs_dqunlock(dqp);
852} 852}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f80e90f95ad8..f5392ab2def1 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -242,19 +242,30 @@ xfs_file_fsync(
242 } 242 }
243 243
244 /* 244 /*
245 * All metadata updates are logged, which means that we just have 245 * All metadata updates are logged, which means that we just have to
246 * to flush the log up to the latest LSN that touched the inode. 246 * flush the log up to the latest LSN that touched the inode. If we have
247 * concurrent fsync/fdatasync() calls, we need them to all block on the
248 * log force before we clear the ili_fsync_fields field. This ensures
249 * that we don't get a racing sync operation that does not wait for the
250 * metadata to hit the journal before returning. If we race with
251 * clearing the ili_fsync_fields, then all that will happen is the log
252 * force will do nothing as the lsn will already be on disk. We can't
253 * race with setting ili_fsync_fields because that is done under
254 * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
255 * until after the ili_fsync_fields is cleared.
247 */ 256 */
248 xfs_ilock(ip, XFS_ILOCK_SHARED); 257 xfs_ilock(ip, XFS_ILOCK_SHARED);
249 if (xfs_ipincount(ip)) { 258 if (xfs_ipincount(ip)) {
250 if (!datasync || 259 if (!datasync ||
251 (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) 260 (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
252 lsn = ip->i_itemp->ili_last_lsn; 261 lsn = ip->i_itemp->ili_last_lsn;
253 } 262 }
254 xfs_iunlock(ip, XFS_ILOCK_SHARED);
255 263
256 if (lsn) 264 if (lsn) {
257 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); 265 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
266 ip->i_itemp->ili_fsync_fields = 0;
267 }
268 xfs_iunlock(ip, XFS_ILOCK_SHARED);
258 269
259 /* 270 /*
260 * If we only have a single device, and the log force about was 271 * If we only have a single device, and the log force about was
@@ -287,7 +298,7 @@ xfs_file_read_iter(
287 xfs_fsize_t n; 298 xfs_fsize_t n;
288 loff_t pos = iocb->ki_pos; 299 loff_t pos = iocb->ki_pos;
289 300
290 XFS_STATS_INC(xs_read_calls); 301 XFS_STATS_INC(mp, xs_read_calls);
291 302
292 if (unlikely(iocb->ki_flags & IOCB_DIRECT)) 303 if (unlikely(iocb->ki_flags & IOCB_DIRECT))
293 ioflags |= XFS_IO_ISDIRECT; 304 ioflags |= XFS_IO_ISDIRECT;
@@ -365,7 +376,7 @@ xfs_file_read_iter(
365 376
366 ret = generic_file_read_iter(iocb, to); 377 ret = generic_file_read_iter(iocb, to);
367 if (ret > 0) 378 if (ret > 0)
368 XFS_STATS_ADD(xs_read_bytes, ret); 379 XFS_STATS_ADD(mp, xs_read_bytes, ret);
369 380
370 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 381 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
371 return ret; 382 return ret;
@@ -383,7 +394,7 @@ xfs_file_splice_read(
383 int ioflags = 0; 394 int ioflags = 0;
384 ssize_t ret; 395 ssize_t ret;
385 396
386 XFS_STATS_INC(xs_read_calls); 397 XFS_STATS_INC(ip->i_mount, xs_read_calls);
387 398
388 if (infilp->f_mode & FMODE_NOCMTIME) 399 if (infilp->f_mode & FMODE_NOCMTIME)
389 ioflags |= XFS_IO_INVIS; 400 ioflags |= XFS_IO_INVIS;
@@ -401,7 +412,7 @@ xfs_file_splice_read(
401 else 412 else
402 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 413 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
403 if (ret > 0) 414 if (ret > 0)
404 XFS_STATS_ADD(xs_read_bytes, ret); 415 XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
405 416
406 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 417 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
407 return ret; 418 return ret;
@@ -482,6 +493,8 @@ xfs_zero_eof(
482 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 493 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
483 ASSERT(offset > isize); 494 ASSERT(offset > isize);
484 495
496 trace_xfs_zero_eof(ip, isize, offset - isize);
497
485 /* 498 /*
486 * First handle zeroing the block on which isize resides. 499 * First handle zeroing the block on which isize resides.
487 * 500 *
@@ -574,6 +587,7 @@ xfs_file_aio_write_checks(
574 struct xfs_inode *ip = XFS_I(inode); 587 struct xfs_inode *ip = XFS_I(inode);
575 ssize_t error = 0; 588 ssize_t error = 0;
576 size_t count = iov_iter_count(from); 589 size_t count = iov_iter_count(from);
590 bool drained_dio = false;
577 591
578restart: 592restart:
579 error = generic_write_checks(iocb, from); 593 error = generic_write_checks(iocb, from);
@@ -611,12 +625,13 @@ restart:
611 bool zero = false; 625 bool zero = false;
612 626
613 spin_unlock(&ip->i_flags_lock); 627 spin_unlock(&ip->i_flags_lock);
614 if (*iolock == XFS_IOLOCK_SHARED) { 628 if (!drained_dio) {
615 xfs_rw_iunlock(ip, *iolock); 629 if (*iolock == XFS_IOLOCK_SHARED) {
616 *iolock = XFS_IOLOCK_EXCL; 630 xfs_rw_iunlock(ip, *iolock);
617 xfs_rw_ilock(ip, *iolock); 631 *iolock = XFS_IOLOCK_EXCL;
618 iov_iter_reexpand(from, count); 632 xfs_rw_ilock(ip, *iolock);
619 633 iov_iter_reexpand(from, count);
634 }
620 /* 635 /*
621 * We now have an IO submission barrier in place, but 636 * We now have an IO submission barrier in place, but
622 * AIO can do EOF updates during IO completion and hence 637 * AIO can do EOF updates during IO completion and hence
@@ -626,6 +641,7 @@ restart:
626 * no-op. 641 * no-op.
627 */ 642 */
628 inode_dio_wait(inode); 643 inode_dio_wait(inode);
644 drained_dio = true;
629 goto restart; 645 goto restart;
630 } 646 }
631 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); 647 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
@@ -867,7 +883,7 @@ xfs_file_write_iter(
867 ssize_t ret; 883 ssize_t ret;
868 size_t ocount = iov_iter_count(from); 884 size_t ocount = iov_iter_count(from);
869 885
870 XFS_STATS_INC(xs_write_calls); 886 XFS_STATS_INC(ip->i_mount, xs_write_calls);
871 887
872 if (ocount == 0) 888 if (ocount == 0)
873 return 0; 889 return 0;
@@ -883,7 +899,7 @@ xfs_file_write_iter(
883 if (ret > 0) { 899 if (ret > 0) {
884 ssize_t err; 900 ssize_t err;
885 901
886 XFS_STATS_ADD(xs_write_bytes, ret); 902 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
887 903
888 /* Handle various SYNC-type writes */ 904 /* Handle various SYNC-type writes */
889 err = generic_write_sync(file, iocb->ki_pos - ret, ret); 905 err = generic_write_sync(file, iocb->ki_pos - ret, ret);
@@ -1477,7 +1493,7 @@ xfs_file_llseek(
1477 * 1493 *
1478 * mmap_sem (MM) 1494 * mmap_sem (MM)
1479 * sb_start_pagefault(vfs, freeze) 1495 * sb_start_pagefault(vfs, freeze)
1480 * i_mmap_lock (XFS - truncate serialisation) 1496 * i_mmaplock (XFS - truncate serialisation)
1481 * page_lock (MM) 1497 * page_lock (MM)
1482 * i_lock (XFS - extent map serialisation) 1498 * i_lock (XFS - extent map serialisation)
1483 */ 1499 */
@@ -1503,8 +1519,7 @@ xfs_filemap_page_mkwrite(
1503 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1519 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1504 1520
1505 if (IS_DAX(inode)) { 1521 if (IS_DAX(inode)) {
1506 ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, 1522 ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL);
1507 xfs_end_io_dax_write);
1508 } else { 1523 } else {
1509 ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); 1524 ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
1510 ret = block_page_mkwrite_return(ret); 1525 ret = block_page_mkwrite_return(ret);
@@ -1538,7 +1553,7 @@ xfs_filemap_fault(
1538 * changes to xfs_get_blocks_direct() to map unwritten extent 1553 * changes to xfs_get_blocks_direct() to map unwritten extent
1539 * ioend for conversion on read-only mappings. 1554 * ioend for conversion on read-only mappings.
1540 */ 1555 */
1541 ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); 1556 ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL);
1542 } else 1557 } else
1543 ret = filemap_fault(vma, vmf); 1558 ret = filemap_fault(vma, vmf);
1544 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1559 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@ -1546,6 +1561,13 @@ xfs_filemap_fault(
1546 return ret; 1561 return ret;
1547} 1562}
1548 1563
1564/*
1565 * Similar to xfs_filemap_fault(), the DAX fault path can call into here on
1566 * both read and write faults. Hence we need to handle both cases. There is no
1567 * ->pmd_mkwrite callout for huge pages, so we have a single function here to
1568 * handle both cases here. @flags carries the information on the type of fault
1569 * occuring.
1570 */
1549STATIC int 1571STATIC int
1550xfs_filemap_pmd_fault( 1572xfs_filemap_pmd_fault(
1551 struct vm_area_struct *vma, 1573 struct vm_area_struct *vma,
@@ -1562,15 +1584,54 @@ xfs_filemap_pmd_fault(
1562 1584
1563 trace_xfs_filemap_pmd_fault(ip); 1585 trace_xfs_filemap_pmd_fault(ip);
1564 1586
1565 sb_start_pagefault(inode->i_sb); 1587 if (flags & FAULT_FLAG_WRITE) {
1566 file_update_time(vma->vm_file); 1588 sb_start_pagefault(inode->i_sb);
1589 file_update_time(vma->vm_file);
1590 }
1591
1567 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1592 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1568 ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct, 1593 ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault,
1569 xfs_end_io_dax_write); 1594 NULL);
1570 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1595 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1571 sb_end_pagefault(inode->i_sb);
1572 1596
1597 if (flags & FAULT_FLAG_WRITE)
1598 sb_end_pagefault(inode->i_sb);
1599
1600 return ret;
1601}
1602
1603/*
1604 * pfn_mkwrite was originally inteneded to ensure we capture time stamp
1605 * updates on write faults. In reality, it's need to serialise against
1606 * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
1607 * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
1608 * barrier in place.
1609 */
1610static int
1611xfs_filemap_pfn_mkwrite(
1612 struct vm_area_struct *vma,
1613 struct vm_fault *vmf)
1614{
1615
1616 struct inode *inode = file_inode(vma->vm_file);
1617 struct xfs_inode *ip = XFS_I(inode);
1618 int ret = VM_FAULT_NOPAGE;
1619 loff_t size;
1620
1621 trace_xfs_filemap_pfn_mkwrite(ip);
1622
1623 sb_start_pagefault(inode->i_sb);
1624 file_update_time(vma->vm_file);
1625
1626 /* check if the faulting page hasn't raced with truncate */
1627 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1628 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1629 if (vmf->pgoff >= size)
1630 ret = VM_FAULT_SIGBUS;
1631 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1632 sb_end_pagefault(inode->i_sb);
1573 return ret; 1633 return ret;
1634
1574} 1635}
1575 1636
1576static const struct vm_operations_struct xfs_file_vm_ops = { 1637static const struct vm_operations_struct xfs_file_vm_ops = {
@@ -1578,6 +1639,7 @@ static const struct vm_operations_struct xfs_file_vm_ops = {
1578 .pmd_fault = xfs_filemap_pmd_fault, 1639 .pmd_fault = xfs_filemap_pmd_fault,
1579 .map_pages = filemap_map_pages, 1640 .map_pages = filemap_map_pages,
1580 .page_mkwrite = xfs_filemap_page_mkwrite, 1641 .page_mkwrite = xfs_filemap_page_mkwrite,
1642 .pfn_mkwrite = xfs_filemap_pfn_mkwrite,
1581}; 1643};
1582 1644
1583STATIC int 1645STATIC int
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0a326bd64d4e..d7a490f24ead 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -63,7 +63,7 @@ xfs_inode_alloc(
63 return NULL; 63 return NULL;
64 } 64 }
65 65
66 XFS_STATS_INC(vn_active); 66 XFS_STATS_INC(mp, vn_active);
67 ASSERT(atomic_read(&ip->i_pincount) == 0); 67 ASSERT(atomic_read(&ip->i_pincount) == 0);
68 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 68 ASSERT(!spin_is_locked(&ip->i_flags_lock));
69 ASSERT(!xfs_isiflocked(ip)); 69 ASSERT(!xfs_isiflocked(ip));
@@ -129,7 +129,7 @@ xfs_inode_free(
129 /* asserts to verify all state is correct here */ 129 /* asserts to verify all state is correct here */
130 ASSERT(atomic_read(&ip->i_pincount) == 0); 130 ASSERT(atomic_read(&ip->i_pincount) == 0);
131 ASSERT(!xfs_isiflocked(ip)); 131 ASSERT(!xfs_isiflocked(ip));
132 XFS_STATS_DEC(vn_active); 132 XFS_STATS_DEC(ip->i_mount, vn_active);
133 133
134 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); 134 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
135} 135}
@@ -159,7 +159,7 @@ xfs_iget_cache_hit(
159 spin_lock(&ip->i_flags_lock); 159 spin_lock(&ip->i_flags_lock);
160 if (ip->i_ino != ino) { 160 if (ip->i_ino != ino) {
161 trace_xfs_iget_skip(ip); 161 trace_xfs_iget_skip(ip);
162 XFS_STATS_INC(xs_ig_frecycle); 162 XFS_STATS_INC(mp, xs_ig_frecycle);
163 error = -EAGAIN; 163 error = -EAGAIN;
164 goto out_error; 164 goto out_error;
165 } 165 }
@@ -177,7 +177,7 @@ xfs_iget_cache_hit(
177 */ 177 */
178 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 178 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
179 trace_xfs_iget_skip(ip); 179 trace_xfs_iget_skip(ip);
180 XFS_STATS_INC(xs_ig_frecycle); 180 XFS_STATS_INC(mp, xs_ig_frecycle);
181 error = -EAGAIN; 181 error = -EAGAIN;
182 goto out_error; 182 goto out_error;
183 } 183 }
@@ -259,7 +259,7 @@ xfs_iget_cache_hit(
259 xfs_ilock(ip, lock_flags); 259 xfs_ilock(ip, lock_flags);
260 260
261 xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); 261 xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
262 XFS_STATS_INC(xs_ig_found); 262 XFS_STATS_INC(mp, xs_ig_found);
263 263
264 return 0; 264 return 0;
265 265
@@ -342,7 +342,7 @@ xfs_iget_cache_miss(
342 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 342 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
343 if (unlikely(error)) { 343 if (unlikely(error)) {
344 WARN_ON(error != -EEXIST); 344 WARN_ON(error != -EEXIST);
345 XFS_STATS_INC(xs_ig_dup); 345 XFS_STATS_INC(mp, xs_ig_dup);
346 error = -EAGAIN; 346 error = -EAGAIN;
347 goto out_preload_end; 347 goto out_preload_end;
348 } 348 }
@@ -412,7 +412,7 @@ xfs_iget(
412 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 412 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
413 return -EINVAL; 413 return -EINVAL;
414 414
415 XFS_STATS_INC(xs_ig_attempts); 415 XFS_STATS_INC(mp, xs_ig_attempts);
416 416
417 /* get the perag structure and ensure that it's inode capable */ 417 /* get the perag structure and ensure that it's inode capable */
418 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); 418 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
@@ -429,7 +429,7 @@ again:
429 goto out_error_or_again; 429 goto out_error_or_again;
430 } else { 430 } else {
431 rcu_read_unlock(); 431 rcu_read_unlock();
432 XFS_STATS_INC(xs_ig_missed); 432 XFS_STATS_INC(mp, xs_ig_missed);
433 433
434 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, 434 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
435 flags, lock_flags); 435 flags, lock_flags);
@@ -965,7 +965,7 @@ reclaim:
965 xfs_ifunlock(ip); 965 xfs_ifunlock(ip);
966 xfs_iunlock(ip, XFS_ILOCK_EXCL); 966 xfs_iunlock(ip, XFS_ILOCK_EXCL);
967 967
968 XFS_STATS_INC(xs_ig_reclaims); 968 XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
969 /* 969 /*
970 * Remove the inode from the per-AG radix tree. 970 * Remove the inode from the per-AG radix tree.
971 * 971 *
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index dc40a6d5ae0d..8ee393996b7d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2365,6 +2365,7 @@ retry:
2365 2365
2366 iip->ili_last_fields = iip->ili_fields; 2366 iip->ili_last_fields = iip->ili_fields;
2367 iip->ili_fields = 0; 2367 iip->ili_fields = 0;
2368 iip->ili_fsync_fields = 0;
2368 iip->ili_logged = 1; 2369 iip->ili_logged = 1;
2369 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2370 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2370 &iip->ili_item.li_lsn); 2371 &iip->ili_item.li_lsn);
@@ -3271,8 +3272,8 @@ xfs_iflush_cluster(
3271 } 3272 }
3272 3273
3273 if (clcount) { 3274 if (clcount) {
3274 XFS_STATS_INC(xs_icluster_flushcnt); 3275 XFS_STATS_INC(mp, xs_icluster_flushcnt);
3275 XFS_STATS_ADD(xs_icluster_flushinode, clcount); 3276 XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
3276 } 3277 }
3277 3278
3278out_free: 3279out_free:
@@ -3345,7 +3346,7 @@ xfs_iflush(
3345 struct xfs_dinode *dip; 3346 struct xfs_dinode *dip;
3346 int error; 3347 int error;
3347 3348
3348 XFS_STATS_INC(xs_iflush_count); 3349 XFS_STATS_INC(mp, xs_iflush_count);
3349 3350
3350 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3351 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3351 ASSERT(xfs_isiflocked(ip)); 3352 ASSERT(xfs_isiflocked(ip));
@@ -3560,6 +3561,7 @@ xfs_iflush_int(
3560 */ 3561 */
3561 iip->ili_last_fields = iip->ili_fields; 3562 iip->ili_last_fields = iip->ili_fields;
3562 iip->ili_fields = 0; 3563 iip->ili_fields = 0;
3564 iip->ili_fsync_fields = 0;
3563 iip->ili_logged = 1; 3565 iip->ili_logged = 1;
3564 3566
3565 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 3567 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 62bd80f4edd9..d14b12b8cfef 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -719,6 +719,7 @@ xfs_iflush_abort(
719 * attempted. 719 * attempted.
720 */ 720 */
721 iip->ili_fields = 0; 721 iip->ili_fields = 0;
722 iip->ili_fsync_fields = 0;
722 } 723 }
723 /* 724 /*
724 * Release the inode's flush lock since we're done with it. 725 * Release the inode's flush lock since we're done with it.
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 488d81254e28..4c7722e325b3 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -34,6 +34,7 @@ typedef struct xfs_inode_log_item {
34 unsigned short ili_logged; /* flushed logged data */ 34 unsigned short ili_logged; /* flushed logged data */
35 unsigned int ili_last_fields; /* fields when flushed */ 35 unsigned int ili_last_fields; /* fields when flushed */
36 unsigned int ili_fields; /* fields to be logged */ 36 unsigned int ili_fields; /* fields to be logged */
37 unsigned int ili_fsync_fields; /* logged since last fsync */
37} xfs_inode_log_item_t; 38} xfs_inode_log_item_t;
38 39
39static inline int xfs_inode_clean(xfs_inode_t *ip) 40static inline int xfs_inode_clean(xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ea7d85af5310..d42738deec6d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -40,6 +40,7 @@
40#include "xfs_symlink.h" 40#include "xfs_symlink.h"
41#include "xfs_trans.h" 41#include "xfs_trans.h"
42#include "xfs_pnfs.h" 42#include "xfs_pnfs.h"
43#include "xfs_acl.h"
43 44
44#include <linux/capability.h> 45#include <linux/capability.h>
45#include <linux/dcache.h> 46#include <linux/dcache.h>
@@ -411,7 +412,7 @@ xfs_attrlist_by_handle(
411 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) 412 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
412 return -EFAULT; 413 return -EFAULT;
413 if (al_hreq.buflen < sizeof(struct attrlist) || 414 if (al_hreq.buflen < sizeof(struct attrlist) ||
414 al_hreq.buflen > XATTR_LIST_MAX) 415 al_hreq.buflen > XFS_XATTR_LIST_MAX)
415 return -EINVAL; 416 return -EINVAL;
416 417
417 /* 418 /*
@@ -455,7 +456,7 @@ xfs_attrmulti_attr_get(
455 unsigned char *kbuf; 456 unsigned char *kbuf;
456 int error = -EFAULT; 457 int error = -EFAULT;
457 458
458 if (*len > XATTR_SIZE_MAX) 459 if (*len > XFS_XATTR_SIZE_MAX)
459 return -EINVAL; 460 return -EINVAL;
460 kbuf = kmem_zalloc_large(*len, KM_SLEEP); 461 kbuf = kmem_zalloc_large(*len, KM_SLEEP);
461 if (!kbuf) 462 if (!kbuf)
@@ -482,17 +483,22 @@ xfs_attrmulti_attr_set(
482 __uint32_t flags) 483 __uint32_t flags)
483{ 484{
484 unsigned char *kbuf; 485 unsigned char *kbuf;
486 int error;
485 487
486 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 488 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
487 return -EPERM; 489 return -EPERM;
488 if (len > XATTR_SIZE_MAX) 490 if (len > XFS_XATTR_SIZE_MAX)
489 return -EINVAL; 491 return -EINVAL;
490 492
491 kbuf = memdup_user(ubuf, len); 493 kbuf = memdup_user(ubuf, len);
492 if (IS_ERR(kbuf)) 494 if (IS_ERR(kbuf))
493 return PTR_ERR(kbuf); 495 return PTR_ERR(kbuf);
494 496
495 return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 497 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
498 if (!error)
499 xfs_forget_acl(inode, name, flags);
500 kfree(kbuf);
501 return error;
496} 502}
497 503
498int 504int
@@ -501,9 +507,14 @@ xfs_attrmulti_attr_remove(
501 unsigned char *name, 507 unsigned char *name,
502 __uint32_t flags) 508 __uint32_t flags)
503{ 509{
510 int error;
511
504 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 512 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
505 return -EPERM; 513 return -EPERM;
506 return xfs_attr_remove(XFS_I(inode), name, flags); 514 error = xfs_attr_remove(XFS_I(inode), name, flags);
515 if (!error)
516 xfs_forget_acl(inode, name, flags);
517 return error;
507} 518}
508 519
509STATIC int 520STATIC int
@@ -1028,7 +1039,7 @@ xfs_ioctl_setattr_xflags(
1028 xfs_diflags_to_linux(ip); 1039 xfs_diflags_to_linux(ip);
1029 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1040 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1030 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1041 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1031 XFS_STATS_INC(xs_ig_attrchg); 1042 XFS_STATS_INC(mp, xs_ig_attrchg);
1032 return 0; 1043 return 0;
1033} 1044}
1034 1045
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index b88bdc85dd3d..1a05d8ae327d 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -356,7 +356,7 @@ xfs_compat_attrlist_by_handle(
356 sizeof(compat_xfs_fsop_attrlist_handlereq_t))) 356 sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
357 return -EFAULT; 357 return -EFAULT;
358 if (al_hreq.buflen < sizeof(struct attrlist) || 358 if (al_hreq.buflen < sizeof(struct attrlist) ||
359 al_hreq.buflen > XATTR_LIST_MAX) 359 al_hreq.buflen > XFS_XATTR_LIST_MAX)
360 return -EINVAL; 360 return -EINVAL;
361 361
362 /* 362 /*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1f86033171c8..f4f5b43cf647 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -131,20 +131,30 @@ xfs_iomap_write_direct(
131 uint qblocks, resblks, resrtextents; 131 uint qblocks, resblks, resrtextents;
132 int committed; 132 int committed;
133 int error; 133 int error;
134 134 int lockmode;
135 error = xfs_qm_dqattach(ip, 0); 135 int bmapi_flags = XFS_BMAPI_PREALLOC;
136 if (error)
137 return error;
138 136
139 rt = XFS_IS_REALTIME_INODE(ip); 137 rt = XFS_IS_REALTIME_INODE(ip);
140 extsz = xfs_get_extsz_hint(ip); 138 extsz = xfs_get_extsz_hint(ip);
139 lockmode = XFS_ILOCK_SHARED; /* locked by caller */
140
141 ASSERT(xfs_isilocked(ip, lockmode));
141 142
142 offset_fsb = XFS_B_TO_FSBT(mp, offset); 143 offset_fsb = XFS_B_TO_FSBT(mp, offset);
143 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 144 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
144 if ((offset + count) > XFS_ISIZE(ip)) { 145 if ((offset + count) > XFS_ISIZE(ip)) {
146 /*
147 * Assert that the in-core extent list is present since this can
148 * call xfs_iread_extents() and we only have the ilock shared.
149 * This should be safe because the lock was held around a bmapi
150 * call in the caller and we only need it to access the in-core
151 * list.
152 */
153 ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
154 XFS_IFEXTENTS);
145 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); 155 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
146 if (error) 156 if (error)
147 return error; 157 goto out_unlock;
148 } else { 158 } else {
149 if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) 159 if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
150 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 160 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@@ -174,9 +184,35 @@ xfs_iomap_write_direct(
174 } 184 }
175 185
176 /* 186 /*
187 * Drop the shared lock acquired by the caller, attach the dquot if
188 * necessary and move on to transaction setup.
189 */
190 xfs_iunlock(ip, lockmode);
191 error = xfs_qm_dqattach(ip, 0);
192 if (error)
193 return error;
194
195 /*
177 * Allocate and setup the transaction 196 * Allocate and setup the transaction
178 */ 197 */
179 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 198 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
199
200 /*
201 * For DAX, we do not allocate unwritten extents, but instead we zero
202 * the block before we commit the transaction. Ideally we'd like to do
203 * this outside the transaction context, but if we commit and then crash
204 * we may not have zeroed the blocks and this will be exposed on
205 * recovery of the allocation. Hence we must zero before commit.
206 * Further, if we are mapping unwritten extents here, we need to zero
207 * and convert them to written so that we don't need an unwritten extent
208 * callback for DAX. This also means that we need to be able to dip into
209 * the reserve block pool if there is no space left but we need to do
210 * unwritten extent conversion.
211 */
212 if (IS_DAX(VFS_I(ip))) {
213 bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
214 tp->t_flags |= XFS_TRANS_RESERVE;
215 }
180 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 216 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
181 resblks, resrtextents); 217 resblks, resrtextents);
182 /* 218 /*
@@ -187,7 +223,8 @@ xfs_iomap_write_direct(
187 return error; 223 return error;
188 } 224 }
189 225
190 xfs_ilock(ip, XFS_ILOCK_EXCL); 226 lockmode = XFS_ILOCK_EXCL;
227 xfs_ilock(ip, lockmode);
191 228
192 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); 229 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
193 if (error) 230 if (error)
@@ -202,8 +239,8 @@ xfs_iomap_write_direct(
202 xfs_bmap_init(&free_list, &firstfsb); 239 xfs_bmap_init(&free_list, &firstfsb);
203 nimaps = 1; 240 nimaps = 1;
204 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 241 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
205 XFS_BMAPI_PREALLOC, &firstfsb, 0, 242 bmapi_flags, &firstfsb, resblks, imap,
206 imap, &nimaps, &free_list); 243 &nimaps, &free_list);
207 if (error) 244 if (error)
208 goto out_bmap_cancel; 245 goto out_bmap_cancel;
209 246
@@ -213,6 +250,7 @@ xfs_iomap_write_direct(
213 error = xfs_bmap_finish(&tp, &free_list, &committed); 250 error = xfs_bmap_finish(&tp, &free_list, &committed);
214 if (error) 251 if (error)
215 goto out_bmap_cancel; 252 goto out_bmap_cancel;
253
216 error = xfs_trans_commit(tp); 254 error = xfs_trans_commit(tp);
217 if (error) 255 if (error)
218 goto out_unlock; 256 goto out_unlock;
@@ -229,7 +267,7 @@ xfs_iomap_write_direct(
229 error = xfs_alert_fsblock_zero(ip, imap); 267 error = xfs_alert_fsblock_zero(ip, imap);
230 268
231out_unlock: 269out_unlock:
232 xfs_iunlock(ip, XFS_ILOCK_EXCL); 270 xfs_iunlock(ip, lockmode);
233 return error; 271 return error;
234 272
235out_bmap_cancel: 273out_bmap_cancel:
@@ -670,7 +708,7 @@ xfs_iomap_write_allocate(
670 count_fsb = imap->br_blockcount; 708 count_fsb = imap->br_blockcount;
671 map_start_fsb = imap->br_startoff; 709 map_start_fsb = imap->br_startoff;
672 710
673 XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); 711 XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
674 712
675 while (count_fsb != 0) { 713 while (count_fsb != 0) {
676 /* 714 /*
@@ -750,9 +788,9 @@ xfs_iomap_write_allocate(
750 * pointer that the caller gave to us. 788 * pointer that the caller gave to us.
751 */ 789 */
752 error = xfs_bmapi_write(tp, ip, map_start_fsb, 790 error = xfs_bmapi_write(tp, ip, map_start_fsb,
753 count_fsb, 0, 791 count_fsb, 0, &first_block,
754 &first_block, 1, 792 nres, imap, &nimaps,
755 imap, &nimaps, &free_list); 793 &free_list);
756 if (error) 794 if (error)
757 goto trans_cancel; 795 goto trans_cancel;
758 796
@@ -777,7 +815,7 @@ xfs_iomap_write_allocate(
777 if ((offset_fsb >= imap->br_startoff) && 815 if ((offset_fsb >= imap->br_startoff) &&
778 (offset_fsb < (imap->br_startoff + 816 (offset_fsb < (imap->br_startoff +
779 imap->br_blockcount))) { 817 imap->br_blockcount))) {
780 XFS_STATS_INC(xs_xstrat_quick); 818 XFS_STATS_INC(mp, xs_xstrat_quick);
781 return 0; 819 return 0;
782 } 820 }
783 821
@@ -866,8 +904,8 @@ xfs_iomap_write_unwritten(
866 xfs_bmap_init(&free_list, &firstfsb); 904 xfs_bmap_init(&free_list, &firstfsb);
867 nimaps = 1; 905 nimaps = 1;
868 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 906 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
869 XFS_BMAPI_CONVERT, &firstfsb, 907 XFS_BMAPI_CONVERT, &firstfsb, resblks,
870 1, &imap, &nimaps, &free_list); 908 &imap, &nimaps, &free_list);
871 if (error) 909 if (error)
872 goto error_on_bmapi_transaction; 910 goto error_on_bmapi_transaction;
873 911
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 8294132e6a3c..245268a0cdf0 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -695,7 +695,7 @@ xfs_setattr_nonsize(
695 695
696 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 696 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
697 697
698 XFS_STATS_INC(xs_ig_attrchg); 698 XFS_STATS_INC(mp, xs_ig_attrchg);
699 699
700 if (mp->m_flags & XFS_MOUNT_WSYNC) 700 if (mp->m_flags & XFS_MOUNT_WSYNC)
701 xfs_trans_set_sync(tp); 701 xfs_trans_set_sync(tp);
@@ -922,7 +922,7 @@ xfs_setattr_size(
922 922
923 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 923 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
924 924
925 XFS_STATS_INC(xs_ig_attrchg); 925 XFS_STATS_INC(mp, xs_ig_attrchg);
926 926
927 if (mp->m_flags & XFS_MOUNT_WSYNC) 927 if (mp->m_flags & XFS_MOUNT_WSYNC)
928 xfs_trans_set_sync(tp); 928 xfs_trans_set_sync(tp);
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 85f883dd6207..ec0e239a0fa9 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -171,6 +171,13 @@ struct xfs_kobj {
171 struct completion complete; 171 struct completion complete;
172}; 172};
173 173
174struct xstats {
175 struct xfsstats __percpu *xs_stats;
176 struct xfs_kobj xs_kobj;
177};
178
179extern struct xstats xfsstats;
180
174/* Kernel uid/gid conversion. These are used to convert to/from the on disk 181/* Kernel uid/gid conversion. These are used to convert to/from the on disk
175 * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. 182 * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
176 * The conversion here is type only, the value will remain the same since we 183 * The conversion here is type only, the value will remain the same since we
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index aaadee0969c9..f52c72a1a06f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -268,7 +268,7 @@ xlog_grant_head_wait(
268 __set_current_state(TASK_UNINTERRUPTIBLE); 268 __set_current_state(TASK_UNINTERRUPTIBLE);
269 spin_unlock(&head->lock); 269 spin_unlock(&head->lock);
270 270
271 XFS_STATS_INC(xs_sleep_logspace); 271 XFS_STATS_INC(log->l_mp, xs_sleep_logspace);
272 272
273 trace_xfs_log_grant_sleep(log, tic); 273 trace_xfs_log_grant_sleep(log, tic);
274 schedule(); 274 schedule();
@@ -379,7 +379,7 @@ xfs_log_regrant(
379 if (XLOG_FORCED_SHUTDOWN(log)) 379 if (XLOG_FORCED_SHUTDOWN(log))
380 return -EIO; 380 return -EIO;
381 381
382 XFS_STATS_INC(xs_try_logspace); 382 XFS_STATS_INC(mp, xs_try_logspace);
383 383
384 /* 384 /*
385 * This is a new transaction on the ticket, so we need to change the 385 * This is a new transaction on the ticket, so we need to change the
@@ -448,7 +448,7 @@ xfs_log_reserve(
448 if (XLOG_FORCED_SHUTDOWN(log)) 448 if (XLOG_FORCED_SHUTDOWN(log))
449 return -EIO; 449 return -EIO;
450 450
451 XFS_STATS_INC(xs_try_logspace); 451 XFS_STATS_INC(mp, xs_try_logspace);
452 452
453 ASSERT(*ticp == NULL); 453 ASSERT(*ticp == NULL);
454 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 454 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
@@ -1768,7 +1768,7 @@ xlog_sync(
1768 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); 1768 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
1769 int size; 1769 int size;
1770 1770
1771 XFS_STATS_INC(xs_log_writes); 1771 XFS_STATS_INC(log->l_mp, xs_log_writes);
1772 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 1772 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
1773 1773
1774 /* Add for LR header */ 1774 /* Add for LR header */
@@ -1805,7 +1805,7 @@ xlog_sync(
1805 bp = iclog->ic_bp; 1805 bp = iclog->ic_bp;
1806 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); 1806 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
1807 1807
1808 XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); 1808 XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
1809 1809
1810 /* Do we need to split this write into 2 parts? */ 1810 /* Do we need to split this write into 2 parts? */
1811 if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { 1811 if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
@@ -2422,11 +2422,20 @@ xlog_write(
2422 &partial_copy_len); 2422 &partial_copy_len);
2423 xlog_verify_dest_ptr(log, ptr); 2423 xlog_verify_dest_ptr(log, ptr);
2424 2424
2425 /* copy region */ 2425 /*
2426 * Copy region.
2427 *
2428 * Unmount records just log an opheader, so can have
2429 * empty payloads with no data region to copy. Hence we
2430 * only copy the payload if the vector says it has data
2431 * to copy.
2432 */
2426 ASSERT(copy_len >= 0); 2433 ASSERT(copy_len >= 0);
2427 memcpy(ptr, reg->i_addr + copy_off, copy_len); 2434 if (copy_len > 0) {
2428 xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); 2435 memcpy(ptr, reg->i_addr + copy_off, copy_len);
2429 2436 xlog_write_adv_cnt(&ptr, &len, &log_offset,
2437 copy_len);
2438 }
2430 copy_len += start_rec_copy + sizeof(xlog_op_header_t); 2439 copy_len += start_rec_copy + sizeof(xlog_op_header_t);
2431 record_cnt++; 2440 record_cnt++;
2432 data_cnt += contwr ? copy_len : 0; 2441 data_cnt += contwr ? copy_len : 0;
@@ -2913,7 +2922,7 @@ restart:
2913 2922
2914 iclog = log->l_iclog; 2923 iclog = log->l_iclog;
2915 if (iclog->ic_state != XLOG_STATE_ACTIVE) { 2924 if (iclog->ic_state != XLOG_STATE_ACTIVE) {
2916 XFS_STATS_INC(xs_log_noiclogs); 2925 XFS_STATS_INC(log->l_mp, xs_log_noiclogs);
2917 2926
2918 /* Wait for log writes to have flushed */ 2927 /* Wait for log writes to have flushed */
2919 xlog_wait(&log->l_flush_wait, &log->l_icloglock); 2928 xlog_wait(&log->l_flush_wait, &log->l_icloglock);
@@ -3165,11 +3174,19 @@ xlog_state_switch_iclogs(
3165 } 3174 }
3166 3175
3167 if (log->l_curr_block >= log->l_logBBsize) { 3176 if (log->l_curr_block >= log->l_logBBsize) {
3177 /*
3178 * Rewind the current block before the cycle is bumped to make
3179 * sure that the combined LSN never transiently moves forward
3180 * when the log wraps to the next cycle. This is to support the
3181 * unlocked sample of these fields from xlog_valid_lsn(). Most
3182 * other cases should acquire l_icloglock.
3183 */
3184 log->l_curr_block -= log->l_logBBsize;
3185 ASSERT(log->l_curr_block >= 0);
3186 smp_wmb();
3168 log->l_curr_cycle++; 3187 log->l_curr_cycle++;
3169 if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM) 3188 if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
3170 log->l_curr_cycle++; 3189 log->l_curr_cycle++;
3171 log->l_curr_block -= log->l_logBBsize;
3172 ASSERT(log->l_curr_block >= 0);
3173 } 3190 }
3174 ASSERT(iclog == log->l_iclog); 3191 ASSERT(iclog == log->l_iclog);
3175 log->l_iclog = iclog->ic_next; 3192 log->l_iclog = iclog->ic_next;
@@ -3212,7 +3229,7 @@ _xfs_log_force(
3212 struct xlog_in_core *iclog; 3229 struct xlog_in_core *iclog;
3213 xfs_lsn_t lsn; 3230 xfs_lsn_t lsn;
3214 3231
3215 XFS_STATS_INC(xs_log_force); 3232 XFS_STATS_INC(mp, xs_log_force);
3216 3233
3217 xlog_cil_force(log); 3234 xlog_cil_force(log);
3218 3235
@@ -3297,7 +3314,7 @@ maybe_sleep:
3297 spin_unlock(&log->l_icloglock); 3314 spin_unlock(&log->l_icloglock);
3298 return -EIO; 3315 return -EIO;
3299 } 3316 }
3300 XFS_STATS_INC(xs_log_force_sleep); 3317 XFS_STATS_INC(mp, xs_log_force_sleep);
3301 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); 3318 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3302 /* 3319 /*
3303 * No need to grab the log lock here since we're 3320 * No need to grab the log lock here since we're
@@ -3362,7 +3379,7 @@ _xfs_log_force_lsn(
3362 3379
3363 ASSERT(lsn != 0); 3380 ASSERT(lsn != 0);
3364 3381
3365 XFS_STATS_INC(xs_log_force); 3382 XFS_STATS_INC(mp, xs_log_force);
3366 3383
3367 lsn = xlog_cil_force_lsn(log, lsn); 3384 lsn = xlog_cil_force_lsn(log, lsn);
3368 if (lsn == NULLCOMMITLSN) 3385 if (lsn == NULLCOMMITLSN)
@@ -3411,7 +3428,7 @@ try_again:
3411 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { 3428 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
3412 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3429 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3413 3430
3414 XFS_STATS_INC(xs_log_force_sleep); 3431 XFS_STATS_INC(mp, xs_log_force_sleep);
3415 3432
3416 xlog_wait(&iclog->ic_prev->ic_write_wait, 3433 xlog_wait(&iclog->ic_prev->ic_write_wait,
3417 &log->l_icloglock); 3434 &log->l_icloglock);
@@ -3441,7 +3458,7 @@ try_again:
3441 spin_unlock(&log->l_icloglock); 3458 spin_unlock(&log->l_icloglock);
3442 return -EIO; 3459 return -EIO;
3443 } 3460 }
3444 XFS_STATS_INC(xs_log_force_sleep); 3461 XFS_STATS_INC(mp, xs_log_force_sleep);
3445 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); 3462 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3446 /* 3463 /*
3447 * No need to grab the log lock here since we're 3464 * No need to grab the log lock here since we're
@@ -4023,3 +4040,45 @@ xlog_iclogs_empty(
4023 return 1; 4040 return 1;
4024} 4041}
4025 4042
4043/*
4044 * Verify that an LSN stamped into a piece of metadata is valid. This is
4045 * intended for use in read verifiers on v5 superblocks.
4046 */
4047bool
4048xfs_log_check_lsn(
4049 struct xfs_mount *mp,
4050 xfs_lsn_t lsn)
4051{
4052 struct xlog *log = mp->m_log;
4053 bool valid;
4054
4055 /*
4056 * norecovery mode skips mount-time log processing and unconditionally
4057 * resets the in-core LSN. We can't validate in this mode, but
4058 * modifications are not allowed anyways so just return true.
4059 */
4060 if (mp->m_flags & XFS_MOUNT_NORECOVERY)
4061 return true;
4062
4063 /*
4064 * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
4065 * handled by recovery and thus safe to ignore here.
4066 */
4067 if (lsn == NULLCOMMITLSN)
4068 return true;
4069
4070 valid = xlog_valid_lsn(mp->m_log, lsn);
4071
4072 /* warn the user about what's gone wrong before verifier failure */
4073 if (!valid) {
4074 spin_lock(&log->l_icloglock);
4075 xfs_warn(mp,
4076"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
4077"Please unmount and run xfs_repair (>= v4.3) to resolve.",
4078 CYCLE_LSN(lsn), BLOCK_LSN(lsn),
4079 log->l_curr_cycle, log->l_curr_block);
4080 spin_unlock(&log->l_icloglock);
4081 }
4082
4083 return valid;
4084}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 09d91d3166cd..aa533a7d50f2 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -181,5 +181,6 @@ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
181void xfs_log_work_queue(struct xfs_mount *mp); 181void xfs_log_work_queue(struct xfs_mount *mp);
182void xfs_log_worker(struct work_struct *work); 182void xfs_log_worker(struct work_struct *work);
183void xfs_log_quiesce(struct xfs_mount *mp); 183void xfs_log_quiesce(struct xfs_mount *mp);
184bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
184 185
185#endif /* __XFS_LOG_H__ */ 186#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 950f3f94720c..8daba7491b13 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -560,4 +560,55 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
560 remove_wait_queue(wq, &wait); 560 remove_wait_queue(wq, &wait);
561} 561}
562 562
563/*
564 * The LSN is valid so long as it is behind the current LSN. If it isn't, this
565 * means that the next log record that includes this metadata could have a
566 * smaller LSN. In turn, this means that the modification in the log would not
567 * replay.
568 */
569static inline bool
570xlog_valid_lsn(
571 struct xlog *log,
572 xfs_lsn_t lsn)
573{
574 int cur_cycle;
575 int cur_block;
576 bool valid = true;
577
578 /*
579 * First, sample the current lsn without locking to avoid added
580 * contention from metadata I/O. The current cycle and block are updated
581 * (in xlog_state_switch_iclogs()) and read here in a particular order
582 * to avoid false negatives (e.g., thinking the metadata LSN is valid
583 * when it is not).
584 *
585 * The current block is always rewound before the cycle is bumped in
586 * xlog_state_switch_iclogs() to ensure the current LSN is never seen in
587 * a transiently forward state. Instead, we can see the LSN in a
588 * transiently behind state if we happen to race with a cycle wrap.
589 */
590 cur_cycle = ACCESS_ONCE(log->l_curr_cycle);
591 smp_rmb();
592 cur_block = ACCESS_ONCE(log->l_curr_block);
593
594 if ((CYCLE_LSN(lsn) > cur_cycle) ||
595 (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) {
596 /*
597 * If the metadata LSN appears invalid, it's possible the check
598 * above raced with a wrap to the next log cycle. Grab the lock
599 * to check for sure.
600 */
601 spin_lock(&log->l_icloglock);
602 cur_cycle = log->l_curr_cycle;
603 cur_block = log->l_curr_block;
604 spin_unlock(&log->l_icloglock);
605
606 if ((CYCLE_LSN(lsn) > cur_cycle) ||
607 (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block))
608 valid = false;
609 }
610
611 return valid;
612}
613
563#endif /* __XFS_LOG_PRIV_H__ */ 614#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 512a0945d52a..c5ecaacdd218 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3431,7 +3431,7 @@ xlog_recover_add_to_cont_trans(
3431 * previous record. Copy the rest of the header. 3431 * previous record. Copy the rest of the header.
3432 */ 3432 */
3433 if (list_empty(&trans->r_itemq)) { 3433 if (list_empty(&trans->r_itemq)) {
3434 ASSERT(len < sizeof(struct xfs_trans_header)); 3434 ASSERT(len <= sizeof(struct xfs_trans_header));
3435 if (len > sizeof(struct xfs_trans_header)) { 3435 if (len > sizeof(struct xfs_trans_header)) {
3436 xfs_warn(log->l_mp, "%s: bad header length", __func__); 3436 xfs_warn(log->l_mp, "%s: bad header length", __func__);
3437 return -EIO; 3437 return -EIO;
@@ -4609,9 +4609,19 @@ xlog_recover(
4609 int error; 4609 int error;
4610 4610
4611 /* find the tail of the log */ 4611 /* find the tail of the log */
4612 if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) 4612 error = xlog_find_tail(log, &head_blk, &tail_blk);
4613 if (error)
4613 return error; 4614 return error;
4614 4615
4616 /*
4617 * The superblock was read before the log was available and thus the LSN
4618 * could not be verified. Check the superblock LSN against the current
4619 * LSN now that it's known.
4620 */
4621 if (xfs_sb_version_hascrc(&log->l_mp->m_sb) &&
4622 !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn))
4623 return -EINVAL;
4624
4615 if (tail_blk != head_blk) { 4625 if (tail_blk != head_blk) {
4616 /* There used to be a comment here: 4626 /* There used to be a comment here:
4617 * 4627 *
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index d8b67547ab34..11792d888e4e 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -17,6 +17,7 @@
17 17
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_fs.h" 19#include "xfs_fs.h"
20#include "xfs_error.h"
20#include "xfs_format.h" 21#include "xfs_format.h"
21#include "xfs_log_format.h" 22#include "xfs_log_format.h"
22#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
@@ -43,6 +44,7 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \
43{ \ 44{ \
44 struct va_format vaf; \ 45 struct va_format vaf; \
45 va_list args; \ 46 va_list args; \
47 int level; \
46 \ 48 \
47 va_start(args, fmt); \ 49 va_start(args, fmt); \
48 \ 50 \
@@ -51,6 +53,11 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \
51 \ 53 \
52 __xfs_printk(kern_level, mp, &vaf); \ 54 __xfs_printk(kern_level, mp, &vaf); \
53 va_end(args); \ 55 va_end(args); \
56 \
57 if (!kstrtoint(kern_level, 0, &level) && \
58 level <= LOGLEVEL_ERR && \
59 xfs_error_level >= XFS_ERRLEVEL_HIGH) \
60 xfs_stack_trace(); \
54} \ 61} \
55 62
56define_xfs_printk_level(xfs_emerg, KERN_EMERG); 63define_xfs_printk_level(xfs_emerg, KERN_EMERG);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bf92e0c037c7..bb753b359bee 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,6 +47,16 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
47static int xfs_uuid_table_size; 47static int xfs_uuid_table_size;
48static uuid_t *xfs_uuid_table; 48static uuid_t *xfs_uuid_table;
49 49
50void
51xfs_uuid_table_free(void)
52{
53 if (xfs_uuid_table_size == 0)
54 return;
55 kmem_free(xfs_uuid_table);
56 xfs_uuid_table = NULL;
57 xfs_uuid_table_size = 0;
58}
59
50/* 60/*
51 * See if the UUID is unique among mounted XFS filesystems. 61 * See if the UUID is unique among mounted XFS filesystems.
52 * Mount fails if UUID is nil or a FS with the same UUID is already mounted. 62 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -693,10 +703,15 @@ xfs_mountfs(
693 if (error) 703 if (error)
694 goto out; 704 goto out;
695 705
696 error = xfs_uuid_mount(mp); 706 error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
707 &mp->m_kobj, "stats");
697 if (error) 708 if (error)
698 goto out_remove_sysfs; 709 goto out_remove_sysfs;
699 710
711 error = xfs_uuid_mount(mp);
712 if (error)
713 goto out_del_stats;
714
700 /* 715 /*
701 * Set the minimum read and write sizes 716 * Set the minimum read and write sizes
702 */ 717 */
@@ -971,6 +986,8 @@ xfs_mountfs(
971 xfs_da_unmount(mp); 986 xfs_da_unmount(mp);
972 out_remove_uuid: 987 out_remove_uuid:
973 xfs_uuid_unmount(mp); 988 xfs_uuid_unmount(mp);
989 out_del_stats:
990 xfs_sysfs_del(&mp->m_stats.xs_kobj);
974 out_remove_sysfs: 991 out_remove_sysfs:
975 xfs_sysfs_del(&mp->m_kobj); 992 xfs_sysfs_del(&mp->m_kobj);
976 out: 993 out:
@@ -1047,6 +1064,7 @@ xfs_unmountfs(
1047 xfs_warn(mp, "Unable to update superblock counters. " 1064 xfs_warn(mp, "Unable to update superblock counters. "
1048 "Freespace may not be correct on next mount."); 1065 "Freespace may not be correct on next mount.");
1049 1066
1067
1050 xfs_log_unmount(mp); 1068 xfs_log_unmount(mp);
1051 xfs_da_unmount(mp); 1069 xfs_da_unmount(mp);
1052 xfs_uuid_unmount(mp); 1070 xfs_uuid_unmount(mp);
@@ -1056,6 +1074,7 @@ xfs_unmountfs(
1056#endif 1074#endif
1057 xfs_free_perag(mp); 1075 xfs_free_perag(mp);
1058 1076
1077 xfs_sysfs_del(&mp->m_stats.xs_kobj);
1059 xfs_sysfs_del(&mp->m_kobj); 1078 xfs_sysfs_del(&mp->m_kobj);
1060} 1079}
1061 1080
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7999e91cd49a..b57098481c10 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -127,6 +127,7 @@ typedef struct xfs_mount {
127 int64_t m_low_space[XFS_LOWSP_MAX]; 127 int64_t m_low_space[XFS_LOWSP_MAX];
128 /* low free space thresholds */ 128 /* low free space thresholds */
129 struct xfs_kobj m_kobj; 129 struct xfs_kobj m_kobj;
130 struct xstats m_stats; /* per-fs stats */
130 131
131 struct workqueue_struct *m_buf_workqueue; 132 struct workqueue_struct *m_buf_workqueue;
132 struct workqueue_struct *m_data_workqueue; 133 struct workqueue_struct *m_data_workqueue;
@@ -312,6 +313,7 @@ typedef struct xfs_perag {
312 int pagb_count; /* pagb slots in use */ 313 int pagb_count; /* pagb slots in use */
313} xfs_perag_t; 314} xfs_perag_t;
314 315
316extern void xfs_uuid_table_free(void);
315extern int xfs_log_sbcount(xfs_mount_t *); 317extern int xfs_log_sbcount(xfs_mount_t *);
316extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); 318extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
317extern int xfs_mountfs(xfs_mount_t *mp); 319extern int xfs_mountfs(xfs_mount_t *mp);
@@ -336,4 +338,7 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
336 338
337extern void xfs_set_low_space_thresholds(struct xfs_mount *); 339extern void xfs_set_low_space_thresholds(struct xfs_mount *);
338 340
341int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
342 xfs_off_t count_fsb);
343
339#endif /* __XFS_MOUNT_H__ */ 344#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index ab4a6066f7ca..dc6221942b85 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -181,6 +181,11 @@ xfs_fs_map_blocks(
181 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 181 ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
182 182
183 if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { 183 if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
184 /*
185 * xfs_iomap_write_direct() expects to take ownership of
186 * the shared ilock.
187 */
188 xfs_ilock(ip, XFS_ILOCK_SHARED);
184 error = xfs_iomap_write_direct(ip, offset, length, 189 error = xfs_iomap_write_direct(ip, offset, length,
185 &imap, nimaps); 190 &imap, nimaps);
186 if (error) 191 if (error)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 587174fd4f2c..532ab79d38fe 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -184,7 +184,7 @@ xfs_qm_dqpurge(
184 */ 184 */
185 ASSERT(!list_empty(&dqp->q_lru)); 185 ASSERT(!list_empty(&dqp->q_lru));
186 list_lru_del(&qi->qi_lru, &dqp->q_lru); 186 list_lru_del(&qi->qi_lru, &dqp->q_lru);
187 XFS_STATS_DEC(xs_qm_dquot_unused); 187 XFS_STATS_DEC(mp, xs_qm_dquot_unused);
188 188
189 xfs_qm_dqdestroy(dqp); 189 xfs_qm_dqdestroy(dqp);
190 return 0; 190 return 0;
@@ -448,11 +448,11 @@ xfs_qm_dquot_isolate(
448 */ 448 */
449 if (dqp->q_nrefs) { 449 if (dqp->q_nrefs) {
450 xfs_dqunlock(dqp); 450 xfs_dqunlock(dqp);
451 XFS_STATS_INC(xs_qm_dqwants); 451 XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants);
452 452
453 trace_xfs_dqreclaim_want(dqp); 453 trace_xfs_dqreclaim_want(dqp);
454 list_lru_isolate(lru, &dqp->q_lru); 454 list_lru_isolate(lru, &dqp->q_lru);
455 XFS_STATS_DEC(xs_qm_dquot_unused); 455 XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
456 return LRU_REMOVED; 456 return LRU_REMOVED;
457 } 457 }
458 458
@@ -496,19 +496,19 @@ xfs_qm_dquot_isolate(
496 496
497 ASSERT(dqp->q_nrefs == 0); 497 ASSERT(dqp->q_nrefs == 0);
498 list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose); 498 list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose);
499 XFS_STATS_DEC(xs_qm_dquot_unused); 499 XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
500 trace_xfs_dqreclaim_done(dqp); 500 trace_xfs_dqreclaim_done(dqp);
501 XFS_STATS_INC(xs_qm_dqreclaims); 501 XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims);
502 return LRU_REMOVED; 502 return LRU_REMOVED;
503 503
504out_miss_busy: 504out_miss_busy:
505 trace_xfs_dqreclaim_busy(dqp); 505 trace_xfs_dqreclaim_busy(dqp);
506 XFS_STATS_INC(xs_qm_dqreclaim_misses); 506 XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
507 return LRU_SKIP; 507 return LRU_SKIP;
508 508
509out_unlock_dirty: 509out_unlock_dirty:
510 trace_xfs_dqreclaim_busy(dqp); 510 trace_xfs_dqreclaim_busy(dqp);
511 XFS_STATS_INC(xs_qm_dqreclaim_misses); 511 XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
512 xfs_dqunlock(dqp); 512 xfs_dqunlock(dqp);
513 spin_lock(lru_lock); 513 spin_lock(lru_lock);
514 return LRU_RETRY; 514 return LRU_RETRY;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f2240383d4bb..8686df6c7609 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -18,20 +18,21 @@
18#include "xfs.h" 18#include "xfs.h"
19#include <linux/proc_fs.h> 19#include <linux/proc_fs.h>
20 20
21DEFINE_PER_CPU(struct xfsstats, xfsstats); 21struct xstats xfsstats;
22 22
23static int counter_val(int idx) 23static int counter_val(struct xfsstats __percpu *stats, int idx)
24{ 24{
25 int val = 0, cpu; 25 int val = 0, cpu;
26 26
27 for_each_possible_cpu(cpu) 27 for_each_possible_cpu(cpu)
28 val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx)); 28 val += *(((__u32 *)per_cpu_ptr(stats, cpu) + idx));
29 return val; 29 return val;
30} 30}
31 31
32static int xfs_stat_proc_show(struct seq_file *m, void *v) 32int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
33{ 33{
34 int i, j; 34 int i, j;
35 int len = 0;
35 __uint64_t xs_xstrat_bytes = 0; 36 __uint64_t xs_xstrat_bytes = 0;
36 __uint64_t xs_write_bytes = 0; 37 __uint64_t xs_write_bytes = 0;
37 __uint64_t xs_read_bytes = 0; 38 __uint64_t xs_read_bytes = 0;
@@ -65,54 +66,59 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
65 }; 66 };
66 67
67 /* Loop over all stats groups */ 68 /* Loop over all stats groups */
69
68 for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { 70 for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
69 seq_printf(m, "%s", xstats[i].desc); 71 len += snprintf(buf + len, PATH_MAX - len, "%s",
72 xstats[i].desc);
70 /* inner loop does each group */ 73 /* inner loop does each group */
71 for (; j < xstats[i].endpoint; j++) 74 for (; j < xstats[i].endpoint; j++)
72 seq_printf(m, " %u", counter_val(j)); 75 len += snprintf(buf + len, PATH_MAX - len, " %u",
73 seq_putc(m, '\n'); 76 counter_val(stats, j));
77 len += snprintf(buf + len, PATH_MAX - len, "\n");
74 } 78 }
75 /* extra precision counters */ 79 /* extra precision counters */
76 for_each_possible_cpu(i) { 80 for_each_possible_cpu(i) {
77 xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; 81 xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes;
78 xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; 82 xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes;
79 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; 83 xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes;
80 } 84 }
81 85
82 seq_printf(m, "xpc %Lu %Lu %Lu\n", 86 len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
83 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); 87 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
84 seq_printf(m, "debug %u\n", 88 len += snprintf(buf + len, PATH_MAX-len, "debug %u\n",
85#if defined(DEBUG) 89#if defined(DEBUG)
86 1); 90 1);
87#else 91#else
88 0); 92 0);
89#endif 93#endif
90 return 0; 94
95 return len;
91} 96}
92 97
93static int xfs_stat_proc_open(struct inode *inode, struct file *file) 98void xfs_stats_clearall(struct xfsstats __percpu *stats)
94{ 99{
95 return single_open(file, xfs_stat_proc_show, NULL); 100 int c;
101 __uint32_t vn_active;
102
103 xfs_notice(NULL, "Clearing xfsstats");
104 for_each_possible_cpu(c) {
105 preempt_disable();
106 /* save vn_active, it's a universal truth! */
107 vn_active = per_cpu_ptr(stats, c)->vn_active;
108 memset(per_cpu_ptr(stats, c), 0, sizeof(*stats));
109 per_cpu_ptr(stats, c)->vn_active = vn_active;
110 preempt_enable();
111 }
96} 112}
97 113
98static const struct file_operations xfs_stat_proc_fops = {
99 .owner = THIS_MODULE,
100 .open = xfs_stat_proc_open,
101 .read = seq_read,
102 .llseek = seq_lseek,
103 .release = single_release,
104};
105
106/* legacy quota interfaces */ 114/* legacy quota interfaces */
107#ifdef CONFIG_XFS_QUOTA 115#ifdef CONFIG_XFS_QUOTA
108static int xqm_proc_show(struct seq_file *m, void *v) 116static int xqm_proc_show(struct seq_file *m, void *v)
109{ 117{
110 /* maximum; incore; ratio free to inuse; freelist */ 118 /* maximum; incore; ratio free to inuse; freelist */
111 seq_printf(m, "%d\t%d\t%d\t%u\n", 119 seq_printf(m, "%d\t%d\t%d\t%u\n",
112 0, 120 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT),
113 counter_val(XFSSTAT_END_XQMSTAT), 121 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1));
114 0,
115 counter_val(XFSSTAT_END_XQMSTAT + 1));
116 return 0; 122 return 0;
117} 123}
118 124
@@ -136,7 +142,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
136 142
137 seq_printf(m, "qm"); 143 seq_printf(m, "qm");
138 for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++) 144 for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++)
139 seq_printf(m, " %u", counter_val(j)); 145 seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j));
140 seq_putc(m, '\n'); 146 seq_putc(m, '\n');
141 return 0; 147 return 0;
142} 148}
@@ -155,44 +161,35 @@ static const struct file_operations xqmstat_proc_fops = {
155}; 161};
156#endif /* CONFIG_XFS_QUOTA */ 162#endif /* CONFIG_XFS_QUOTA */
157 163
164#ifdef CONFIG_PROC_FS
158int 165int
159xfs_init_procfs(void) 166xfs_init_procfs(void)
160{ 167{
161 if (!proc_mkdir("fs/xfs", NULL)) 168 if (!proc_mkdir("fs/xfs", NULL))
169 return -ENOMEM;
170
171 if (!proc_symlink("fs/xfs/stat", NULL,
172 "/sys/fs/xfs/stats/stats"))
162 goto out; 173 goto out;
163 174
164 if (!proc_create("fs/xfs/stat", 0, NULL,
165 &xfs_stat_proc_fops))
166 goto out_remove_xfs_dir;
167#ifdef CONFIG_XFS_QUOTA 175#ifdef CONFIG_XFS_QUOTA
168 if (!proc_create("fs/xfs/xqmstat", 0, NULL, 176 if (!proc_create("fs/xfs/xqmstat", 0, NULL,
169 &xqmstat_proc_fops)) 177 &xqmstat_proc_fops))
170 goto out_remove_stat_file; 178 goto out;
171 if (!proc_create("fs/xfs/xqm", 0, NULL, 179 if (!proc_create("fs/xfs/xqm", 0, NULL,
172 &xqm_proc_fops)) 180 &xqm_proc_fops))
173 goto out_remove_xqmstat_file; 181 goto out;
174#endif 182#endif
175 return 0; 183 return 0;
176 184
177#ifdef CONFIG_XFS_QUOTA 185out:
178 out_remove_xqmstat_file: 186 remove_proc_subtree("fs/xfs", NULL);
179 remove_proc_entry("fs/xfs/xqmstat", NULL);
180 out_remove_stat_file:
181 remove_proc_entry("fs/xfs/stat", NULL);
182#endif
183 out_remove_xfs_dir:
184 remove_proc_entry("fs/xfs", NULL);
185 out:
186 return -ENOMEM; 187 return -ENOMEM;
187} 188}
188 189
189void 190void
190xfs_cleanup_procfs(void) 191xfs_cleanup_procfs(void)
191{ 192{
192#ifdef CONFIG_XFS_QUOTA 193 remove_proc_subtree("fs/xfs", NULL);
193 remove_proc_entry("fs/xfs/xqm", NULL);
194 remove_proc_entry("fs/xfs/xqmstat", NULL);
195#endif
196 remove_proc_entry("fs/xfs/stat", NULL);
197 remove_proc_entry("fs/xfs", NULL);
198} 194}
195#endif /* CONFIG_PROC_FS */
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index c8f238b8299a..483b0eff1988 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -19,8 +19,6 @@
19#define __XFS_STATS_H__ 19#define __XFS_STATS_H__
20 20
21 21
22#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
23
24#include <linux/percpu.h> 22#include <linux/percpu.h>
25 23
26/* 24/*
@@ -215,15 +213,29 @@ struct xfsstats {
215 __uint64_t xs_read_bytes; 213 __uint64_t xs_read_bytes;
216}; 214};
217 215
218DECLARE_PER_CPU(struct xfsstats, xfsstats); 216int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);
217void xfs_stats_clearall(struct xfsstats __percpu *stats);
218extern struct xstats xfsstats;
219 219
220/* 220#define XFS_STATS_INC(mp, v) \
221 * We don't disable preempt, not too worried about poking the 221do { \
222 * wrong CPU's stat for now (also aggregated before reporting). 222 per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \
223 */ 223 per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \
224#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) 224} while (0)
225#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) 225
226#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) 226#define XFS_STATS_DEC(mp, v) \
227do { \
228 per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \
229 per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \
230} while (0)
231
232#define XFS_STATS_ADD(mp, v, inc) \
233do { \
234 per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \
235 per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \
236} while (0)
237
238#if defined(CONFIG_PROC_FS)
227 239
228extern int xfs_init_procfs(void); 240extern int xfs_init_procfs(void);
229extern void xfs_cleanup_procfs(void); 241extern void xfs_cleanup_procfs(void);
@@ -231,10 +243,6 @@ extern void xfs_cleanup_procfs(void);
231 243
232#else /* !CONFIG_PROC_FS */ 244#else /* !CONFIG_PROC_FS */
233 245
234# define XFS_STATS_INC(count)
235# define XFS_STATS_DEC(count)
236# define XFS_STATS_ADD(count, inc)
237
238static inline int xfs_init_procfs(void) 246static inline int xfs_init_procfs(void)
239{ 247{
240 return 0; 248 return 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 904f637cfa5f..36bd8825bfb0 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -838,17 +838,18 @@ xfs_init_mount_workqueues(
838 goto out_destroy_unwritten; 838 goto out_destroy_unwritten;
839 839
840 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", 840 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
841 WQ_FREEZABLE, 0, mp->m_fsname); 841 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
842 if (!mp->m_reclaim_workqueue) 842 if (!mp->m_reclaim_workqueue)
843 goto out_destroy_cil; 843 goto out_destroy_cil;
844 844
845 mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", 845 mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
846 WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); 846 WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
847 mp->m_fsname);
847 if (!mp->m_log_workqueue) 848 if (!mp->m_log_workqueue)
848 goto out_destroy_reclaim; 849 goto out_destroy_reclaim;
849 850
850 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", 851 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
851 WQ_FREEZABLE, 0, mp->m_fsname); 852 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
852 if (!mp->m_eofblocks_workqueue) 853 if (!mp->m_eofblocks_workqueue)
853 goto out_destroy_log; 854 goto out_destroy_log;
854 855
@@ -922,7 +923,7 @@ xfs_fs_destroy_inode(
922 923
923 trace_xfs_destroy_inode(ip); 924 trace_xfs_destroy_inode(ip);
924 925
925 XFS_STATS_INC(vn_reclaim); 926 XFS_STATS_INC(ip->i_mount, vn_reclaim);
926 927
927 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 928 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
928 929
@@ -983,8 +984,8 @@ xfs_fs_evict_inode(
983 984
984 truncate_inode_pages_final(&inode->i_data); 985 truncate_inode_pages_final(&inode->i_data);
985 clear_inode(inode); 986 clear_inode(inode);
986 XFS_STATS_INC(vn_rele); 987 XFS_STATS_INC(ip->i_mount, vn_rele);
987 XFS_STATS_INC(vn_remove); 988 XFS_STATS_INC(ip->i_mount, vn_remove);
988 989
989 xfs_inactive(ip); 990 xfs_inactive(ip);
990} 991}
@@ -1474,9 +1475,16 @@ xfs_fs_fill_super(
1474 if (error) 1475 if (error)
1475 goto out_destroy_workqueues; 1476 goto out_destroy_workqueues;
1476 1477
1478 /* Allocate stats memory before we do operations that might use it */
1479 mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1480 if (!mp->m_stats.xs_stats) {
1481 error = -ENOMEM;
1482 goto out_destroy_counters;
1483 }
1484
1477 error = xfs_readsb(mp, flags); 1485 error = xfs_readsb(mp, flags);
1478 if (error) 1486 if (error)
1479 goto out_destroy_counters; 1487 goto out_free_stats;
1480 1488
1481 error = xfs_finish_flags(mp); 1489 error = xfs_finish_flags(mp);
1482 if (error) 1490 if (error)
@@ -1545,9 +1553,11 @@ xfs_fs_fill_super(
1545 xfs_filestream_unmount(mp); 1553 xfs_filestream_unmount(mp);
1546 out_free_sb: 1554 out_free_sb:
1547 xfs_freesb(mp); 1555 xfs_freesb(mp);
1556 out_free_stats:
1557 free_percpu(mp->m_stats.xs_stats);
1548 out_destroy_counters: 1558 out_destroy_counters:
1549 xfs_destroy_percpu_counters(mp); 1559 xfs_destroy_percpu_counters(mp);
1550out_destroy_workqueues: 1560 out_destroy_workqueues:
1551 xfs_destroy_mount_workqueues(mp); 1561 xfs_destroy_mount_workqueues(mp);
1552 out_close_devices: 1562 out_close_devices:
1553 xfs_close_devices(mp); 1563 xfs_close_devices(mp);
@@ -1574,6 +1584,7 @@ xfs_fs_put_super(
1574 xfs_unmountfs(mp); 1584 xfs_unmountfs(mp);
1575 1585
1576 xfs_freesb(mp); 1586 xfs_freesb(mp);
1587 free_percpu(mp->m_stats.xs_stats);
1577 xfs_destroy_percpu_counters(mp); 1588 xfs_destroy_percpu_counters(mp);
1578 xfs_destroy_mount_workqueues(mp); 1589 xfs_destroy_mount_workqueues(mp);
1579 xfs_close_devices(mp); 1590 xfs_close_devices(mp);
@@ -1838,19 +1849,32 @@ init_xfs_fs(void)
1838 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); 1849 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
1839 if (!xfs_kset) { 1850 if (!xfs_kset) {
1840 error = -ENOMEM; 1851 error = -ENOMEM;
1841 goto out_sysctl_unregister;; 1852 goto out_sysctl_unregister;
1842 } 1853 }
1843 1854
1855 xfsstats.xs_kobj.kobject.kset = xfs_kset;
1856
1857 xfsstats.xs_stats = alloc_percpu(struct xfsstats);
1858 if (!xfsstats.xs_stats) {
1859 error = -ENOMEM;
1860 goto out_kset_unregister;
1861 }
1862
1863 error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
1864 "stats");
1865 if (error)
1866 goto out_free_stats;
1867
1844#ifdef DEBUG 1868#ifdef DEBUG
1845 xfs_dbg_kobj.kobject.kset = xfs_kset; 1869 xfs_dbg_kobj.kobject.kset = xfs_kset;
1846 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); 1870 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
1847 if (error) 1871 if (error)
1848 goto out_kset_unregister; 1872 goto out_remove_stats_kobj;
1849#endif 1873#endif
1850 1874
1851 error = xfs_qm_init(); 1875 error = xfs_qm_init();
1852 if (error) 1876 if (error)
1853 goto out_remove_kobj; 1877 goto out_remove_dbg_kobj;
1854 1878
1855 error = register_filesystem(&xfs_fs_type); 1879 error = register_filesystem(&xfs_fs_type);
1856 if (error) 1880 if (error)
@@ -1859,11 +1883,15 @@ init_xfs_fs(void)
1859 1883
1860 out_qm_exit: 1884 out_qm_exit:
1861 xfs_qm_exit(); 1885 xfs_qm_exit();
1862 out_remove_kobj: 1886 out_remove_dbg_kobj:
1863#ifdef DEBUG 1887#ifdef DEBUG
1864 xfs_sysfs_del(&xfs_dbg_kobj); 1888 xfs_sysfs_del(&xfs_dbg_kobj);
1865 out_kset_unregister: 1889 out_remove_stats_kobj:
1866#endif 1890#endif
1891 xfs_sysfs_del(&xfsstats.xs_kobj);
1892 out_free_stats:
1893 free_percpu(xfsstats.xs_stats);
1894 out_kset_unregister:
1867 kset_unregister(xfs_kset); 1895 kset_unregister(xfs_kset);
1868 out_sysctl_unregister: 1896 out_sysctl_unregister:
1869 xfs_sysctl_unregister(); 1897 xfs_sysctl_unregister();
@@ -1889,6 +1917,8 @@ exit_xfs_fs(void)
1889#ifdef DEBUG 1917#ifdef DEBUG
1890 xfs_sysfs_del(&xfs_dbg_kobj); 1918 xfs_sysfs_del(&xfs_dbg_kobj);
1891#endif 1919#endif
1920 xfs_sysfs_del(&xfsstats.xs_kobj);
1921 free_percpu(xfsstats.xs_stats);
1892 kset_unregister(xfs_kset); 1922 kset_unregister(xfs_kset);
1893 xfs_sysctl_unregister(); 1923 xfs_sysctl_unregister();
1894 xfs_cleanup_procfs(); 1924 xfs_cleanup_procfs();
@@ -1896,6 +1926,7 @@ exit_xfs_fs(void)
1896 xfs_mru_cache_uninit(); 1926 xfs_mru_cache_uninit();
1897 xfs_destroy_workqueues(); 1927 xfs_destroy_workqueues();
1898 xfs_destroy_zones(); 1928 xfs_destroy_zones();
1929 xfs_uuid_table_free();
1899} 1930}
1900 1931
1901module_init(init_xfs_fs); 1932module_init(init_xfs_fs);
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index a0c8067cea6f..aed74d3f8da9 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -19,6 +19,7 @@
19#include <linux/sysctl.h> 19#include <linux/sysctl.h>
20#include <linux/proc_fs.h> 20#include <linux/proc_fs.h>
21#include "xfs_error.h" 21#include "xfs_error.h"
22#include "xfs_stats.h"
22 23
23static struct ctl_table_header *xfs_table_header; 24static struct ctl_table_header *xfs_table_header;
24 25
@@ -31,22 +32,12 @@ xfs_stats_clear_proc_handler(
31 size_t *lenp, 32 size_t *lenp,
32 loff_t *ppos) 33 loff_t *ppos)
33{ 34{
34 int c, ret, *valp = ctl->data; 35 int ret, *valp = ctl->data;
35 __uint32_t vn_active;
36 36
37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); 37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
38 38
39 if (!ret && write && *valp) { 39 if (!ret && write && *valp) {
40 xfs_notice(NULL, "Clearing xfsstats"); 40 xfs_stats_clearall(xfsstats.xs_stats);
41 for_each_possible_cpu(c) {
42 preempt_disable();
43 /* save vn_active, it's a universal truth! */
44 vn_active = per_cpu(xfsstats, c).vn_active;
45 memset(&per_cpu(xfsstats, c), 0,
46 sizeof(struct xfsstats));
47 per_cpu(xfsstats, c).vn_active = vn_active;
48 preempt_enable();
49 }
50 xfs_stats_clear = 0; 41 xfs_stats_clear = 0;
51 } 42 }
52 43
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index aa03670851d8..ee70f5dec9dc 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -21,11 +21,13 @@
21#include "xfs_log_format.h" 21#include "xfs_log_format.h"
22#include "xfs_log.h" 22#include "xfs_log.h"
23#include "xfs_log_priv.h" 23#include "xfs_log_priv.h"
24#include "xfs_stats.h"
24 25
25struct xfs_sysfs_attr { 26struct xfs_sysfs_attr {
26 struct attribute attr; 27 struct attribute attr;
27 ssize_t (*show)(char *buf, void *data); 28 ssize_t (*show)(struct kobject *kobject, char *buf);
28 ssize_t (*store)(const char *buf, size_t count, void *data); 29 ssize_t (*store)(struct kobject *kobject, const char *buf,
30 size_t count);
29}; 31};
30 32
31static inline struct xfs_sysfs_attr * 33static inline struct xfs_sysfs_attr *
@@ -38,6 +40,8 @@ to_attr(struct attribute *attr)
38 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name) 40 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name)
39#define XFS_SYSFS_ATTR_RO(name) \ 41#define XFS_SYSFS_ATTR_RO(name) \
40 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name) 42 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name)
43#define XFS_SYSFS_ATTR_WO(name) \
44 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_WO(name)
41 45
42#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr 46#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
43 47
@@ -51,14 +55,42 @@ struct kobj_type xfs_mp_ktype = {
51 .release = xfs_sysfs_release, 55 .release = xfs_sysfs_release,
52}; 56};
53 57
58STATIC ssize_t
59xfs_sysfs_object_show(
60 struct kobject *kobject,
61 struct attribute *attr,
62 char *buf)
63{
64 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
65
66 return xfs_attr->show ? xfs_attr->show(kobject, buf) : 0;
67}
68
69STATIC ssize_t
70xfs_sysfs_object_store(
71 struct kobject *kobject,
72 struct attribute *attr,
73 const char *buf,
74 size_t count)
75{
76 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
77
78 return xfs_attr->store ? xfs_attr->store(kobject, buf, count) : 0;
79}
80
81static const struct sysfs_ops xfs_sysfs_ops = {
82 .show = xfs_sysfs_object_show,
83 .store = xfs_sysfs_object_store,
84};
85
54#ifdef DEBUG 86#ifdef DEBUG
55/* debug */ 87/* debug */
56 88
57STATIC ssize_t 89STATIC ssize_t
58log_recovery_delay_store( 90log_recovery_delay_store(
91 struct kobject *kobject,
59 const char *buf, 92 const char *buf,
60 size_t count, 93 size_t count)
61 void *data)
62{ 94{
63 int ret; 95 int ret;
64 int val; 96 int val;
@@ -77,8 +109,8 @@ log_recovery_delay_store(
77 109
78STATIC ssize_t 110STATIC ssize_t
79log_recovery_delay_show( 111log_recovery_delay_show(
80 char *buf, 112 struct kobject *kobject,
81 void *data) 113 char *buf)
82{ 114{
83 return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); 115 return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay);
84} 116}
@@ -89,52 +121,87 @@ static struct attribute *xfs_dbg_attrs[] = {
89 NULL, 121 NULL,
90}; 122};
91 123
124struct kobj_type xfs_dbg_ktype = {
125 .release = xfs_sysfs_release,
126 .sysfs_ops = &xfs_sysfs_ops,
127 .default_attrs = xfs_dbg_attrs,
128};
129
130#endif /* DEBUG */
131
132/* stats */
133
134static inline struct xstats *
135to_xstats(struct kobject *kobject)
136{
137 struct xfs_kobj *kobj = to_kobj(kobject);
138
139 return container_of(kobj, struct xstats, xs_kobj);
140}
141
92STATIC ssize_t 142STATIC ssize_t
93xfs_dbg_show( 143stats_show(
94 struct kobject *kobject, 144 struct kobject *kobject,
95 struct attribute *attr, 145 char *buf)
96 char *buf)
97{ 146{
98 struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 147 struct xstats *stats = to_xstats(kobject);
99 148
100 return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0; 149 return xfs_stats_format(stats->xs_stats, buf);
101} 150}
151XFS_SYSFS_ATTR_RO(stats);
102 152
103STATIC ssize_t 153STATIC ssize_t
104xfs_dbg_store( 154stats_clear_store(
105 struct kobject *kobject, 155 struct kobject *kobject,
106 struct attribute *attr, 156 const char *buf,
107 const char *buf, 157 size_t count)
108 size_t count)
109{ 158{
110 struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 159 int ret;
160 int val;
161 struct xstats *stats = to_xstats(kobject);
162
163 ret = kstrtoint(buf, 0, &val);
164 if (ret)
165 return ret;
111 166
112 return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0; 167 if (val != 1)
168 return -EINVAL;
169
170 xfs_stats_clearall(stats->xs_stats);
171 return count;
113} 172}
173XFS_SYSFS_ATTR_WO(stats_clear);
114 174
115static struct sysfs_ops xfs_dbg_ops = { 175static struct attribute *xfs_stats_attrs[] = {
116 .show = xfs_dbg_show, 176 ATTR_LIST(stats),
117 .store = xfs_dbg_store, 177 ATTR_LIST(stats_clear),
178 NULL,
118}; 179};
119 180
120struct kobj_type xfs_dbg_ktype = { 181struct kobj_type xfs_stats_ktype = {
121 .release = xfs_sysfs_release, 182 .release = xfs_sysfs_release,
122 .sysfs_ops = &xfs_dbg_ops, 183 .sysfs_ops = &xfs_sysfs_ops,
123 .default_attrs = xfs_dbg_attrs, 184 .default_attrs = xfs_stats_attrs,
124}; 185};
125 186
126#endif /* DEBUG */
127
128/* xlog */ 187/* xlog */
129 188
189static inline struct xlog *
190to_xlog(struct kobject *kobject)
191{
192 struct xfs_kobj *kobj = to_kobj(kobject);
193
194 return container_of(kobj, struct xlog, l_kobj);
195}
196
130STATIC ssize_t 197STATIC ssize_t
131log_head_lsn_show( 198log_head_lsn_show(
132 char *buf, 199 struct kobject *kobject,
133 void *data) 200 char *buf)
134{ 201{
135 struct xlog *log = data;
136 int cycle; 202 int cycle;
137 int block; 203 int block;
204 struct xlog *log = to_xlog(kobject);
138 205
139 spin_lock(&log->l_icloglock); 206 spin_lock(&log->l_icloglock);
140 cycle = log->l_curr_cycle; 207 cycle = log->l_curr_cycle;
@@ -147,12 +214,12 @@ XFS_SYSFS_ATTR_RO(log_head_lsn);
147 214
148STATIC ssize_t 215STATIC ssize_t
149log_tail_lsn_show( 216log_tail_lsn_show(
150 char *buf, 217 struct kobject *kobject,
151 void *data) 218 char *buf)
152{ 219{
153 struct xlog *log = data;
154 int cycle; 220 int cycle;
155 int block; 221 int block;
222 struct xlog *log = to_xlog(kobject);
156 223
157 xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); 224 xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
158 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); 225 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
@@ -161,12 +228,13 @@ XFS_SYSFS_ATTR_RO(log_tail_lsn);
161 228
162STATIC ssize_t 229STATIC ssize_t
163reserve_grant_head_show( 230reserve_grant_head_show(
164 char *buf, 231 struct kobject *kobject,
165 void *data) 232 char *buf)
233
166{ 234{
167 struct xlog *log = data;
168 int cycle; 235 int cycle;
169 int bytes; 236 int bytes;
237 struct xlog *log = to_xlog(kobject);
170 238
171 xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); 239 xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
172 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); 240 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
@@ -175,12 +243,12 @@ XFS_SYSFS_ATTR_RO(reserve_grant_head);
175 243
176STATIC ssize_t 244STATIC ssize_t
177write_grant_head_show( 245write_grant_head_show(
178 char *buf, 246 struct kobject *kobject,
179 void *data) 247 char *buf)
180{ 248{
181 struct xlog *log = data;
182 int cycle; 249 int cycle;
183 int bytes; 250 int bytes;
251 struct xlog *log = to_xlog(kobject);
184 252
185 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); 253 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
186 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); 254 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
@@ -195,45 +263,8 @@ static struct attribute *xfs_log_attrs[] = {
195 NULL, 263 NULL,
196}; 264};
197 265
198static inline struct xlog *
199to_xlog(struct kobject *kobject)
200{
201 struct xfs_kobj *kobj = to_kobj(kobject);
202 return container_of(kobj, struct xlog, l_kobj);
203}
204
205STATIC ssize_t
206xfs_log_show(
207 struct kobject *kobject,
208 struct attribute *attr,
209 char *buf)
210{
211 struct xlog *log = to_xlog(kobject);
212 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
213
214 return xfs_attr->show ? xfs_attr->show(buf, log) : 0;
215}
216
217STATIC ssize_t
218xfs_log_store(
219 struct kobject *kobject,
220 struct attribute *attr,
221 const char *buf,
222 size_t count)
223{
224 struct xlog *log = to_xlog(kobject);
225 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
226
227 return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0;
228}
229
230static struct sysfs_ops xfs_log_ops = {
231 .show = xfs_log_show,
232 .store = xfs_log_store,
233};
234
235struct kobj_type xfs_log_ktype = { 266struct kobj_type xfs_log_ktype = {
236 .release = xfs_sysfs_release, 267 .release = xfs_sysfs_release,
237 .sysfs_ops = &xfs_log_ops, 268 .sysfs_ops = &xfs_sysfs_ops,
238 .default_attrs = xfs_log_attrs, 269 .default_attrs = xfs_log_attrs,
239}; 270};
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index 240eee35f342..be692e59938d 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -22,6 +22,7 @@
22extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ 22extern struct kobj_type xfs_mp_ktype; /* xfs_mount */
23extern struct kobj_type xfs_dbg_ktype; /* debug */ 23extern struct kobj_type xfs_dbg_ktype; /* debug */
24extern struct kobj_type xfs_log_ktype; /* xlog */ 24extern struct kobj_type xfs_log_ktype; /* xlog */
25extern struct kobj_type xfs_stats_ktype; /* stats */
25 26
26static inline struct xfs_kobj * 27static inline struct xfs_kobj *
27to_kobj(struct kobject *kobject) 28to_kobj(struct kobject *kobject)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 5ed36b1e04c1..877079eb0f8f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -689,6 +689,7 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
689DEFINE_INODE_EVENT(xfs_filemap_fault); 689DEFINE_INODE_EVENT(xfs_filemap_fault);
690DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); 690DEFINE_INODE_EVENT(xfs_filemap_pmd_fault);
691DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); 691DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
692DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
692 693
693DECLARE_EVENT_CLASS(xfs_iref_class, 694DECLARE_EVENT_CLASS(xfs_iref_class,
694 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), 695 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
@@ -1312,6 +1313,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
1312DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 1313DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
1313DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); 1314DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
1314DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); 1315DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
1316DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
1315 1317
1316DECLARE_EVENT_CLASS(xfs_itrunc_class, 1318DECLARE_EVENT_CLASS(xfs_itrunc_class,
1317 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), 1319 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index a0ab1dae9c31..748b16aff45a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -930,9 +930,9 @@ __xfs_trans_commit(
930 */ 930 */
931 if (sync) { 931 if (sync) {
932 error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); 932 error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
933 XFS_STATS_INC(xs_trans_sync); 933 XFS_STATS_INC(mp, xs_trans_sync);
934 } else { 934 } else {
935 XFS_STATS_INC(xs_trans_async); 935 XFS_STATS_INC(mp, xs_trans_async);
936 } 936 }
937 937
938 return error; 938 return error;
@@ -955,7 +955,7 @@ out_unreserve:
955 xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); 955 xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
956 xfs_trans_free(tp); 956 xfs_trans_free(tp);
957 957
958 XFS_STATS_INC(xs_trans_empty); 958 XFS_STATS_INC(mp, xs_trans_empty);
959 return error; 959 return error;
960} 960}
961 961
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 1098cf490189..aa67339b9537 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -349,7 +349,7 @@ xfsaild_push(
349 xfs_ail_min_lsn(ailp))) { 349 xfs_ail_min_lsn(ailp))) {
350 ailp->xa_log_flush = 0; 350 ailp->xa_log_flush = 0;
351 351
352 XFS_STATS_INC(xs_push_ail_flush); 352 XFS_STATS_INC(mp, xs_push_ail_flush);
353 xfs_log_force(mp, XFS_LOG_SYNC); 353 xfs_log_force(mp, XFS_LOG_SYNC);
354 } 354 }
355 355
@@ -371,7 +371,7 @@ xfsaild_push(
371 goto out_done; 371 goto out_done;
372 } 372 }
373 373
374 XFS_STATS_INC(xs_push_ail); 374 XFS_STATS_INC(mp, xs_push_ail);
375 375
376 lsn = lip->li_lsn; 376 lsn = lip->li_lsn;
377 while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { 377 while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
@@ -385,7 +385,7 @@ xfsaild_push(
385 lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list); 385 lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
386 switch (lock_result) { 386 switch (lock_result) {
387 case XFS_ITEM_SUCCESS: 387 case XFS_ITEM_SUCCESS:
388 XFS_STATS_INC(xs_push_ail_success); 388 XFS_STATS_INC(mp, xs_push_ail_success);
389 trace_xfs_ail_push(lip); 389 trace_xfs_ail_push(lip);
390 390
391 ailp->xa_last_pushed_lsn = lsn; 391 ailp->xa_last_pushed_lsn = lsn;
@@ -403,7 +403,7 @@ xfsaild_push(
403 * re-try the flushing relatively soon if most of the 403 * re-try the flushing relatively soon if most of the
 404 * AIL is being flushed. 404 * AIL is being flushed.
405 */ 405 */
406 XFS_STATS_INC(xs_push_ail_flushing); 406 XFS_STATS_INC(mp, xs_push_ail_flushing);
407 trace_xfs_ail_flushing(lip); 407 trace_xfs_ail_flushing(lip);
408 408
409 flushing++; 409 flushing++;
@@ -411,14 +411,14 @@ xfsaild_push(
411 break; 411 break;
412 412
413 case XFS_ITEM_PINNED: 413 case XFS_ITEM_PINNED:
414 XFS_STATS_INC(xs_push_ail_pinned); 414 XFS_STATS_INC(mp, xs_push_ail_pinned);
415 trace_xfs_ail_pinned(lip); 415 trace_xfs_ail_pinned(lip);
416 416
417 stuck++; 417 stuck++;
418 ailp->xa_log_flush++; 418 ailp->xa_log_flush++;
419 break; 419 break;
420 case XFS_ITEM_LOCKED: 420 case XFS_ITEM_LOCKED:
421 XFS_STATS_INC(xs_push_ail_locked); 421 XFS_STATS_INC(mp, xs_push_ail_locked);
422 trace_xfs_ail_locked(lip); 422 trace_xfs_ail_locked(lip);
423 423
424 stuck++; 424 stuck++;
@@ -497,6 +497,7 @@ xfsaild(
497 long tout = 0; /* milliseconds */ 497 long tout = 0; /* milliseconds */
498 498
499 current->flags |= PF_MEMALLOC; 499 current->flags |= PF_MEMALLOC;
500 set_freezable();
500 501
501 while (!kthread_should_stop()) { 502 while (!kthread_should_stop()) {
502 if (tout && tout <= 20) 503 if (tout && tout <= 20)
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 17280cd71934..b97f1df910ab 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -108,6 +108,15 @@ xfs_trans_log_inode(
108 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 108 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
109 109
110 /* 110 /*
111 * Record the specific change for fdatasync optimisation. This
112 * allows fdatasync to skip log forces for inodes that are only
113 * timestamp dirty. We do this before the change count so that
114 * the core being logged in this case does not impact on fdatasync
115 * behaviour.
116 */
117 ip->i_itemp->ili_fsync_fields |= flags;
118
119 /*
111 * First time we log the inode in a transaction, bump the inode change 120 * First time we log the inode in a transaction, bump the inode change
112 * counter if it is configured for this to occur. We don't use 121 * counter if it is configured for this to occur. We don't use
113 * inode_inc_version() because there is no need for extra locking around 122 * inode_inc_version() because there is no need for extra locking around
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index c036815183cb..8294f86441bf 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -53,11 +53,34 @@ xfs_xattr_get(struct dentry *dentry, const char *name,
53 return asize; 53 return asize;
54} 54}
55 55
56void
57xfs_forget_acl(
58 struct inode *inode,
59 const char *name,
60 int xflags)
61{
62 /*
63 * Invalidate any cached ACLs if the user has bypassed the ACL
 64 * interface. We don't validate the content whatsoever so it is the caller's
 65 * responsibility to provide data in valid format and ensure i_mode is
66 * consistent.
67 */
68 if (xflags & ATTR_ROOT) {
69#ifdef CONFIG_XFS_POSIX_ACL
70 if (!strcmp(name, SGI_ACL_FILE))
71 forget_cached_acl(inode, ACL_TYPE_ACCESS);
72 else if (!strcmp(name, SGI_ACL_DEFAULT))
73 forget_cached_acl(inode, ACL_TYPE_DEFAULT);
74#endif
75 }
76}
77
56static int 78static int
57xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, 79xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
58 size_t size, int flags, int xflags) 80 size_t size, int flags, int xflags)
59{ 81{
60 struct xfs_inode *ip = XFS_I(d_inode(dentry)); 82 struct xfs_inode *ip = XFS_I(d_inode(dentry));
83 int error;
61 84
62 if (strcmp(name, "") == 0) 85 if (strcmp(name, "") == 0)
63 return -EINVAL; 86 return -EINVAL;
@@ -70,8 +93,12 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
70 93
71 if (!value) 94 if (!value)
72 return xfs_attr_remove(ip, (unsigned char *)name, xflags); 95 return xfs_attr_remove(ip, (unsigned char *)name, xflags);
73 return xfs_attr_set(ip, (unsigned char *)name, 96 error = xfs_attr_set(ip, (unsigned char *)name,
74 (void *)value, size, xflags); 97 (void *)value, size, xflags);
98 if (!error)
99 xfs_forget_acl(d_inode(dentry), name, xflags);
100
101 return error;
75} 102}
76 103
77static const struct xattr_handler xfs_xattr_user_handler = { 104static const struct xattr_handler xfs_xattr_user_handler = {