Diffstat (limited to 'fs')
-rw-r--r--  fs/xfs/libxfs/xfs_bmap.c      |  72
-rw-r--r--  fs/xfs/libxfs/xfs_da_btree.c  |   8
-rw-r--r--  fs/xfs/libxfs/xfs_format.h    |  62
-rw-r--r--  fs/xfs/libxfs/xfs_ialloc.c    |   6
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c        |  12
-rw-r--r--  fs/xfs/xfs_bmap_util.c        |  31
-rw-r--r--  fs/xfs/xfs_error.c            |   2
-rw-r--r--  fs/xfs/xfs_file.c             |  84
-rw-r--r--  fs/xfs/xfs_filestream.c       |   2
-rw-r--r--  fs/xfs/xfs_fsops.c            |  20
-rw-r--r--  fs/xfs/xfs_inode.c            | 532
-rw-r--r--  fs/xfs/xfs_inode.h            |  36
-rw-r--r--  fs/xfs/xfs_ioctl.c            |   5
-rw-r--r--  fs/xfs/xfs_iomap.c            |   3
-rw-r--r--  fs/xfs/xfs_iops.c             | 101
-rw-r--r--  fs/xfs/xfs_linux.h            |   9
-rw-r--r--  fs/xfs/xfs_log_recover.c      |   4
-rw-r--r--  fs/xfs/xfs_mount.c            | 918
-rw-r--r--  fs/xfs/xfs_mount.h            |  95
-rw-r--r--  fs/xfs/xfs_mru_cache.c        |   2
-rw-r--r--  fs/xfs/xfs_pnfs.c             |   4
-rw-r--r--  fs/xfs/xfs_qm.c               |   5
-rw-r--r--  fs/xfs/xfs_super.c            | 107
-rw-r--r--  fs/xfs/xfs_super.h            |   2
-rw-r--r--  fs/xfs/xfs_trace.h            |  23
-rw-r--r--  fs/xfs/xfs_trans.c            | 234
26 files changed, 890 insertions(+), 1489 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index e8696f5a8041..aeffeaaac0ec 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -244,30 +244,6 @@ xfs_bmap_forkoff_reset(
244 } 244 }
245} 245}
246 246
247/*
248 * Debug/sanity checking code
249 */
250
251STATIC int
252xfs_bmap_sanity_check(
253 struct xfs_mount *mp,
254 struct xfs_buf *bp,
255 int level)
256{
257 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
258
259 if (block->bb_magic != cpu_to_be32(XFS_BMAP_CRC_MAGIC) &&
260 block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC))
261 return 0;
262
263 if (be16_to_cpu(block->bb_level) != level ||
264 be16_to_cpu(block->bb_numrecs) == 0 ||
265 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
266 return 0;
267
268 return 1;
269}
270
271#ifdef DEBUG 247#ifdef DEBUG
272STATIC struct xfs_buf * 248STATIC struct xfs_buf *
273xfs_bmap_get_bp( 249xfs_bmap_get_bp(
@@ -410,9 +386,6 @@ xfs_bmap_check_leaf_extents(
410 goto error_norelse; 386 goto error_norelse;
411 } 387 }
412 block = XFS_BUF_TO_BLOCK(bp); 388 block = XFS_BUF_TO_BLOCK(bp);
413 XFS_WANT_CORRUPTED_GOTO(mp,
414 xfs_bmap_sanity_check(mp, bp, level),
415 error0);
416 if (level == 0) 389 if (level == 0)
417 break; 390 break;
418 391
@@ -1312,8 +1285,6 @@ xfs_bmap_read_extents(
1312 if (error) 1285 if (error)
1313 return error; 1286 return error;
1314 block = XFS_BUF_TO_BLOCK(bp); 1287 block = XFS_BUF_TO_BLOCK(bp);
1315 XFS_WANT_CORRUPTED_GOTO(mp,
1316 xfs_bmap_sanity_check(mp, bp, level), error0);
1317 if (level == 0) 1288 if (level == 0)
1318 break; 1289 break;
1319 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]); 1290 pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
@@ -1346,9 +1317,6 @@ xfs_bmap_read_extents(
1346 XFS_ERRLEVEL_LOW, ip->i_mount, block); 1317 XFS_ERRLEVEL_LOW, ip->i_mount, block);
1347 goto error0; 1318 goto error0;
1348 } 1319 }
1349 XFS_WANT_CORRUPTED_GOTO(mp,
1350 xfs_bmap_sanity_check(mp, bp, 0),
1351 error0);
1352 /* 1320 /*
1353 * Read-ahead the next leaf block, if any. 1321 * Read-ahead the next leaf block, if any.
1354 */ 1322 */
@@ -2215,9 +2183,8 @@ xfs_bmap_add_extent_delay_real(
2215 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - 2183 diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
2216 (bma->cur ? bma->cur->bc_private.b.allocated : 0)); 2184 (bma->cur ? bma->cur->bc_private.b.allocated : 0));
2217 if (diff > 0) { 2185 if (diff > 0) {
2218 error = xfs_icsb_modify_counters(bma->ip->i_mount, 2186 error = xfs_mod_fdblocks(bma->ip->i_mount,
2219 XFS_SBS_FDBLOCKS, 2187 -((int64_t)diff), false);
2220 -((int64_t)diff), 0);
2221 ASSERT(!error); 2188 ASSERT(!error);
2222 if (error) 2189 if (error)
2223 goto done; 2190 goto done;
@@ -2268,9 +2235,8 @@ xfs_bmap_add_extent_delay_real(
2268 temp += bma->cur->bc_private.b.allocated; 2235 temp += bma->cur->bc_private.b.allocated;
2269 ASSERT(temp <= da_old); 2236 ASSERT(temp <= da_old);
2270 if (temp < da_old) 2237 if (temp < da_old)
2271 xfs_icsb_modify_counters(bma->ip->i_mount, 2238 xfs_mod_fdblocks(bma->ip->i_mount,
2272 XFS_SBS_FDBLOCKS, 2239 (int64_t)(da_old - temp), false);
2273 (int64_t)(da_old - temp), 0);
2274 } 2240 }
2275 2241
2276 /* clear out the allocated field, done with it now in any case. */ 2242 /* clear out the allocated field, done with it now in any case. */
@@ -2948,8 +2914,8 @@ xfs_bmap_add_extent_hole_delay(
2948 } 2914 }
2949 if (oldlen != newlen) { 2915 if (oldlen != newlen) {
2950 ASSERT(oldlen > newlen); 2916 ASSERT(oldlen > newlen);
2951 xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, 2917 xfs_mod_fdblocks(ip->i_mount, (int64_t)(oldlen - newlen),
2952 (int64_t)(oldlen - newlen), 0); 2918 false);
2953 /* 2919 /*
2954 * Nothing to do for disk quota accounting here. 2920 * Nothing to do for disk quota accounting here.
2955 */ 2921 */
@@ -4166,18 +4132,15 @@ xfs_bmapi_reserve_delalloc(
4166 ASSERT(indlen > 0); 4132 ASSERT(indlen > 0);
4167 4133
4168 if (rt) { 4134 if (rt) {
4169 error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 4135 error = xfs_mod_frextents(mp, -((int64_t)extsz));
4170 -((int64_t)extsz), 0);
4171 } else { 4136 } else {
4172 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 4137 error = xfs_mod_fdblocks(mp, -((int64_t)alen), false);
4173 -((int64_t)alen), 0);
4174 } 4138 }
4175 4139
4176 if (error) 4140 if (error)
4177 goto out_unreserve_quota; 4141 goto out_unreserve_quota;
4178 4142
4179 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 4143 error = xfs_mod_fdblocks(mp, -((int64_t)indlen), false);
4180 -((int64_t)indlen), 0);
4181 if (error) 4144 if (error)
4182 goto out_unreserve_blocks; 4145 goto out_unreserve_blocks;
4183 4146
@@ -4204,9 +4167,9 @@ xfs_bmapi_reserve_delalloc(
4204 4167
4205out_unreserve_blocks: 4168out_unreserve_blocks:
4206 if (rt) 4169 if (rt)
4207 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0); 4170 xfs_mod_frextents(mp, extsz);
4208 else 4171 else
4209 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0); 4172 xfs_mod_fdblocks(mp, alen, false);
4210out_unreserve_quota: 4173out_unreserve_quota:
4211 if (XFS_IS_QUOTA_ON(mp)) 4174 if (XFS_IS_QUOTA_ON(mp))
4212 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ? 4175 xfs_trans_unreserve_quota_nblks(NULL, ip, (long)alen, 0, rt ?
@@ -5019,10 +4982,8 @@ xfs_bmap_del_extent(
5019 * Nothing to do for disk quota accounting here. 4982 * Nothing to do for disk quota accounting here.
5020 */ 4983 */
5021 ASSERT(da_old >= da_new); 4984 ASSERT(da_old >= da_new);
5022 if (da_old > da_new) { 4985 if (da_old > da_new)
5023 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 4986 xfs_mod_fdblocks(mp, (int64_t)(da_old - da_new), false);
5024 (int64_t)(da_old - da_new), 0);
5025 }
5026done: 4987done:
5027 *logflagsp = flags; 4988 *logflagsp = flags;
5028 return error; 4989 return error;
@@ -5291,14 +5252,13 @@ xfs_bunmapi(
5291 5252
5292 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount); 5253 rtexts = XFS_FSB_TO_B(mp, del.br_blockcount);
5293 do_div(rtexts, mp->m_sb.sb_rextsize); 5254 do_div(rtexts, mp->m_sb.sb_rextsize);
5294 xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, 5255 xfs_mod_frextents(mp, (int64_t)rtexts);
5295 (int64_t)rtexts, 0);
5296 (void)xfs_trans_reserve_quota_nblks(NULL, 5256 (void)xfs_trans_reserve_quota_nblks(NULL,
5297 ip, -((long)del.br_blockcount), 0, 5257 ip, -((long)del.br_blockcount), 0,
5298 XFS_QMOPT_RES_RTBLKS); 5258 XFS_QMOPT_RES_RTBLKS);
5299 } else { 5259 } else {
5300 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 5260 xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
5301 (int64_t)del.br_blockcount, 0); 5261 false);
5302 (void)xfs_trans_reserve_quota_nblks(NULL, 5262 (void)xfs_trans_reserve_quota_nblks(NULL,
5303 ip, -((long)del.br_blockcount), 0, 5263 ip, -((long)del.br_blockcount), 0,
5304 XFS_QMOPT_RES_REGBLKS); 5264 XFS_QMOPT_RES_REGBLKS);
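
All of the free-space accounting sites in xfs_bmap.c now funnel through the single xfs_mod_fdblocks() helper instead of the per-field xfs_icsb_modify_counters()/XFS_SBS_FDBLOCKS pair. A minimal user-space model of the contract such a helper has to keep (hypothetical names; the real kernel helper sits on a per-cpu counter and hides the reserve-pool fallback behind the new boolean argument):

#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>

static _Atomic int64_t fdblocks;	/* stand-in for mp->m_fdblocks */

/* Apply a signed delta, refusing to let the counter go negative. */
static int mod_fdblocks(int64_t delta)
{
	int64_t old = atomic_load(&fdblocks);
	int64_t new;

	do {
		new = old + delta;
		if (new < 0)
			return -ENOSPC;	/* caller unwinds its reservation */
	} while (!atomic_compare_exchange_weak(&fdblocks, &old, new));

	return 0;
}

The call sites above keep the same shape either way: a negative delta reserves blocks and can fail, a positive delta returns them on error paths and cannot.
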
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 9cb0115c6bd1..2385f8cd08ab 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -538,12 +538,12 @@ xfs_da3_root_split(
538 oldroot = blk1->bp->b_addr; 538 oldroot = blk1->bp->b_addr;
539 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || 539 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
540 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { 540 oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
541 struct xfs_da3_icnode_hdr nodehdr; 541 struct xfs_da3_icnode_hdr icnodehdr;
542 542
543 dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); 543 dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
544 btree = dp->d_ops->node_tree_p(oldroot); 544 btree = dp->d_ops->node_tree_p(oldroot);
545 size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot); 545 size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
546 level = nodehdr.level; 546 level = icnodehdr.level;
547 547
548 /* 548 /*
549 * we are about to copy oldroot to bp, so set up the type 549 * we are about to copy oldroot to bp, so set up the type
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 8eb718979383..4daaa662337b 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -264,68 +264,6 @@ typedef struct xfs_dsb {
264 /* must be padded to 64 bit alignment */ 264 /* must be padded to 64 bit alignment */
265} xfs_dsb_t; 265} xfs_dsb_t;
266 266
267/*
268 * Sequence number values for the fields.
269 */
270typedef enum {
271 XFS_SBS_MAGICNUM, XFS_SBS_BLOCKSIZE, XFS_SBS_DBLOCKS, XFS_SBS_RBLOCKS,
272 XFS_SBS_REXTENTS, XFS_SBS_UUID, XFS_SBS_LOGSTART, XFS_SBS_ROOTINO,
273 XFS_SBS_RBMINO, XFS_SBS_RSUMINO, XFS_SBS_REXTSIZE, XFS_SBS_AGBLOCKS,
274 XFS_SBS_AGCOUNT, XFS_SBS_RBMBLOCKS, XFS_SBS_LOGBLOCKS,
275 XFS_SBS_VERSIONNUM, XFS_SBS_SECTSIZE, XFS_SBS_INODESIZE,
276 XFS_SBS_INOPBLOCK, XFS_SBS_FNAME, XFS_SBS_BLOCKLOG,
277 XFS_SBS_SECTLOG, XFS_SBS_INODELOG, XFS_SBS_INOPBLOG, XFS_SBS_AGBLKLOG,
278 XFS_SBS_REXTSLOG, XFS_SBS_INPROGRESS, XFS_SBS_IMAX_PCT, XFS_SBS_ICOUNT,
279 XFS_SBS_IFREE, XFS_SBS_FDBLOCKS, XFS_SBS_FREXTENTS, XFS_SBS_UQUOTINO,
280 XFS_SBS_GQUOTINO, XFS_SBS_QFLAGS, XFS_SBS_FLAGS, XFS_SBS_SHARED_VN,
281 XFS_SBS_INOALIGNMT, XFS_SBS_UNIT, XFS_SBS_WIDTH, XFS_SBS_DIRBLKLOG,
282 XFS_SBS_LOGSECTLOG, XFS_SBS_LOGSECTSIZE, XFS_SBS_LOGSUNIT,
283 XFS_SBS_FEATURES2, XFS_SBS_BAD_FEATURES2, XFS_SBS_FEATURES_COMPAT,
284 XFS_SBS_FEATURES_RO_COMPAT, XFS_SBS_FEATURES_INCOMPAT,
285 XFS_SBS_FEATURES_LOG_INCOMPAT, XFS_SBS_CRC, XFS_SBS_PAD,
286 XFS_SBS_PQUOTINO, XFS_SBS_LSN,
287 XFS_SBS_FIELDCOUNT
288} xfs_sb_field_t;
289
290/*
291 * Mask values, defined based on the xfs_sb_field_t values.
292 * Only define the ones we're using.
293 */
294#define XFS_SB_MVAL(x) (1LL << XFS_SBS_ ## x)
295#define XFS_SB_UUID XFS_SB_MVAL(UUID)
296#define XFS_SB_FNAME XFS_SB_MVAL(FNAME)
297#define XFS_SB_ROOTINO XFS_SB_MVAL(ROOTINO)
298#define XFS_SB_RBMINO XFS_SB_MVAL(RBMINO)
299#define XFS_SB_RSUMINO XFS_SB_MVAL(RSUMINO)
300#define XFS_SB_VERSIONNUM XFS_SB_MVAL(VERSIONNUM)
301#define XFS_SB_UQUOTINO XFS_SB_MVAL(UQUOTINO)
302#define XFS_SB_GQUOTINO XFS_SB_MVAL(GQUOTINO)
303#define XFS_SB_QFLAGS XFS_SB_MVAL(QFLAGS)
304#define XFS_SB_SHARED_VN XFS_SB_MVAL(SHARED_VN)
305#define XFS_SB_UNIT XFS_SB_MVAL(UNIT)
306#define XFS_SB_WIDTH XFS_SB_MVAL(WIDTH)
307#define XFS_SB_ICOUNT XFS_SB_MVAL(ICOUNT)
308#define XFS_SB_IFREE XFS_SB_MVAL(IFREE)
309#define XFS_SB_FDBLOCKS XFS_SB_MVAL(FDBLOCKS)
310#define XFS_SB_FEATURES2 (XFS_SB_MVAL(FEATURES2) | \
311 XFS_SB_MVAL(BAD_FEATURES2))
312#define XFS_SB_FEATURES_COMPAT XFS_SB_MVAL(FEATURES_COMPAT)
313#define XFS_SB_FEATURES_RO_COMPAT XFS_SB_MVAL(FEATURES_RO_COMPAT)
314#define XFS_SB_FEATURES_INCOMPAT XFS_SB_MVAL(FEATURES_INCOMPAT)
315#define XFS_SB_FEATURES_LOG_INCOMPAT XFS_SB_MVAL(FEATURES_LOG_INCOMPAT)
316#define XFS_SB_CRC XFS_SB_MVAL(CRC)
317#define XFS_SB_PQUOTINO XFS_SB_MVAL(PQUOTINO)
318#define XFS_SB_NUM_BITS ((int)XFS_SBS_FIELDCOUNT)
319#define XFS_SB_ALL_BITS ((1LL << XFS_SB_NUM_BITS) - 1)
320#define XFS_SB_MOD_BITS \
321 (XFS_SB_UUID | XFS_SB_ROOTINO | XFS_SB_RBMINO | XFS_SB_RSUMINO | \
322 XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO | XFS_SB_GQUOTINO | \
323 XFS_SB_QFLAGS | XFS_SB_SHARED_VN | XFS_SB_UNIT | XFS_SB_WIDTH | \
324 XFS_SB_ICOUNT | XFS_SB_IFREE | XFS_SB_FDBLOCKS | XFS_SB_FEATURES2 | \
325 XFS_SB_FEATURES_COMPAT | XFS_SB_FEATURES_RO_COMPAT | \
326 XFS_SB_FEATURES_INCOMPAT | XFS_SB_FEATURES_LOG_INCOMPAT | \
327 XFS_SB_PQUOTINO)
328
329 267
330/* 268/*
331 * Misc. Flags - warning - these will be cleared by xfs_repair unless 269 * Misc. Flags - warning - these will be cleared by xfs_repair unless
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index db0444893e96..07349a183a11 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -376,7 +376,8 @@ xfs_ialloc_ag_alloc(
376 */ 376 */
377 newlen = args.mp->m_ialloc_inos; 377 newlen = args.mp->m_ialloc_inos;
378 if (args.mp->m_maxicount && 378 if (args.mp->m_maxicount &&
379 args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount) 379 percpu_counter_read(&args.mp->m_icount) + newlen >
380 args.mp->m_maxicount)
380 return -ENOSPC; 381 return -ENOSPC;
381 args.minlen = args.maxlen = args.mp->m_ialloc_blks; 382 args.minlen = args.maxlen = args.mp->m_ialloc_blks;
382 /* 383 /*
@@ -1340,7 +1341,8 @@ xfs_dialloc(
1340 * inode. 1341 * inode.
1341 */ 1342 */
1342 if (mp->m_maxicount && 1343 if (mp->m_maxicount &&
1343 mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) { 1344 percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos >
1345 mp->m_maxicount) {
1344 noroom = 1; 1346 noroom = 1;
1345 okalloc = 0; 1347 okalloc = 0;
1346 } 1348 }
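
percpu_counter_read() in both hunks is deliberately the cheap, approximate read: it returns only the already-folded central count and ignores deltas still batched on other CPUs, which is fine for the advisory m_maxicount ceiling. A compilable sketch of why such reads are fuzzy (hypothetical names; the kernel's struct percpu_counter batches per CPU rather than per thread):

#include <stdatomic.h>
#include <stdint.h>

#define BATCH 32

static _Atomic int64_t central;			/* flushed totals */
static _Thread_local int64_t pending;		/* this thread's unflushed delta */

static void counter_add(int64_t n)
{
	pending += n;
	if (pending >= BATCH || pending <= -BATCH) {
		atomic_fetch_add(&central, pending);
		pending = 0;
	}
}

/* Fast but fuzzy: may be off by (nr_threads * BATCH) in either direction. */
static int64_t counter_read(void)
{
	return atomic_load(&central);
}

When the exact value matters - folding the counters back into the superblock in xfs_sb.c below, or sizing the reserve pool in xfs_fsops.c - the code pays for percpu_counter_sum() instead.
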
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index f3ea02bf893e..dc4bfc5d88fc 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -735,17 +735,15 @@ xfs_initialize_perag_data(
735 btree += pag->pagf_btreeblks; 735 btree += pag->pagf_btreeblks;
736 xfs_perag_put(pag); 736 xfs_perag_put(pag);
737 } 737 }
738 /* 738
739 * Overwrite incore superblock counters with just-read data 739 /* Overwrite incore superblock counters with just-read data */
740 */
741 spin_lock(&mp->m_sb_lock); 740 spin_lock(&mp->m_sb_lock);
742 sbp->sb_ifree = ifree; 741 sbp->sb_ifree = ifree;
743 sbp->sb_icount = ialloc; 742 sbp->sb_icount = ialloc;
744 sbp->sb_fdblocks = bfree + bfreelst + btree; 743 sbp->sb_fdblocks = bfree + bfreelst + btree;
745 spin_unlock(&mp->m_sb_lock); 744 spin_unlock(&mp->m_sb_lock);
746 745
747 /* Fixup the per-cpu counters as well. */ 746 xfs_reinit_percpu_counters(mp);
748 xfs_icsb_reinit_counters(mp);
749 747
750 return 0; 748 return 0;
751} 749}
@@ -763,6 +761,10 @@ xfs_log_sb(
763 struct xfs_mount *mp = tp->t_mountp; 761 struct xfs_mount *mp = tp->t_mountp;
764 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0); 762 struct xfs_buf *bp = xfs_trans_getsb(tp, mp, 0);
765 763
764 mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
765 mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
766 mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
767
766 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); 768 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
767 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF); 769 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
768 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb)); 770 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index e86757358d5b..a52bbd3abc7d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1660,13 +1660,6 @@ xfs_swap_extent_flush(
1660 /* Verify O_DIRECT for ftmp */ 1660 /* Verify O_DIRECT for ftmp */
1661 if (VFS_I(ip)->i_mapping->nrpages) 1661 if (VFS_I(ip)->i_mapping->nrpages)
1662 return -EINVAL; 1662 return -EINVAL;
1663
1664 /*
1665 * Don't try to swap extents on mmap()d files because we can't lock
1666 * out races against page faults safely.
1667 */
1668 if (mapping_mapped(VFS_I(ip)->i_mapping))
1669 return -EBUSY;
1670 return 0; 1663 return 0;
1671} 1664}
1672 1665
@@ -1694,13 +1687,14 @@ xfs_swap_extents(
1694 } 1687 }
1695 1688
1696 /* 1689 /*
1697 * Lock up the inodes against other IO and truncate to begin with. 1690 * Lock the inodes against other IO, page faults and truncate to
1698 * Then we can ensure the inodes are flushed and have no page cache 1691 * begin with. Then we can ensure the inodes are flushed and have no
1699 * safely. Once we have done this we can take the ilocks and do the rest 1692 * page cache safely. Once we have done this we can take the ilocks and
1700 * of the checks. 1693 * do the rest of the checks.
1701 */ 1694 */
1702 lock_flags = XFS_IOLOCK_EXCL; 1695 lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1703 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); 1696 xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
1697 xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
1704 1698
1705 /* Verify that both files have the same format */ 1699 /* Verify that both files have the same format */
1706 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) { 1700 if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
@@ -1727,8 +1721,16 @@ xfs_swap_extents(
1727 xfs_trans_cancel(tp, 0); 1721 xfs_trans_cancel(tp, 0);
1728 goto out_unlock; 1722 goto out_unlock;
1729 } 1723 }
1724
1725 /*
1726 * Lock and join the inodes to the transaction so that transaction commit
1727 * or cancel will unlock the inodes from this point onwards.
1728 */
1730 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); 1729 xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
1731 lock_flags |= XFS_ILOCK_EXCL; 1730 lock_flags |= XFS_ILOCK_EXCL;
1731 xfs_trans_ijoin(tp, ip, lock_flags);
1732 xfs_trans_ijoin(tp, tip, lock_flags);
1733
1732 1734
1733 /* Verify all data are being swapped */ 1735 /* Verify all data are being swapped */
1734 if (sxp->sx_offset != 0 || 1736 if (sxp->sx_offset != 0 ||
@@ -1781,9 +1783,6 @@ xfs_swap_extents(
1781 goto out_trans_cancel; 1783 goto out_trans_cancel;
1782 } 1784 }
1783 1785
1784 xfs_trans_ijoin(tp, ip, lock_flags);
1785 xfs_trans_ijoin(tp, tip, lock_flags);
1786
1787 /* 1786 /*
1788 * Before we've swapped the forks, lets set the owners of the forks 1787 * Before we've swapped the forks, lets set the owners of the forks
1789 * appropriately. We have to do this as we are demand paging the btree 1788 * appropriately. We have to do this as we are demand paging the btree
@@ -1917,5 +1916,5 @@ out_unlock:
1917 1916
1918out_trans_cancel: 1917out_trans_cancel:
1919 xfs_trans_cancel(tp, 0); 1918 xfs_trans_cancel(tp, 0);
1920 goto out_unlock; 1919 goto out;
1921} 1920}
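
Two things interlock in this file: the new XFS_MMAPLOCK_EXCL pair replaces the old mapping_mapped() -EBUSY bailout, so mmap()d files can now be swapped safely, and the xfs_trans_ijoin() calls move up so that transaction commit or cancel releases the inode locks. The latter is why the final hunk's error path must no longer jump to the manual unlock label. The ownership-transfer pattern, modelled in user-space C (hypothetical names, mutexes standing in for inode locks):

#include <pthread.h>

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t lock_b = PTHREAD_MUTEX_INITIALIZER;

static int validate(void) { return 0; }		/* stand-in for format/offset checks */
static int do_swap(void) { return 0; }		/* stand-in for the fork swap */

static void trans_cancel(void)
{
	/* the transaction owns the locks once "joined" */
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
}

static int trans_commit(void)
{
	trans_cancel();		/* commit has the same unlock side effect */
	return 0;
}

static int swap_extents_model(void)
{
	int error;

	pthread_mutex_lock(&lock_a);
	pthread_mutex_lock(&lock_b);

	error = validate();
	if (error)
		goto out_unlock;	/* not joined yet: unlock by hand */

	/* "xfs_trans_ijoin": the transaction now owns both locks */

	error = do_swap();
	if (error) {
		trans_cancel();		/* unlocks a and b ... */
		return error;		/* ... so do NOT goto out_unlock */
	}
	return trans_commit();

out_unlock:
	pthread_mutex_unlock(&lock_b);
	pthread_mutex_unlock(&lock_a);
	return error;
}
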
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 3ee186ac1093..338e50bbfd1e 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -131,7 +131,7 @@ xfs_error_report(
131{ 131{
132 if (level <= xfs_error_level) { 132 if (level <= xfs_error_level) {
133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, 133 xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT,
134 "Internal error %s at line %d of file %s. Caller %pF", 134 "Internal error %s at line %d of file %s. Caller %pS",
135 tag, linenum, filename, ra); 135 tag, linenum, filename, ra);
136 136
137 xfs_stack_trace(); 137 xfs_stack_trace();
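
The format-specifier change is subtle but load-bearing on architectures with function descriptors (ia64, ppc64 ELFv1): %pF expects a function-descriptor pointer and dereferences it before symbol lookup, while %pS symbolises a plain text address. The saved caller address passed in here is a raw return address, so %pS is the right specifier - illustration only, not buildable standalone:

/* symbolise the direct return address of the current function */
pr_alert("Internal error, called from %pS", __builtin_return_address(0));
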
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f63aeddd31d5..c203839cd5be 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -397,7 +397,8 @@ STATIC int /* error (positive) */
397xfs_zero_last_block( 397xfs_zero_last_block(
398 struct xfs_inode *ip, 398 struct xfs_inode *ip,
399 xfs_fsize_t offset, 399 xfs_fsize_t offset,
400 xfs_fsize_t isize) 400 xfs_fsize_t isize,
401 bool *did_zeroing)
401{ 402{
402 struct xfs_mount *mp = ip->i_mount; 403 struct xfs_mount *mp = ip->i_mount;
403 xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize); 404 xfs_fileoff_t last_fsb = XFS_B_TO_FSBT(mp, isize);
@@ -425,6 +426,7 @@ xfs_zero_last_block(
425 zero_len = mp->m_sb.sb_blocksize - zero_offset; 426 zero_len = mp->m_sb.sb_blocksize - zero_offset;
426 if (isize + zero_len > offset) 427 if (isize + zero_len > offset)
427 zero_len = offset - isize; 428 zero_len = offset - isize;
429 *did_zeroing = true;
428 return xfs_iozero(ip, isize, zero_len); 430 return xfs_iozero(ip, isize, zero_len);
429} 431}
430 432
@@ -443,7 +445,8 @@ int /* error (positive) */
443xfs_zero_eof( 445xfs_zero_eof(
444 struct xfs_inode *ip, 446 struct xfs_inode *ip,
445 xfs_off_t offset, /* starting I/O offset */ 447 xfs_off_t offset, /* starting I/O offset */
446 xfs_fsize_t isize) /* current inode size */ 448 xfs_fsize_t isize, /* current inode size */
449 bool *did_zeroing)
447{ 450{
448 struct xfs_mount *mp = ip->i_mount; 451 struct xfs_mount *mp = ip->i_mount;
449 xfs_fileoff_t start_zero_fsb; 452 xfs_fileoff_t start_zero_fsb;
@@ -465,7 +468,7 @@ xfs_zero_eof(
465 * We only zero a part of that block so it is handled specially. 468 * We only zero a part of that block so it is handled specially.
466 */ 469 */
467 if (XFS_B_FSB_OFFSET(mp, isize) != 0) { 470 if (XFS_B_FSB_OFFSET(mp, isize) != 0) {
468 error = xfs_zero_last_block(ip, offset, isize); 471 error = xfs_zero_last_block(ip, offset, isize, did_zeroing);
469 if (error) 472 if (error)
470 return error; 473 return error;
471 } 474 }
@@ -525,6 +528,7 @@ xfs_zero_eof(
525 if (error) 528 if (error)
526 return error; 529 return error;
527 530
531 *did_zeroing = true;
528 start_zero_fsb = imap.br_startoff + imap.br_blockcount; 532 start_zero_fsb = imap.br_startoff + imap.br_blockcount;
529 ASSERT(start_zero_fsb <= (end_zero_fsb + 1)); 533 ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
530 } 534 }
@@ -567,13 +571,15 @@ restart:
567 * having to redo all checks before. 571 * having to redo all checks before.
568 */ 572 */
569 if (*pos > i_size_read(inode)) { 573 if (*pos > i_size_read(inode)) {
574 bool zero = false;
575
570 if (*iolock == XFS_IOLOCK_SHARED) { 576 if (*iolock == XFS_IOLOCK_SHARED) {
571 xfs_rw_iunlock(ip, *iolock); 577 xfs_rw_iunlock(ip, *iolock);
572 *iolock = XFS_IOLOCK_EXCL; 578 *iolock = XFS_IOLOCK_EXCL;
573 xfs_rw_ilock(ip, *iolock); 579 xfs_rw_ilock(ip, *iolock);
574 goto restart; 580 goto restart;
575 } 581 }
576 error = xfs_zero_eof(ip, *pos, i_size_read(inode)); 582 error = xfs_zero_eof(ip, *pos, i_size_read(inode), &zero);
577 if (error) 583 if (error)
578 return error; 584 return error;
579 } 585 }
@@ -846,6 +852,9 @@ xfs_file_fallocate(
846 if (error) 852 if (error)
847 goto out_unlock; 853 goto out_unlock;
848 854
855 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
856 iolock |= XFS_MMAPLOCK_EXCL;
857
849 if (mode & FALLOC_FL_PUNCH_HOLE) { 858 if (mode & FALLOC_FL_PUNCH_HOLE) {
850 error = xfs_free_file_space(ip, offset, len); 859 error = xfs_free_file_space(ip, offset, len);
851 if (error) 860 if (error)
@@ -1028,20 +1037,6 @@ xfs_file_mmap(
1028} 1037}
1029 1038
1030/* 1039/*
1031 * mmap()d file has taken write protection fault and is being made
1032 * writable. We can set the page state up correctly for a writable
1033 * page, which means we can do correct delalloc accounting (ENOSPC
1034 * checking!) and unwritten extent mapping.
1035 */
1036STATIC int
1037xfs_vm_page_mkwrite(
1038 struct vm_area_struct *vma,
1039 struct vm_fault *vmf)
1040{
1041 return block_page_mkwrite(vma, vmf, xfs_get_blocks);
1042}
1043
1044/*
1045 * This type is designed to indicate the type of offset we would like 1040 * This type is designed to indicate the type of offset we would like
1046 * to search from page cache for xfs_seek_hole_data(). 1041 * to search from page cache for xfs_seek_hole_data().
1047 */ 1042 */
@@ -1416,6 +1411,55 @@ xfs_file_llseek(
1416 } 1411 }
1417} 1412}
1418 1413
1414/*
1415 * Locking for serialisation of IO during page faults. This results in a lock
1416 * ordering of:
1417 *
1418 * mmap_sem (MM)
1419 * i_mmap_lock (XFS - truncate serialisation)
1420 * page_lock (MM)
1421 * i_lock (XFS - extent map serialisation)
1422 */
1423STATIC int
1424xfs_filemap_fault(
1425 struct vm_area_struct *vma,
1426 struct vm_fault *vmf)
1427{
1428 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
1429 int error;
1430
1431 trace_xfs_filemap_fault(ip);
1432
1433 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1434 error = filemap_fault(vma, vmf);
1435 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1436
1437 return error;
1438}
1439
1440/*
1441 * mmap()d file has taken write protection fault and is being made writable. We
1442 * can set the page state up correctly for a writable page, which means we can
1443 * do correct delalloc accounting (ENOSPC checking!) and unwritten extent
1444 * mapping.
1445 */
1446STATIC int
1447xfs_filemap_page_mkwrite(
1448 struct vm_area_struct *vma,
1449 struct vm_fault *vmf)
1450{
1451 struct xfs_inode *ip = XFS_I(vma->vm_file->f_mapping->host);
1452 int error;
1453
1454 trace_xfs_filemap_page_mkwrite(ip);
1455
1456 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1457 error = block_page_mkwrite(vma, vmf, xfs_get_blocks);
1458 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1459
1460 return error;
1461}
1462
1419const struct file_operations xfs_file_operations = { 1463const struct file_operations xfs_file_operations = {
1420 .llseek = xfs_file_llseek, 1464 .llseek = xfs_file_llseek,
1421 .read = new_sync_read, 1465 .read = new_sync_read,
@@ -1448,7 +1492,7 @@ const struct file_operations xfs_dir_file_operations = {
1448}; 1492};
1449 1493
1450static const struct vm_operations_struct xfs_file_vm_ops = { 1494static const struct vm_operations_struct xfs_file_vm_ops = {
1451 .fault = filemap_fault, 1495 .fault = xfs_filemap_fault,
1452 .map_pages = filemap_map_pages, 1496 .map_pages = filemap_map_pages,
1453 .page_mkwrite = xfs_vm_page_mkwrite, 1497 .page_mkwrite = xfs_filemap_page_mkwrite,
1454}; 1498};
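
The two wrappers above are the consumer side of the new i_mmaplock: faults take it shared, while truncate and hole punch take it exclusive, so the extent map can be torn down without a fault racing in. Reduced to a user-space model (hypothetical names):

#include <pthread.h>

static pthread_rwlock_t mmaplock = PTHREAD_RWLOCK_INITIALIZER;

static void page_fault_model(void (*fault)(void))
{
	pthread_rwlock_rdlock(&mmaplock);	/* XFS_MMAPLOCK_SHARED */
	fault();		/* filemap_fault() / block_page_mkwrite() body */
	pthread_rwlock_unlock(&mmaplock);
}

static void truncate_model(void (*invalidate)(void))
{
	pthread_rwlock_wrlock(&mmaplock);	/* XFS_MMAPLOCK_EXCL */
	invalidate();		/* no fault can run concurrently */
	pthread_rwlock_unlock(&mmaplock);
}

Faults stay shared against each other, so this serialises faults against invalidation without serialising faults against one another.
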
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index a2e86e8a0fea..8f9f854376c6 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -322,7 +322,7 @@ xfs_filestream_lookup_ag(
322 322
323 pip = xfs_filestream_get_parent(ip); 323 pip = xfs_filestream_get_parent(ip);
324 if (!pip) 324 if (!pip)
325 goto out; 325 return NULLAGNUMBER;
326 326
327 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); 327 mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino);
328 if (mru) { 328 if (mru) {
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 74efe5b760dc..cb7e8a29dfb6 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -637,12 +637,13 @@ xfs_fs_counts(
637 xfs_mount_t *mp, 637 xfs_mount_t *mp,
638 xfs_fsop_counts_t *cnt) 638 xfs_fsop_counts_t *cnt)
639{ 639{
640 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 640 cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
641 cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
642 cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
643 XFS_ALLOC_SET_ASIDE(mp);
644
641 spin_lock(&mp->m_sb_lock); 645 spin_lock(&mp->m_sb_lock);
642 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
643 cnt->freertx = mp->m_sb.sb_frextents; 646 cnt->freertx = mp->m_sb.sb_frextents;
644 cnt->freeino = mp->m_sb.sb_ifree;
645 cnt->allocino = mp->m_sb.sb_icount;
646 spin_unlock(&mp->m_sb_lock); 647 spin_unlock(&mp->m_sb_lock);
647 return 0; 648 return 0;
648} 649}
@@ -692,14 +693,9 @@ xfs_reserve_blocks(
692 * what to do. This means that the amount of free space can 693 * what to do. This means that the amount of free space can
693 * change while we do this, so we need to retry if we end up 694 * change while we do this, so we need to retry if we end up
694 * trying to reserve more space than is available. 695 * trying to reserve more space than is available.
695 *
696 * We also use the xfs_mod_incore_sb() interface so that we
697 * don't have to care about whether per cpu counter are
698 * enabled, disabled or even compiled in....
699 */ 696 */
700retry: 697retry:
701 spin_lock(&mp->m_sb_lock); 698 spin_lock(&mp->m_sb_lock);
702 xfs_icsb_sync_counters_locked(mp, 0);
703 699
704 /* 700 /*
705 * If our previous reservation was larger than the current value, 701 * If our previous reservation was larger than the current value,
@@ -716,7 +712,8 @@ retry:
716 } else { 712 } else {
717 __int64_t free; 713 __int64_t free;
718 714
719 free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 715 free = percpu_counter_sum(&mp->m_fdblocks) -
716 XFS_ALLOC_SET_ASIDE(mp);
720 if (!free) 717 if (!free)
721 goto out; /* ENOSPC and fdblks_delta = 0 */ 718 goto out; /* ENOSPC and fdblks_delta = 0 */
722 719
@@ -755,8 +752,7 @@ out:
755 * the extra reserve blocks from the reserve..... 752 * the extra reserve blocks from the reserve.....
756 */ 753 */
757 int error; 754 int error;
758 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 755 error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
759 fdblks_delta, 0);
760 if (error == -ENOSPC) 756 if (error == -ENOSPC)
761 goto retry; 757 goto retry;
762 } 758 }
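
xfs_reserve_blocks() keeps its optimistic shape around the new counter: sample the free count (an exact percpu_counter_sum() now), attempt the delta, and retry on -ENOSPC if another CPU consumed the space in between. The same sample-and-retry idiom, reduced to a self-contained user-space sketch (hypothetical names):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic int64_t fdblocks;

/* Take up to 'request' blocks; retry if another thread races us to them. */
static int64_t reserve_blocks(int64_t request)
{
	for (;;) {
		int64_t free = atomic_load(&fdblocks);		/* sample */
		int64_t take = request < free ? request : free;

		if (take <= 0)
			return 0;				/* ENOSPC */
		if (atomic_compare_exchange_weak(&fdblocks, &free, free - take))
			return take;				/* got them */
		/* counter moved under us: same as the -ENOSPC goto retry */
	}
}
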
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index d0414f305967..d6ebc85192b7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -117,24 +117,34 @@ xfs_ilock_attr_map_shared(
117} 117}
118 118
119/* 119/*
120 * The xfs inode contains 2 locks: a multi-reader lock called the 120 * The xfs inode contains 3 multi-reader locks: the i_iolock, the i_mmap_lock and
121 * i_iolock and a multi-reader lock called the i_lock. This routine 121 * the i_lock. This routine allows various combinations of the locks to be
122 * allows either or both of the locks to be obtained. 122 * obtained.
123 * 123 *
124 * The 2 locks should always be ordered so that the IO lock is 124 * The 3 locks should always be ordered so that the IO lock is obtained first,
125 * obtained first in order to prevent deadlock. 125 * the mmap lock second and the ilock last in order to prevent deadlock.
126 * 126 *
127 * ip -- the inode being locked 127 * Basic locking order:
128 * lock_flags -- this parameter indicates the inode's locks 128 *
129 * to be locked. It can be: 129 * i_iolock -> i_mmap_lock -> page_lock -> i_ilock
130 * XFS_IOLOCK_SHARED, 130 *
131 * XFS_IOLOCK_EXCL, 131 * mmap_sem locking order:
132 * XFS_ILOCK_SHARED, 132 *
133 * XFS_ILOCK_EXCL, 133 * i_iolock -> page lock -> mmap_sem
134 * XFS_IOLOCK_SHARED | XFS_ILOCK_SHARED, 134 * mmap_sem -> i_mmap_lock -> page_lock
135 * XFS_IOLOCK_SHARED | XFS_ILOCK_EXCL, 135 *
136 * XFS_IOLOCK_EXCL | XFS_ILOCK_SHARED, 136 * The difference in mmap_sem locking order means that we cannot hold the
137 * XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL 137 * i_mmap_lock over syscall based read(2)/write(2) based IO. These IO paths can
138 * fault in pages during copy in/out (for buffered IO) or require the mmap_sem
139 * in get_user_pages() to map the user pages into the kernel address space for
140 * direct IO. Similarly, the i_iolock cannot be taken inside a page fault because
141 * page faults already hold the mmap_sem.
142 *
143 * Hence to serialise fully against both syscall and mmap based IO, we need to
144 * take both the i_iolock and the i_mmap_lock. These locks should *only* be both
145 * taken in places where we need to invalidate the page cache in a race
146 * free manner (e.g. truncate, hole punch and other extent manipulation
147 * functions).
138 */ 148 */
139void 149void
140xfs_ilock( 150xfs_ilock(
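
A compilable reduction of the ordering rule the comment establishes - iolock, then mmaplock, then ilock, never the reverse (hypothetical names; the kernel uses mrlocks with lockdep subclasses rather than bare rwlocks):

#include <pthread.h>

static struct inode_locks {
	pthread_rwlock_t iolock;	/* IO submission */
	pthread_rwlock_t mmaplock;	/* page fault vs truncate */
	pthread_rwlock_t ilock;		/* extent map */
} il = {
	PTHREAD_RWLOCK_INITIALIZER,
	PTHREAD_RWLOCK_INITIALIZER,
	PTHREAD_RWLOCK_INITIALIZER,
};

#define IOLOCK_EXCL	(1 << 0)
#define MMAPLOCK_EXCL	(1 << 1)
#define ILOCK_EXCL	(1 << 2)

static void ilock_model(int flags)
{
	/* always descend: iolock, then mmaplock, then ilock */
	if (flags & IOLOCK_EXCL)
		pthread_rwlock_wrlock(&il.iolock);
	if (flags & MMAPLOCK_EXCL)
		pthread_rwlock_wrlock(&il.mmaplock);
	if (flags & ILOCK_EXCL)
		pthread_rwlock_wrlock(&il.ilock);
}

Every path that takes more than one of these - xfs_ilock(), xfs_ilock_nowait(), the fallocate and swap-extents changes above - obeys the same descent, which is what makes the ABBA deadlock between syscall IO, page faults and transactions impossible.
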
@@ -150,6 +160,8 @@ xfs_ilock(
150 */ 160 */
151 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 161 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
152 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 162 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
163 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
164 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
153 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 165 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
154 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 166 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
155 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 167 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -159,6 +171,11 @@ xfs_ilock(
159 else if (lock_flags & XFS_IOLOCK_SHARED) 171 else if (lock_flags & XFS_IOLOCK_SHARED)
160 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags)); 172 mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
161 173
174 if (lock_flags & XFS_MMAPLOCK_EXCL)
175 mrupdate_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
176 else if (lock_flags & XFS_MMAPLOCK_SHARED)
177 mraccess_nested(&ip->i_mmaplock, XFS_MMAPLOCK_DEP(lock_flags));
178
162 if (lock_flags & XFS_ILOCK_EXCL) 179 if (lock_flags & XFS_ILOCK_EXCL)
163 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags)); 180 mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
164 else if (lock_flags & XFS_ILOCK_SHARED) 181 else if (lock_flags & XFS_ILOCK_SHARED)
@@ -191,6 +208,8 @@ xfs_ilock_nowait(
191 */ 208 */
192 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 209 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
193 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 210 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
211 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
212 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
194 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 213 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
195 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 214 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
196 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 215 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -202,21 +221,35 @@ xfs_ilock_nowait(
202 if (!mrtryaccess(&ip->i_iolock)) 221 if (!mrtryaccess(&ip->i_iolock))
203 goto out; 222 goto out;
204 } 223 }
224
225 if (lock_flags & XFS_MMAPLOCK_EXCL) {
226 if (!mrtryupdate(&ip->i_mmaplock))
227 goto out_undo_iolock;
228 } else if (lock_flags & XFS_MMAPLOCK_SHARED) {
229 if (!mrtryaccess(&ip->i_mmaplock))
230 goto out_undo_iolock;
231 }
232
205 if (lock_flags & XFS_ILOCK_EXCL) { 233 if (lock_flags & XFS_ILOCK_EXCL) {
206 if (!mrtryupdate(&ip->i_lock)) 234 if (!mrtryupdate(&ip->i_lock))
207 goto out_undo_iolock; 235 goto out_undo_mmaplock;
208 } else if (lock_flags & XFS_ILOCK_SHARED) { 236 } else if (lock_flags & XFS_ILOCK_SHARED) {
209 if (!mrtryaccess(&ip->i_lock)) 237 if (!mrtryaccess(&ip->i_lock))
210 goto out_undo_iolock; 238 goto out_undo_mmaplock;
211 } 239 }
212 return 1; 240 return 1;
213 241
214 out_undo_iolock: 242out_undo_mmaplock:
243 if (lock_flags & XFS_MMAPLOCK_EXCL)
244 mrunlock_excl(&ip->i_mmaplock);
245 else if (lock_flags & XFS_MMAPLOCK_SHARED)
246 mrunlock_shared(&ip->i_mmaplock);
247out_undo_iolock:
215 if (lock_flags & XFS_IOLOCK_EXCL) 248 if (lock_flags & XFS_IOLOCK_EXCL)
216 mrunlock_excl(&ip->i_iolock); 249 mrunlock_excl(&ip->i_iolock);
217 else if (lock_flags & XFS_IOLOCK_SHARED) 250 else if (lock_flags & XFS_IOLOCK_SHARED)
218 mrunlock_shared(&ip->i_iolock); 251 mrunlock_shared(&ip->i_iolock);
219 out: 252out:
220 return 0; 253 return 0;
221} 254}
222 255
@@ -244,6 +277,8 @@ xfs_iunlock(
244 */ 277 */
245 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) != 278 ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
246 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)); 279 (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
280 ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
281 (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
247 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) != 282 ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
248 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)); 283 (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
249 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0); 284 ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
@@ -254,6 +289,11 @@ xfs_iunlock(
254 else if (lock_flags & XFS_IOLOCK_SHARED) 289 else if (lock_flags & XFS_IOLOCK_SHARED)
255 mrunlock_shared(&ip->i_iolock); 290 mrunlock_shared(&ip->i_iolock);
256 291
292 if (lock_flags & XFS_MMAPLOCK_EXCL)
293 mrunlock_excl(&ip->i_mmaplock);
294 else if (lock_flags & XFS_MMAPLOCK_SHARED)
295 mrunlock_shared(&ip->i_mmaplock);
296
257 if (lock_flags & XFS_ILOCK_EXCL) 297 if (lock_flags & XFS_ILOCK_EXCL)
258 mrunlock_excl(&ip->i_lock); 298 mrunlock_excl(&ip->i_lock);
259 else if (lock_flags & XFS_ILOCK_SHARED) 299 else if (lock_flags & XFS_ILOCK_SHARED)
@@ -271,11 +311,14 @@ xfs_ilock_demote(
271 xfs_inode_t *ip, 311 xfs_inode_t *ip,
272 uint lock_flags) 312 uint lock_flags)
273{ 313{
274 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)); 314 ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL));
275 ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0); 315 ASSERT((lock_flags &
316 ~(XFS_IOLOCK_EXCL|XFS_MMAPLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
276 317
277 if (lock_flags & XFS_ILOCK_EXCL) 318 if (lock_flags & XFS_ILOCK_EXCL)
278 mrdemote(&ip->i_lock); 319 mrdemote(&ip->i_lock);
320 if (lock_flags & XFS_MMAPLOCK_EXCL)
321 mrdemote(&ip->i_mmaplock);
279 if (lock_flags & XFS_IOLOCK_EXCL) 322 if (lock_flags & XFS_IOLOCK_EXCL)
280 mrdemote(&ip->i_iolock); 323 mrdemote(&ip->i_iolock);
281 324
@@ -294,6 +337,12 @@ xfs_isilocked(
294 return rwsem_is_locked(&ip->i_lock.mr_lock); 337 return rwsem_is_locked(&ip->i_lock.mr_lock);
295 } 338 }
296 339
340 if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
341 if (!(lock_flags & XFS_MMAPLOCK_SHARED))
342 return !!ip->i_mmaplock.mr_writer;
343 return rwsem_is_locked(&ip->i_mmaplock.mr_lock);
344 }
345
297 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) { 346 if (lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) {
298 if (!(lock_flags & XFS_IOLOCK_SHARED)) 347 if (!(lock_flags & XFS_IOLOCK_SHARED))
299 return !!ip->i_iolock.mr_writer; 348 return !!ip->i_iolock.mr_writer;
@@ -314,14 +363,27 @@ int xfs_lock_delays;
314#endif 363#endif
315 364
316/* 365/*
317 * Bump the subclass so xfs_lock_inodes() acquires each lock with 366 * Bump the subclass so xfs_lock_inodes() acquires each lock with a different
318 * a different value 367 * value. This shouldn't be called for page fault locking, but we also need to
368 * ensure we don't overrun the number of lockdep subclasses for the iolock or
369 * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
319 */ 370 */
320static inline int 371static inline int
321xfs_lock_inumorder(int lock_mode, int subclass) 372xfs_lock_inumorder(int lock_mode, int subclass)
322{ 373{
323 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 374 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
375 ASSERT(subclass + XFS_LOCK_INUMORDER <
376 (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
324 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT; 377 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
378 }
379
380 if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
381 ASSERT(subclass + XFS_LOCK_INUMORDER <
382 (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
383 lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
384 XFS_MMAPLOCK_SHIFT;
385 }
386
325 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) 387 if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
326 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT; 388 lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
327 389
@@ -329,15 +391,14 @@ xfs_lock_inumorder(int lock_mode, int subclass)
329} 391}
330 392
331/* 393/*
332 * The following routine will lock n inodes in exclusive mode. 394 * The following routine will lock n inodes in exclusive mode. We assume the
333 * We assume the caller calls us with the inodes in i_ino order. 395 * caller calls us with the inodes in i_ino order.
334 * 396 *
335 * We need to detect deadlock where an inode that we lock 397 * We need to detect deadlock where an inode that we lock is in the AIL and we
336 * is in the AIL and we start waiting for another inode that is locked 398 * start waiting for another inode that is locked by a thread in a long running
337 * by a thread in a long running transaction (such as truncate). This can 399 * transaction (such as truncate). This can result in deadlock since the long
338 * result in deadlock since the long running trans might need to wait 400 * running trans might need to wait for the inode we just locked in order to
339 * for the inode we just locked in order to push the tail and free space 401 * push the tail and free space in the log.
340 * in the log.
341 */ 402 */
342void 403void
343xfs_lock_inodes( 404xfs_lock_inodes(
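
The new asserts guard the bit-packing trick xfs_lock_inumorder() relies on: each lock class owns a field of bits inside lock_mode, and the per-inode subclass is shifted into the right field. A standalone sketch with made-up shift values (not the kernel's):

#include <assert.h>

#define IOLOCK_SHIFT	16	/* illustrative values only */
#define MMAPLOCK_SHIFT	20
#define ILOCK_SHIFT	24

static int lock_inumorder(int lock_mode, int subclass)
{
	/* subclass must fit between this field's shift and the next one's */
	assert(subclass < (1 << (MMAPLOCK_SHIFT - IOLOCK_SHIFT)));
	return lock_mode | (subclass << IOLOCK_SHIFT);
}

Overrunning a field would silently corrupt the neighbouring lock's lockdep subclass, which is exactly the failure the new ASSERTs catch.
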
@@ -348,30 +409,27 @@ xfs_lock_inodes(
348 int attempts = 0, i, j, try_lock; 409 int attempts = 0, i, j, try_lock;
349 xfs_log_item_t *lp; 410 xfs_log_item_t *lp;
350 411
351 ASSERT(ips && (inodes >= 2)); /* we need at least two */ 412 /* currently supports between 2 and 5 inodes */
413 ASSERT(ips && inodes >= 2 && inodes <= 5);
352 414
353 try_lock = 0; 415 try_lock = 0;
354 i = 0; 416 i = 0;
355
356again: 417again:
357 for (; i < inodes; i++) { 418 for (; i < inodes; i++) {
358 ASSERT(ips[i]); 419 ASSERT(ips[i]);
359 420
360 if (i && (ips[i] == ips[i-1])) /* Already locked */ 421 if (i && (ips[i] == ips[i - 1])) /* Already locked */
361 continue; 422 continue;
362 423
363 /* 424 /*
364 * If try_lock is not set yet, make sure all locked inodes 425 * If try_lock is not set yet, make sure all locked inodes are
365 * are not in the AIL. 426 * not in the AIL. If any are, set try_lock to be used later.
366 * If any are, set try_lock to be used later.
367 */ 427 */
368
369 if (!try_lock) { 428 if (!try_lock) {
370 for (j = (i - 1); j >= 0 && !try_lock; j--) { 429 for (j = (i - 1); j >= 0 && !try_lock; j--) {
371 lp = (xfs_log_item_t *)ips[j]->i_itemp; 430 lp = (xfs_log_item_t *)ips[j]->i_itemp;
372 if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { 431 if (lp && (lp->li_flags & XFS_LI_IN_AIL))
373 try_lock++; 432 try_lock++;
374 }
375 } 433 }
376 } 434 }
377 435
@@ -381,51 +439,42 @@ again:
381 * we can't get any, we must release all we have 439 * we can't get any, we must release all we have
382 * and try again. 440 * and try again.
383 */ 441 */
442 if (!try_lock) {
443 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
444 continue;
445 }
446
447 /* try_lock means we have an inode locked that is in the AIL. */
448 ASSERT(i != 0);
449 if (xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i)))
450 continue;
384 451
385 if (try_lock) { 452 /*
386 /* try_lock must be 0 if i is 0. */ 453 * Unlock all previous guys and try again. xfs_iunlock will try
454 * to push the tail if the inode is in the AIL.
455 */
456 attempts++;
457 for (j = i - 1; j >= 0; j--) {
387 /* 458 /*
388 * try_lock means we have an inode locked 459 * Check to see if we've already unlocked this one. Not
389 * that is in the AIL. 460 * the first one going back, and the inode ptr is the
461 * same.
390 */ 462 */
391 ASSERT(i != 0); 463 if (j != (i - 1) && ips[j] == ips[j + 1])
392 if (!xfs_ilock_nowait(ips[i], xfs_lock_inumorder(lock_mode, i))) { 464 continue;
393 attempts++;
394
395 /*
396 * Unlock all previous guys and try again.
397 * xfs_iunlock will try to push the tail
398 * if the inode is in the AIL.
399 */
400
401 for(j = i - 1; j >= 0; j--) {
402
403 /*
404 * Check to see if we've already
405 * unlocked this one.
406 * Not the first one going back,
407 * and the inode ptr is the same.
408 */
409 if ((j != (i - 1)) && ips[j] ==
410 ips[j+1])
411 continue;
412
413 xfs_iunlock(ips[j], lock_mode);
414 }
415 465
416 if ((attempts % 5) == 0) { 466 xfs_iunlock(ips[j], lock_mode);
417 delay(1); /* Don't just spin the CPU */ 467 }
468
469 if ((attempts % 5) == 0) {
470 delay(1); /* Don't just spin the CPU */
418#ifdef DEBUG 471#ifdef DEBUG
419 xfs_lock_delays++; 472 xfs_lock_delays++;
420#endif 473#endif
421 }
422 i = 0;
423 try_lock = 0;
424 goto again;
425 }
426 } else {
427 xfs_ilock(ips[i], xfs_lock_inumorder(lock_mode, i));
428 } 474 }
475 i = 0;
476 try_lock = 0;
477 goto again;
429 } 478 }
430 479
431#ifdef DEBUG 480#ifdef DEBUG
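
The restructured loop is the classic trylock-and-back-off deadlock avoidance: once any held inode may be pinned in the AIL, stop blocking, and on a failed trylock drop everything and restart so the log tail can be pushed. Simplified to user-space (always trylocking, where the kernel only falls back to trylock after the first AIL hit):

#include <pthread.h>
#include <sched.h>

static int lock_all(pthread_mutex_t **m, int n)
{
	int attempts = 0;
	int i, j;

again:
	for (i = 0; i < n; i++) {
		if (pthread_mutex_trylock(m[i]) == 0)
			continue;

		/* contention: release everything we hold and start over */
		for (j = i - 1; j >= 0; j--)
			pthread_mutex_unlock(m[j]);
		if ((++attempts % 5) == 0)
			sched_yield();	/* don't just spin the CPU */
		goto again;
	}
	return 0;
}
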
@@ -440,10 +489,10 @@ again:
440} 489}
441 490
442/* 491/*
443 * xfs_lock_two_inodes() can only be used to lock one type of lock 492 * xfs_lock_two_inodes() can only be used to lock one type of lock at a time -
444 * at a time - the iolock or the ilock, but not both at once. If 493 * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
445 * we lock both at once, lockdep will report false positives saying 494 * lock more than one at a time, lockdep will report false positives saying we
446 * we have violated locking orders. 495 * have violated locking orders.
447 */ 496 */
448void 497void
449xfs_lock_two_inodes( 498xfs_lock_two_inodes(
@@ -455,8 +504,12 @@ xfs_lock_two_inodes(
455 int attempts = 0; 504 int attempts = 0;
456 xfs_log_item_t *lp; 505 xfs_log_item_t *lp;
457 506
458 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) 507 if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
459 ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); 508 ASSERT(!(lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)));
509 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
510 } else if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL))
511 ASSERT(!(lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)));
512
460 ASSERT(ip0->i_ino != ip1->i_ino); 513 ASSERT(ip0->i_ino != ip1->i_ino);
461 514
462 if (ip0->i_ino > ip1->i_ino) { 515 if (ip0->i_ino > ip1->i_ino) {
@@ -2615,19 +2668,22 @@ xfs_remove(
2615/* 2668/*
2616 * Enter all inodes for a rename transaction into a sorted array. 2669 * Enter all inodes for a rename transaction into a sorted array.
2617 */ 2670 */
2671#define __XFS_SORT_INODES 5
2618STATIC void 2672STATIC void
2619xfs_sort_for_rename( 2673xfs_sort_for_rename(
2620 xfs_inode_t *dp1, /* in: old (source) directory inode */ 2674 struct xfs_inode *dp1, /* in: old (source) directory inode */
2621 xfs_inode_t *dp2, /* in: new (target) directory inode */ 2675 struct xfs_inode *dp2, /* in: new (target) directory inode */
2622 xfs_inode_t *ip1, /* in: inode of old entry */ 2676 struct xfs_inode *ip1, /* in: inode of old entry */
2623 xfs_inode_t *ip2, /* in: inode of new entry, if it 2677 struct xfs_inode *ip2, /* in: inode of new entry */
2624 already exists, NULL otherwise. */ 2678 struct xfs_inode *wip, /* in: whiteout inode */
2625 xfs_inode_t **i_tab,/* out: array of inode returned, sorted */ 2679 struct xfs_inode **i_tab,/* out: sorted array of inodes */
2626 int *num_inodes) /* out: number of inodes in array */ 2680 int *num_inodes) /* in/out: inodes in array */
2627{ 2681{
2628 xfs_inode_t *temp;
2629 int i, j; 2682 int i, j;
2630 2683
2684 ASSERT(*num_inodes == __XFS_SORT_INODES);
2685 memset(i_tab, 0, *num_inodes * sizeof(struct xfs_inode *));
2686
2631 /* 2687 /*
2632 * i_tab contains a list of pointers to inodes. We initialize 2688 * i_tab contains a list of pointers to inodes. We initialize
2633 * the table here & we'll sort it. We will then use it to 2689 * the table here & we'll sort it. We will then use it to
@@ -2635,25 +2691,24 @@ xfs_sort_for_rename(
2635 * 2691 *
2636 * Note that the table may contain duplicates. e.g., dp1 == dp2. 2692 * Note that the table may contain duplicates. e.g., dp1 == dp2.
2637 */ 2693 */
2638 i_tab[0] = dp1; 2694 i = 0;
2639 i_tab[1] = dp2; 2695 i_tab[i++] = dp1;
2640 i_tab[2] = ip1; 2696 i_tab[i++] = dp2;
2641 if (ip2) { 2697 i_tab[i++] = ip1;
2642 *num_inodes = 4; 2698 if (ip2)
2643 i_tab[3] = ip2; 2699 i_tab[i++] = ip2;
2644 } else { 2700 if (wip)
2645 *num_inodes = 3; 2701 i_tab[i++] = wip;
2646 i_tab[3] = NULL; 2702 *num_inodes = i;
2647 }
2648 2703
2649 /* 2704 /*
2650 * Sort the elements via bubble sort. (Remember, there are at 2705 * Sort the elements via bubble sort. (Remember, there are at
2651 * most 4 elements to sort, so this is adequate.) 2706 * most 5 elements to sort, so this is adequate.)
2652 */ 2707 */
2653 for (i = 0; i < *num_inodes; i++) { 2708 for (i = 0; i < *num_inodes; i++) {
2654 for (j = 1; j < *num_inodes; j++) { 2709 for (j = 1; j < *num_inodes; j++) {
2655 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) { 2710 if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
2656 temp = i_tab[j]; 2711 struct xfs_inode *temp = i_tab[j];
2657 i_tab[j] = i_tab[j-1]; 2712 i_tab[j] = i_tab[j-1];
2658 i_tab[j-1] = temp; 2713 i_tab[j-1] = temp;
2659 } 2714 }
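
The enlarged sort exists purely to impose a global lock order: two rename transactions touching overlapping inodes must agree on who locks first, and ascending i_ino is the tiebreak. An equivalent standalone version of the rule (the kernel keeps the open-coded bubble sort since n <= 5):

#include <stdint.h>
#include <stdlib.h>

struct inode { uint64_t i_ino; };

static int by_ino(const void *a, const void *b)
{
	uint64_t x = (*(struct inode *const *)a)->i_ino;
	uint64_t y = (*(struct inode *const *)b)->i_ino;

	return (x > y) - (x < y);
}

static void sort_for_rename(struct inode **tab, int n)
{
	qsort(tab, n, sizeof(*tab), by_ino);	/* lock in ascending i_ino order */
}
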
@@ -2661,6 +2716,31 @@ xfs_sort_for_rename(
2661 } 2716 }
2662} 2717}
2663 2718
2719static int
2720xfs_finish_rename(
2721 struct xfs_trans *tp,
2722 struct xfs_bmap_free *free_list)
2723{
2724 int committed = 0;
2725 int error;
2726
2727 /*
2728 * If this is a synchronous mount, make sure that the rename transaction
2729 * goes to disk before returning to the user.
2730 */
2731 if (tp->t_mountp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC))
2732 xfs_trans_set_sync(tp);
2733
2734 error = xfs_bmap_finish(&tp, free_list, &committed);
2735 if (error) {
2736 xfs_bmap_cancel(free_list);
2737 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
2738 return error;
2739 }
2740
2741 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
2742}
2743
2664/* 2744/*
2665 * xfs_cross_rename() 2745 * xfs_cross_rename()
2666 * 2746 *
@@ -2689,14 +2769,14 @@ xfs_cross_rename(
2689 ip2->i_ino, 2769 ip2->i_ino,
2690 first_block, free_list, spaceres); 2770 first_block, free_list, spaceres);
2691 if (error) 2771 if (error)
2692 goto out; 2772 goto out_trans_abort;
2693 2773
2694 /* Swap inode number for dirent in second parent */ 2774 /* Swap inode number for dirent in second parent */
2695 error = xfs_dir_replace(tp, dp2, name2, 2775 error = xfs_dir_replace(tp, dp2, name2,
2696 ip1->i_ino, 2776 ip1->i_ino,
2697 first_block, free_list, spaceres); 2777 first_block, free_list, spaceres);
2698 if (error) 2778 if (error)
2699 goto out; 2779 goto out_trans_abort;
2700 2780
2701 /* 2781 /*
2702 * If we're renaming one or more directories across different parents, 2782 * If we're renaming one or more directories across different parents,
@@ -2711,16 +2791,16 @@ xfs_cross_rename(
2711 dp1->i_ino, first_block, 2791 dp1->i_ino, first_block,
2712 free_list, spaceres); 2792 free_list, spaceres);
2713 if (error) 2793 if (error)
2714 goto out; 2794 goto out_trans_abort;
2715 2795
2716 /* transfer ip2 ".." reference to dp1 */ 2796 /* transfer ip2 ".." reference to dp1 */
2717 if (!S_ISDIR(ip1->i_d.di_mode)) { 2797 if (!S_ISDIR(ip1->i_d.di_mode)) {
2718 error = xfs_droplink(tp, dp2); 2798 error = xfs_droplink(tp, dp2);
2719 if (error) 2799 if (error)
2720 goto out; 2800 goto out_trans_abort;
2721 error = xfs_bumplink(tp, dp1); 2801 error = xfs_bumplink(tp, dp1);
2722 if (error) 2802 if (error)
2723 goto out; 2803 goto out_trans_abort;
2724 } 2804 }
2725 2805
2726 /* 2806 /*
@@ -2738,16 +2818,16 @@ xfs_cross_rename(
2738 dp2->i_ino, first_block, 2818 dp2->i_ino, first_block,
2739 free_list, spaceres); 2819 free_list, spaceres);
2740 if (error) 2820 if (error)
2741 goto out; 2821 goto out_trans_abort;
2742 2822
2743 /* transfer ip1 ".." reference to dp2 */ 2823 /* transfer ip1 ".." reference to dp2 */
2744 if (!S_ISDIR(ip2->i_d.di_mode)) { 2824 if (!S_ISDIR(ip2->i_d.di_mode)) {
2745 error = xfs_droplink(tp, dp1); 2825 error = xfs_droplink(tp, dp1);
2746 if (error) 2826 if (error)
2747 goto out; 2827 goto out_trans_abort;
2748 error = xfs_bumplink(tp, dp2); 2828 error = xfs_bumplink(tp, dp2);
2749 if (error) 2829 if (error)
2750 goto out; 2830 goto out_trans_abort;
2751 } 2831 }
2752 2832
2753 /* 2833 /*
@@ -2775,66 +2855,108 @@ xfs_cross_rename(
2775 } 2855 }
2776 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 2856 xfs_trans_ichgtime(tp, dp1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
2777 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE); 2857 xfs_trans_log_inode(tp, dp1, XFS_ILOG_CORE);
2778out: 2858 return xfs_finish_rename(tp, free_list);
2859
2860out_trans_abort:
2861 xfs_bmap_cancel(free_list);
2862 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
2779 return error; 2863 return error;
2780} 2864}
2781 2865
2782/* 2866/*
2867 * xfs_rename_alloc_whiteout()
2868 *
2869 * Return a referenced, unlinked, unlocked inode that can be used as a
2870 * whiteout in a rename transaction. We use a tmpfile inode here so that if we
2871 * crash between allocating the inode and linking it into the rename transaction,
2872 * recovery will free the inode and we won't leak it.
2873 */
2874static int
2875xfs_rename_alloc_whiteout(
2876 struct xfs_inode *dp,
2877 struct xfs_inode **wip)
2878{
2879 struct xfs_inode *tmpfile;
2880 int error;
2881
2882 error = xfs_create_tmpfile(dp, NULL, S_IFCHR | WHITEOUT_MODE, &tmpfile);
2883 if (error)
2884 return error;
2885
2886 /* Satisfy xfs_bumplink that this is a real tmpfile */
2887 xfs_finish_inode_setup(tmpfile);
2888 VFS_I(tmpfile)->i_state |= I_LINKABLE;
2889
2890 *wip = tmpfile;
2891 return 0;
2892}
2893
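
For context, the consumer of this new path is renameat2(2) with RENAME_WHITEOUT, used by overlayfs to record deletions on the upper layer. A minimal user-space exercise of the flag, via the raw syscall since the glibc wrapper arrived much later (assumes kernel headers that define SYS_renameat2):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef RENAME_WHITEOUT
#define RENAME_WHITEOUT (1 << 2)	/* from <linux/fs.h> */
#endif

int main(void)
{
	/* moves "old" to "new" and leaves a whiteout char device at "old" */
	if (syscall(SYS_renameat2, AT_FDCWD, "old", AT_FDCWD, "new",
		    RENAME_WHITEOUT) != 0) {
		perror("renameat2");
		return 1;
	}
	return 0;
}
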
2894/*
2783 * xfs_rename 2895 * xfs_rename
2784 */ 2896 */
2785int 2897int
2786xfs_rename( 2898xfs_rename(
2787 xfs_inode_t *src_dp, 2899 struct xfs_inode *src_dp,
2788 struct xfs_name *src_name, 2900 struct xfs_name *src_name,
2789 xfs_inode_t *src_ip, 2901 struct xfs_inode *src_ip,
2790 xfs_inode_t *target_dp, 2902 struct xfs_inode *target_dp,
2791 struct xfs_name *target_name, 2903 struct xfs_name *target_name,
2792 xfs_inode_t *target_ip, 2904 struct xfs_inode *target_ip,
2793 unsigned int flags) 2905 unsigned int flags)
2794{ 2906{
2795 xfs_trans_t *tp = NULL; 2907 struct xfs_mount *mp = src_dp->i_mount;
2796 xfs_mount_t *mp = src_dp->i_mount; 2908 struct xfs_trans *tp;
2797 int new_parent; /* moving to a new dir */ 2909 struct xfs_bmap_free free_list;
2798 int src_is_directory; /* src_name is a directory */ 2910 xfs_fsblock_t first_block;
2799 int error; 2911 struct xfs_inode *wip = NULL; /* whiteout inode */
2800 xfs_bmap_free_t free_list; 2912 struct xfs_inode *inodes[__XFS_SORT_INODES];
2801 xfs_fsblock_t first_block; 2913 int num_inodes = __XFS_SORT_INODES;
2802 int cancel_flags; 2914 bool new_parent = (src_dp != target_dp);
2803 int committed; 2915 bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
2804 xfs_inode_t *inodes[4]; 2916 int cancel_flags = 0;
2805 int spaceres; 2917 int spaceres;
2806 int num_inodes; 2918 int error;
2807 2919
2808 trace_xfs_rename(src_dp, target_dp, src_name, target_name); 2920 trace_xfs_rename(src_dp, target_dp, src_name, target_name);
2809 2921
2810 new_parent = (src_dp != target_dp); 2922 if ((flags & RENAME_EXCHANGE) && !target_ip)
2811 src_is_directory = S_ISDIR(src_ip->i_d.di_mode); 2923 return -EINVAL;
2924
2925 /*
2926 * If we are doing a whiteout operation, allocate the whiteout inode
2927 * we will be placing at the target and ensure the type is set
2928 * appropriately.
2929 */
2930 if (flags & RENAME_WHITEOUT) {
2931 ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
2932 error = xfs_rename_alloc_whiteout(target_dp, &wip);
2933 if (error)
2934 return error;
2935
2936 /* setup target dirent info as whiteout */
2937 src_name->type = XFS_DIR3_FT_CHRDEV;
2938 }
2812 2939
2813 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, 2940 xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip, wip,
2814 inodes, &num_inodes); 2941 inodes, &num_inodes);
2815 2942
2816 xfs_bmap_init(&free_list, &first_block);
2817 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME); 2943 tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
2818 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2819 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len); 2944 spaceres = XFS_RENAME_SPACE_RES(mp, target_name->len);
2820 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0); 2945 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, spaceres, 0);
2821 if (error == -ENOSPC) { 2946 if (error == -ENOSPC) {
2822 spaceres = 0; 2947 spaceres = 0;
2823 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0); 2948 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_rename, 0, 0);
2824 } 2949 }
2825 if (error) { 2950 if (error)
2826 xfs_trans_cancel(tp, 0); 2951 goto out_trans_cancel;
2827 goto std_return; 2952 cancel_flags = XFS_TRANS_RELEASE_LOG_RES;
2828 }
2829 2953
2830 /* 2954 /*
2831 * Attach the dquots to the inodes 2955 * Attach the dquots to the inodes
2832 */ 2956 */
2833 error = xfs_qm_vop_rename_dqattach(inodes); 2957 error = xfs_qm_vop_rename_dqattach(inodes);
2834 if (error) { 2958 if (error)
2835 xfs_trans_cancel(tp, cancel_flags); 2959 goto out_trans_cancel;
2836 goto std_return;
2837 }
2838 2960
2839 /* 2961 /*
2840 * Lock all the participating inodes. Depending upon whether 2962 * Lock all the participating inodes. Depending upon whether
@@ -2855,6 +2977,8 @@ xfs_rename(
2855 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL); 2977 xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
2856 if (target_ip) 2978 if (target_ip)
2857 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL); 2979 xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
2980 if (wip)
2981 xfs_trans_ijoin(tp, wip, XFS_ILOCK_EXCL);
2858 2982
2859 /* 2983 /*
2860 * If we are using project inheritance, we only allow renames 2984 * If we are using project inheritance, we only allow renames
@@ -2864,20 +2988,16 @@ xfs_rename(
2864 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 2988 if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
2865 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { 2989 (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
2866 error = -EXDEV; 2990 error = -EXDEV;
2867 goto error_return; 2991 goto out_trans_cancel;
2868 } 2992 }
2869 2993
2870 /* 2994 xfs_bmap_init(&free_list, &first_block);
2871 * Handle RENAME_EXCHANGE flags 2995
2872 */ 2996 /* RENAME_EXCHANGE is unique from here on. */
2873 if (flags & RENAME_EXCHANGE) { 2997 if (flags & RENAME_EXCHANGE)
2874 error = xfs_cross_rename(tp, src_dp, src_name, src_ip, 2998 return xfs_cross_rename(tp, src_dp, src_name, src_ip,
2875 target_dp, target_name, target_ip, 2999 target_dp, target_name, target_ip,
2876 &free_list, &first_block, spaceres); 3000 &free_list, &first_block, spaceres);
2877 if (error)
2878 goto abort_return;
2879 goto finish_rename;
2880 }
2881 3001
2882 /* 3002 /*
2883 * Set up the target. 3003 * Set up the target.
@@ -2890,7 +3010,7 @@ xfs_rename(
2890 if (!spaceres) { 3010 if (!spaceres) {
2891 error = xfs_dir_canenter(tp, target_dp, target_name); 3011 error = xfs_dir_canenter(tp, target_dp, target_name);
2892 if (error) 3012 if (error)
2893 goto error_return; 3013 goto out_trans_cancel;
2894 } 3014 }
2895 /* 3015 /*
2896 * If target does not exist and the rename crosses 3016 * If target does not exist and the rename crosses
@@ -2901,9 +3021,9 @@ xfs_rename(
2901 src_ip->i_ino, &first_block, 3021 src_ip->i_ino, &first_block,
2902 &free_list, spaceres); 3022 &free_list, spaceres);
2903 if (error == -ENOSPC) 3023 if (error == -ENOSPC)
2904 goto error_return; 3024 goto out_bmap_cancel;
2905 if (error) 3025 if (error)
2906 goto abort_return; 3026 goto out_trans_abort;
2907 3027
2908 xfs_trans_ichgtime(tp, target_dp, 3028 xfs_trans_ichgtime(tp, target_dp,
2909 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3029 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2911,7 +3031,7 @@ xfs_rename(
2911 if (new_parent && src_is_directory) { 3031 if (new_parent && src_is_directory) {
2912 error = xfs_bumplink(tp, target_dp); 3032 error = xfs_bumplink(tp, target_dp);
2913 if (error) 3033 if (error)
2914 goto abort_return; 3034 goto out_trans_abort;
2915 } 3035 }
2916 } else { /* target_ip != NULL */ 3036 } else { /* target_ip != NULL */
2917 /* 3037 /*
@@ -2926,7 +3046,7 @@ xfs_rename(
2926 if (!(xfs_dir_isempty(target_ip)) || 3046 if (!(xfs_dir_isempty(target_ip)) ||
2927 (target_ip->i_d.di_nlink > 2)) { 3047 (target_ip->i_d.di_nlink > 2)) {
2928 error = -EEXIST; 3048 error = -EEXIST;
2929 goto error_return; 3049 goto out_trans_cancel;
2930 } 3050 }
2931 } 3051 }
2932 3052
@@ -2943,7 +3063,7 @@ xfs_rename(
2943 src_ip->i_ino, 3063 src_ip->i_ino,
2944 &first_block, &free_list, spaceres); 3064 &first_block, &free_list, spaceres);
2945 if (error) 3065 if (error)
2946 goto abort_return; 3066 goto out_trans_abort;
2947 3067
2948 xfs_trans_ichgtime(tp, target_dp, 3068 xfs_trans_ichgtime(tp, target_dp,
2949 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); 3069 XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
@@ -2954,7 +3074,7 @@ xfs_rename(
2954 */ 3074 */
2955 error = xfs_droplink(tp, target_ip); 3075 error = xfs_droplink(tp, target_ip);
2956 if (error) 3076 if (error)
2957 goto abort_return; 3077 goto out_trans_abort;
2958 3078
2959 if (src_is_directory) { 3079 if (src_is_directory) {
2960 /* 3080 /*
@@ -2962,7 +3082,7 @@ xfs_rename(
2962 */ 3082 */
2963 error = xfs_droplink(tp, target_ip); 3083 error = xfs_droplink(tp, target_ip);
2964 if (error) 3084 if (error)
2965 goto abort_return; 3085 goto out_trans_abort;
2966 } 3086 }
2967 } /* target_ip != NULL */ 3087 } /* target_ip != NULL */
2968 3088
@@ -2979,7 +3099,7 @@ xfs_rename(
2979 &first_block, &free_list, spaceres); 3099 &first_block, &free_list, spaceres);
2980 ASSERT(error != -EEXIST); 3100 ASSERT(error != -EEXIST);
2981 if (error) 3101 if (error)
2982 goto abort_return; 3102 goto out_trans_abort;
2983 } 3103 }
2984 3104
2985 /* 3105 /*
@@ -3005,49 +3125,67 @@ xfs_rename(
3005 */ 3125 */
3006 error = xfs_droplink(tp, src_dp); 3126 error = xfs_droplink(tp, src_dp);
3007 if (error) 3127 if (error)
3008 goto abort_return; 3128 goto out_trans_abort;
3009 } 3129 }
3010 3130
3011 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino, 3131 /*
3132 * For whiteouts, we only need to update the source dirent with the
3133 * inode number of the whiteout inode rather than removing it
3134 * altogether.
3135 */
3136 if (wip) {
3137 error = xfs_dir_replace(tp, src_dp, src_name, wip->i_ino,
3012 &first_block, &free_list, spaceres); 3138 &first_block, &free_list, spaceres);
3139 } else
3140 error = xfs_dir_removename(tp, src_dp, src_name, src_ip->i_ino,
3141 &first_block, &free_list, spaceres);
3013 if (error) 3142 if (error)
3014 goto abort_return; 3143 goto out_trans_abort;
3015
3016 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3017 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3018 if (new_parent)
3019 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3020 3144
3021finish_rename:
3022 /* 3145 /*
3023 * If this is a synchronous mount, make sure that the 3146 * For whiteouts, we need to bump the link count on the whiteout inode.
3024 * rename transaction goes to disk before returning to 3147 * This means that failures all the way up to this point leave the inode
3025 * the user. 3148 * on the unlinked list and so cleanup is a simple matter of dropping
3149 * the remaining reference to it. If we fail here after bumping the link
3150 * count, we're shutting down the filesystem so we'll never see the
3151 * intermediate state on disk.
3026 */ 3152 */
3027 if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { 3153 if (wip) {
3028 xfs_trans_set_sync(tp); 3154 ASSERT(wip->i_d.di_nlink == 0);
3029 } 3155 error = xfs_bumplink(tp, wip);
3156 if (error)
3157 goto out_trans_abort;
3158 error = xfs_iunlink_remove(tp, wip);
3159 if (error)
3160 goto out_trans_abort;
3161 xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
3030 3162
3031 error = xfs_bmap_finish(&tp, &free_list, &committed); 3163 /*
3032 if (error) { 3164 * Now we have a real link, clear the "I'm a tmpfile" state
3033 xfs_bmap_cancel(&free_list); 3165 * flag from the inode so it doesn't accidentally get misused in
3034 xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | 3166 * future.
3035 XFS_TRANS_ABORT)); 3167 */
3036 goto std_return; 3168 VFS_I(wip)->i_state &= ~I_LINKABLE;
3037 } 3169 }
3038 3170
3039 /* 3171 xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
3040 * trans_commit will unlock src_ip, target_ip & decrement 3172 xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
3041 * the vnode references. 3173 if (new_parent)
3042 */ 3174 xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
3043 return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
3044 3175
3045 abort_return: 3176 error = xfs_finish_rename(tp, &free_list);
3177 if (wip)
3178 IRELE(wip);
3179 return error;
3180
3181out_trans_abort:
3046 cancel_flags |= XFS_TRANS_ABORT; 3182 cancel_flags |= XFS_TRANS_ABORT;
3047 error_return: 3183out_bmap_cancel:
3048 xfs_bmap_cancel(&free_list); 3184 xfs_bmap_cancel(&free_list);
3185out_trans_cancel:
3049 xfs_trans_cancel(tp, cancel_flags); 3186 xfs_trans_cancel(tp, cancel_flags);
3050 std_return: 3187 if (wip)
3188 IRELE(wip);
3051 return error; 3189 return error;
3052} 3190}
3053 3191
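The rename rework above retires the source dirent in one of two ways: a plain rename removes it, while RENAME_WHITEOUT repoints it at the freshly allocated whiteout inode (note the deliberate ordering: the whiteout's link count is only bumped once everything else has succeeded, so earlier failures leave it safely on the unlinked list). A minimal standalone C sketch of the dirent decision; dir_replace() and dir_remove() are hypothetical stand-ins for xfs_dir_replace() and xfs_dir_removename():

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-ins for the XFS directory operations */
static int dir_replace(const char *name, unsigned long new_ino)
{
	printf("repoint %s at inode %lu\n", name, new_ino);
	return 0;
}

static int dir_remove(const char *name)
{
	printf("remove %s\n", name);
	return 0;
}

/*
 * Mirrors the branch in xfs_rename(): for RENAME_WHITEOUT the source
 * dirent is updated in place to point at the whiteout inode; otherwise
 * it is removed altogether.
 */
static int retire_src_dirent(const char *src_name, bool whiteout,
			     unsigned long wip_ino)
{
	if (whiteout)
		return dir_replace(src_name, wip_ino);
	return dir_remove(src_name);
}

int main(void)
{
	retire_src_dirent("oldname", true, 131);	/* whiteout rename */
	retire_src_dirent("oldname", false, 0);		/* plain rename */
	return 0;
}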
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8e82b41d2050..8f22d20368d8 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -56,6 +56,7 @@ typedef struct xfs_inode {
56 struct xfs_inode_log_item *i_itemp; /* logging information */ 56 struct xfs_inode_log_item *i_itemp; /* logging information */
57 mrlock_t i_lock; /* inode lock */ 57 mrlock_t i_lock; /* inode lock */
58 mrlock_t i_iolock; /* inode IO lock */ 58 mrlock_t i_iolock; /* inode IO lock */
59 mrlock_t i_mmaplock; /* inode mmap IO lock */
59 atomic_t i_pincount; /* inode pin count */ 60 atomic_t i_pincount; /* inode pin count */
60 spinlock_t i_flags_lock; /* inode i_flags lock */ 61 spinlock_t i_flags_lock; /* inode i_flags lock */
61 /* Miscellaneous state. */ 62 /* Miscellaneous state. */
@@ -263,15 +264,20 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
263#define XFS_IOLOCK_SHARED (1<<1) 264#define XFS_IOLOCK_SHARED (1<<1)
264#define XFS_ILOCK_EXCL (1<<2) 265#define XFS_ILOCK_EXCL (1<<2)
265#define XFS_ILOCK_SHARED (1<<3) 266#define XFS_ILOCK_SHARED (1<<3)
267#define XFS_MMAPLOCK_EXCL (1<<4)
268#define XFS_MMAPLOCK_SHARED (1<<5)
266 269
267#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \ 270#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
268 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED) 271 | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
272 | XFS_MMAPLOCK_EXCL | XFS_MMAPLOCK_SHARED)
269 273
270#define XFS_LOCK_FLAGS \ 274#define XFS_LOCK_FLAGS \
271 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \ 275 { XFS_IOLOCK_EXCL, "IOLOCK_EXCL" }, \
272 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \ 276 { XFS_IOLOCK_SHARED, "IOLOCK_SHARED" }, \
273 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \ 277 { XFS_ILOCK_EXCL, "ILOCK_EXCL" }, \
274 { XFS_ILOCK_SHARED, "ILOCK_SHARED" } 278 { XFS_ILOCK_SHARED, "ILOCK_SHARED" }, \
279 { XFS_MMAPLOCK_EXCL, "MMAPLOCK_EXCL" }, \
280 { XFS_MMAPLOCK_SHARED, "MMAPLOCK_SHARED" }
275 281
276 282
277/* 283/*
@@ -302,17 +308,26 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
302#define XFS_IOLOCK_SHIFT 16 308#define XFS_IOLOCK_SHIFT 16
303#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT) 309#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
304 310
311#define XFS_MMAPLOCK_SHIFT 20
312
305#define XFS_ILOCK_SHIFT 24 313#define XFS_ILOCK_SHIFT 24
306#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT) 314#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
307#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT) 315#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
308#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT) 316#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
309 317
310#define XFS_IOLOCK_DEP_MASK 0x00ff0000 318#define XFS_IOLOCK_DEP_MASK 0x000f0000
319#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
311#define XFS_ILOCK_DEP_MASK 0xff000000 320#define XFS_ILOCK_DEP_MASK 0xff000000
312#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | XFS_ILOCK_DEP_MASK) 321#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
322 XFS_MMAPLOCK_DEP_MASK | \
323 XFS_ILOCK_DEP_MASK)
313 324
314#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT) 325#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) \
315#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT) 326 >> XFS_IOLOCK_SHIFT)
327#define XFS_MMAPLOCK_DEP(flags) (((flags) & XFS_MMAPLOCK_DEP_MASK) \
328 >> XFS_MMAPLOCK_SHIFT)
329#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) \
330 >> XFS_ILOCK_SHIFT)
316 331
317/* 332/*
318 * For multiple groups support: if S_ISGID bit is set in the parent 333 * For multiple groups support: if S_ISGID bit is set in the parent
@@ -384,10 +399,11 @@ enum xfs_prealloc_flags {
384 XFS_PREALLOC_INVISIBLE = (1 << 4), 399 XFS_PREALLOC_INVISIBLE = (1 << 4),
385}; 400};
386 401
387int xfs_update_prealloc_flags(struct xfs_inode *, 402int xfs_update_prealloc_flags(struct xfs_inode *ip,
388 enum xfs_prealloc_flags); 403 enum xfs_prealloc_flags flags);
389int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); 404int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
390int xfs_iozero(struct xfs_inode *, loff_t, size_t); 405 xfs_fsize_t isize, bool *did_zeroing);
406int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
391 407
392 408
393/* from xfs_iops.c */ 409/* from xfs_iops.c */
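The new MMAPLOCK lockdep annotations pack a 4-bit subclass per lock class into one flags word; note the IOLOCK dep mask shrinks from 0x00ff0000 to 0x000f0000 to make room for the MMAPLOCK nibble at bit 20. A standalone check of the round trip, with the values copied from the hunk above:

#include <stdio.h>

#define XFS_MMAPLOCK_SHIFT	20
#define XFS_MMAPLOCK_DEP_MASK	0x00f00000
#define XFS_MMAPLOCK_DEP(flags) \
	(((flags) & XFS_MMAPLOCK_DEP_MASK) >> XFS_MMAPLOCK_SHIFT)

int main(void)
{
	unsigned int flags = 3 << XFS_MMAPLOCK_SHIFT;	/* subclass 3 */

	/* prints 3: the subclass survives the pack/extract round trip */
	printf("%u\n", XFS_MMAPLOCK_DEP(flags));
	return 0;
}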
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 3a21cc71fda0..5f4a396f5186 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -631,7 +631,7 @@ xfs_ioc_space(
631 631
632 if (filp->f_flags & O_DSYNC) 632 if (filp->f_flags & O_DSYNC)
633 flags |= XFS_PREALLOC_SYNC; 633 flags |= XFS_PREALLOC_SYNC;
634 if (ioflags & XFS_IO_INVIS) 634 if (ioflags & XFS_IO_INVIS)
635 flags |= XFS_PREALLOC_INVISIBLE; 635 flags |= XFS_PREALLOC_INVISIBLE;
636 636
637 error = mnt_want_write_file(filp); 637 error = mnt_want_write_file(filp);
@@ -643,6 +643,9 @@ xfs_ioc_space(
643 if (error) 643 if (error)
644 goto out_unlock; 644 goto out_unlock;
645 645
646 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
647 iolock |= XFS_MMAPLOCK_EXCL;
648
646 switch (bf->l_whence) { 649 switch (bf->l_whence) {
647 case 0: /*SEEK_SET*/ 650 case 0: /*SEEK_SET*/
648 break; 651 break;
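The idiom in this hunk is lock-flag accumulation: each lock taken is OR-ed into iolock so the single xfs_iunlock(ip, iolock) on the exit path releases exactly what was acquired. A toy standalone illustration (the flag values here are illustrative, not the XFS ones):

#include <stdio.h>

#define IOLOCK_EXCL	(1U << 0)	/* illustrative values only */
#define MMAPLOCK_EXCL	(1U << 4)

int main(void)
{
	unsigned int held = IOLOCK_EXCL;	/* taken at entry */

	held |= MMAPLOCK_EXCL;			/* taken mid-function */
	/* ... extent manipulation work ... */
	printf("unlock mask: %#x\n", held);	/* one unlock drops both */
	return 0;
}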
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ccb1dd0d509e..38e633bad8c2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -460,8 +460,7 @@ xfs_iomap_prealloc_size(
460 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN), 460 alloc_blocks = XFS_FILEOFF_MIN(roundup_pow_of_two(MAXEXTLEN),
461 alloc_blocks); 461 alloc_blocks);
462 462
463 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 463 freesp = percpu_counter_read_positive(&mp->m_fdblocks);
464 freesp = mp->m_sb.sb_fdblocks;
465 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) { 464 if (freesp < mp->m_low_space[XFS_LOWSP_5_PCNT]) {
466 shift = 2; 465 shift = 2;
467 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT]) 466 if (freesp < mp->m_low_space[XFS_LOWSP_4_PCNT])
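The replacement reads free space straight from the m_fdblocks percpu counter and throttles speculative preallocation as it falls through the low-space thresholds. A standalone sketch of the shift logic, assuming the remaining levels continue the pattern the hunk shows for the 5% and 4% thresholds:

/* indices into the thresholds array, following the XFS_LOWSP_* enum */
enum { LOWSP_1 = 0, LOWSP_2, LOWSP_3, LOWSP_4, LOWSP_5, LOWSP_MAX };

/*
 * How many times to halve the preallocation size: 0 while free space
 * is comfortable, growing by one for each threshold crossed.
 */
static unsigned int prealloc_shift(long long freesp,
				   const long long low[LOWSP_MAX])
{
	unsigned int shift = 0;

	if (freesp < low[LOWSP_5]) {
		shift = 2;
		if (freesp < low[LOWSP_4])
			shift++;
		if (freesp < low[LOWSP_3])
			shift++;
		if (freesp < low[LOWSP_2])
			shift++;
		if (freesp < low[LOWSP_1])
			shift++;
	}
	return shift;
}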
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1d4efee4be17..2f1839e4dd1b 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -394,7 +394,7 @@ xfs_vn_rename(
394 struct xfs_name oname; 394 struct xfs_name oname;
395 struct xfs_name nname; 395 struct xfs_name nname;
396 396
397 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 397 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
398 return -EINVAL; 398 return -EINVAL;
399 399
400 /* if we are exchanging files, we need to set i_mode of both files */ 400 /* if we are exchanging files, we need to set i_mode of both files */
@@ -756,6 +756,7 @@ xfs_setattr_size(
756 int error; 756 int error;
757 uint lock_flags = 0; 757 uint lock_flags = 0;
758 uint commit_flags = 0; 758 uint commit_flags = 0;
759 bool did_zeroing = false;
759 760
760 trace_xfs_setattr(ip); 761 trace_xfs_setattr(ip);
761 762
@@ -770,6 +771,7 @@ xfs_setattr_size(
770 return error; 771 return error;
771 772
772 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 773 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
774 ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
773 ASSERT(S_ISREG(ip->i_d.di_mode)); 775 ASSERT(S_ISREG(ip->i_d.di_mode));
774 ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| 776 ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
775 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); 777 ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
@@ -799,20 +801,16 @@ xfs_setattr_size(
799 return error; 801 return error;
800 802
801 /* 803 /*
802 * Now we can make the changes. Before we join the inode to the 804 * File data changes must be complete before we start the transaction to
803 * transaction, take care of the part of the truncation that must be 805 * modify the inode. This needs to be done before joining the inode to
804 * done without the inode lock. This needs to be done before joining 806 * the transaction because the inode cannot be unlocked once it is a
805 * the inode to the transaction, because the inode cannot be unlocked 807 * part of the transaction.
806 * once it is a part of the transaction. 808 *
809 * Start with zeroing any data block beyond EOF that we may expose on
810 * file extension.
807 */ 811 */
808 if (newsize > oldsize) { 812 if (newsize > oldsize) {
809 /* 813 error = xfs_zero_eof(ip, newsize, oldsize, &did_zeroing);
810 * Do the first part of growing a file: zero any data in the
811 * last block that is beyond the old EOF. We need to do this
812 * before the inode is joined to the transaction to modify
813 * i_size.
814 */
815 error = xfs_zero_eof(ip, newsize, oldsize);
816 if (error) 814 if (error)
817 return error; 815 return error;
818 } 816 }
@@ -822,75 +820,42 @@ xfs_setattr_size(
822 * any previous writes that are beyond the on disk EOF and the new 820 * any previous writes that are beyond the on disk EOF and the new
823 * EOF that have not been written out need to be written here. If we 821 * EOF that have not been written out need to be written here. If we
824 * do not write the data out, we expose ourselves to the null files 822 * do not write the data out, we expose ourselves to the null files
825 * problem. 823 * problem. Note that this includes any block zeroing we did above;
826 * 824 * otherwise those blocks may not be zeroed after a crash.
827 * Only flush from the on disk size to the smaller of the in memory
828 * file size or the new size as that's the range we really care about
829 * here and prevents waiting for other data not within the range we
830 * care about here.
831 */ 825 */
832 if (oldsize != ip->i_d.di_size && newsize > ip->i_d.di_size) { 826 if (newsize > ip->i_d.di_size &&
827 (oldsize != ip->i_d.di_size || did_zeroing)) {
833 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, 828 error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
834 ip->i_d.di_size, newsize); 829 ip->i_d.di_size, newsize);
835 if (error) 830 if (error)
836 return error; 831 return error;
837 } 832 }
838 833
839 /* 834 /* Now wait for all direct I/O to complete. */
840 * Wait for all direct I/O to complete.
841 */
842 inode_dio_wait(inode); 835 inode_dio_wait(inode);
843 836
844 /* 837 /*
845 * Do all the page cache truncate work outside the transaction context 838 * We've already locked out new page faults, so now we can safely remove
846 * as the "lock" order is page lock->log space reservation. i.e. 839 * pages from the page cache knowing they won't get refaulted until we
847 * locking pages inside the transaction can ABBA deadlock with 840 * drop the XFS_MMAP_EXCL lock after the extent manipulations are
 848 writeback. We have to do the VFS inode size update before we truncate 841 * drop the XFS_MMAPLOCK_EXCL lock after the extent manipulations are
849 * the pagecache, however, to avoid racing with page faults beyond the 842 * PTEs on extending truncates and hence ensures sub-page block size
850 * new EOF they are not serialised against truncate operations except by 843 * filesystems are correctly handled, too.
851 * page locks and size updates.
852 * 844 *
853 * Hence we are in a situation where a truncate can fail with ENOMEM 845 * We have to do all the page cache truncate work outside the
854 * from xfs_trans_reserve(), but having already truncated the in-memory 846 * transaction context as the "lock" order is page lock->log space
855 * version of the file (i.e. made user visible changes). There's not 847 * reservation as defined by extent allocation in the writeback path.
856 * much we can do about this, except to hope that the caller sees ENOMEM 848 * Hence a truncate can fail with ENOMEM from xfs_trans_reserve(), but
857 * and retries the truncate operation. 849 * having already truncated the in-memory version of the file (i.e. made
850 * user visible changes). There's not much we can do about this, except
851 * to hope that the caller sees ENOMEM and retries the truncate
852 * operation.
858 */ 853 */
859 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks); 854 error = block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);
860 if (error) 855 if (error)
861 return error; 856 return error;
862 truncate_setsize(inode, newsize); 857 truncate_setsize(inode, newsize);
863 858
864 /*
865 * The "we can't serialise against page faults" pain gets worse.
866 *
867 * If the file is mapped then we have to clean the page at the old EOF
868 * when extending the file. Extending the file can expose changes the
869 * underlying page mapping (e.g. from beyond EOF to a hole or
870 * unwritten), and so on the next attempt to write to that page we need
871 * to remap it for write. i.e. we need .page_mkwrite() to be called.
872 * Hence we need to clean the page to clean the pte and so a new write
873 * fault will be triggered appropriately.
874 *
875 * If we do it before we change the inode size, then we can race with a
876 * page fault that maps the page with exactly the same problem. If we do
877 * it after we change the file size, then a new page fault can come in
878 * and allocate space before we've run the rest of the truncate
879 * transaction. That's kinda grotesque, but it's better than have data
880 * over a hole, and so that's the lesser evil that has been chosen here.
881 *
882 * The real solution, however, is to have some mechanism for locking out
883 * page faults while a truncate is in progress.
884 */
885 if (newsize > oldsize && mapping_mapped(VFS_I(ip)->i_mapping)) {
886 error = filemap_write_and_wait_range(
887 VFS_I(ip)->i_mapping,
888 round_down(oldsize, PAGE_CACHE_SIZE),
889 round_up(oldsize, PAGE_CACHE_SIZE) - 1);
890 if (error)
891 return error;
892 }
893
894 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE); 859 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
895 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); 860 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);
896 if (error) 861 if (error)
@@ -989,8 +954,12 @@ xfs_vn_setattr(
989 954
990 xfs_ilock(ip, iolock); 955 xfs_ilock(ip, iolock);
991 error = xfs_break_layouts(dentry->d_inode, &iolock, true); 956 error = xfs_break_layouts(dentry->d_inode, &iolock, true);
992 if (!error) 957 if (!error) {
958 xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
959 iolock |= XFS_MMAPLOCK_EXCL;
960
993 error = xfs_setattr_size(ip, iattr); 961 error = xfs_setattr_size(ip, iattr);
962 }
994 xfs_iunlock(ip, iolock); 963 xfs_iunlock(ip, iolock);
995 } else { 964 } else {
996 error = xfs_setattr_nonsize(ip, iattr, 0); 965 error = xfs_setattr_nonsize(ip, iattr, 0);
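The reworked flush condition is the subtle part of this hunk: dirty data beyond the on-disk size must reach the disk before the size-change transaction, and that now explicitly includes blocks zeroed by xfs_zero_eof() (tracked via did_zeroing). Restating the predicate from the hunk above as standalone C:

#include <stdbool.h>

/*
 * Flush [disk_size, newsize) before starting the truncate transaction
 * when the range may hold dirty data: either writes landed beyond the
 * on-disk size, or we just zeroed blocks for the extension.
 */
static bool need_preflush(long long newsize, long long oldsize,
			  long long disk_size, bool did_zeroing)
{
	return newsize > disk_size &&
	       (oldsize != disk_size || did_zeroing);
}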
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index c31d2c2eadc4..7c7842c85a08 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -116,15 +116,6 @@ typedef __uint64_t __psunsigned_t;
116#undef XFS_NATIVE_HOST 116#undef XFS_NATIVE_HOST
117#endif 117#endif
118 118
119/*
120 * Feature macros (disable/enable)
121 */
122#ifdef CONFIG_SMP
123#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
124#else
125#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
126#endif
127
128#define irix_sgid_inherit xfs_params.sgid_inherit.val 119#define irix_sgid_inherit xfs_params.sgid_inherit.val
129#define irix_symlink_mode xfs_params.symlink_mode.val 120#define irix_symlink_mode xfs_params.symlink_mode.val
130#define xfs_panic_mask xfs_params.panic_mask.val 121#define xfs_panic_mask xfs_params.panic_mask.val
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a5a945fc3bdc..4f5784f85a5b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -4463,10 +4463,10 @@ xlog_do_recover(
4463 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); 4463 xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
4464 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC); 4464 ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
4465 ASSERT(xfs_sb_good_version(sbp)); 4465 ASSERT(xfs_sb_good_version(sbp));
4466 xfs_reinit_percpu_counters(log->l_mp);
4467
4466 xfs_buf_relse(bp); 4468 xfs_buf_relse(bp);
4467 4469
4468 /* We've re-read the superblock so re-initialize per-cpu counters */
4469 xfs_icsb_reinit_counters(log->l_mp);
4470 4470
4471 xlog_recover_check_summary(log); 4471 xlog_recover_check_summary(log);
4472 4472
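The body of xfs_reinit_percpu_counters() is not part of this diff; presumably it reseeds the three generic counters from the just-re-read superblock, along the lines of this kernel-style sketch (an assumption, not the patch's actual code):

/* hedged sketch -- the real body is not shown in this diff */
void xfs_reinit_percpu_counters(struct xfs_mount *mp)
{
	percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
	percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
	percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
}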
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4fa80e63eea2..2ce7ee3b4ec1 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,18 +43,6 @@
43#include "xfs_sysfs.h" 43#include "xfs_sysfs.h"
44 44
45 45
46#ifdef HAVE_PERCPU_SB
47STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
48 int);
49STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
50 int);
51STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
52#else
53
54#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
55#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
56#endif
57
58static DEFINE_MUTEX(xfs_uuid_table_mutex); 46static DEFINE_MUTEX(xfs_uuid_table_mutex);
59static int xfs_uuid_table_size; 47static int xfs_uuid_table_size;
60static uuid_t *xfs_uuid_table; 48static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
347 goto reread; 335 goto reread;
348 } 336 }
349 337
350 /* Initialize per-cpu counters */ 338 xfs_reinit_percpu_counters(mp);
351 xfs_icsb_reinit_counters(mp);
352 339
353 /* no need to be quiet anymore, so reset the buf ops */ 340 /* no need to be quiet anymore, so reset the buf ops */
354 bp->b_ops = &xfs_sb_buf_ops; 341 bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
1087 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE)) 1074 if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
1088 return 0; 1075 return 0;
1089 1076
1090 xfs_icsb_sync_counters(mp, 0);
1091
1092 /* 1077 /*
1093 * we don't need to do this if we are updating the superblock 1078 * we don't need to do this if we are updating the superblock
1094 * counters on every modification. 1079 * counters on every modification.
@@ -1099,253 +1084,136 @@ xfs_log_sbcount(xfs_mount_t *mp)
1099 return xfs_sync_sb(mp, true); 1084 return xfs_sync_sb(mp, true);
1100} 1085}
1101 1086
1102/* 1087int
1103 * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply 1088xfs_mod_icount(
1104 * a delta to a specified field in the in-core superblock. Simply 1089 struct xfs_mount *mp,
1105 * switch on the field indicated and apply the delta to that field. 1090 int64_t delta)
1106 * Fields are not allowed to dip below zero, so if the delta would
1107 * do this do not apply it and return EINVAL.
1108 *
1109 * The m_sb_lock must be held when this routine is called.
1110 */
1111STATIC int
1112xfs_mod_incore_sb_unlocked(
1113 xfs_mount_t *mp,
1114 xfs_sb_field_t field,
1115 int64_t delta,
1116 int rsvd)
1117{ 1091{
1118 int scounter; /* short counter for 32 bit fields */ 1092 /* deltas are +/-64, hence the large batch size of 128. */
1119 long long lcounter; /* long counter for 64 bit fields */ 1093 __percpu_counter_add(&mp->m_icount, delta, 128);
1120 long long res_used, rem; 1094 if (percpu_counter_compare(&mp->m_icount, 0) < 0) {
1121
1122 /*
1123 * With the in-core superblock spin lock held, switch
1124 * on the indicated field. Apply the delta to the
1125 * proper field. If the fields value would dip below
1126 * 0, then do not apply the delta and return EINVAL.
1127 */
1128 switch (field) {
1129 case XFS_SBS_ICOUNT:
1130 lcounter = (long long)mp->m_sb.sb_icount;
1131 lcounter += delta;
1132 if (lcounter < 0) {
1133 ASSERT(0);
1134 return -EINVAL;
1135 }
1136 mp->m_sb.sb_icount = lcounter;
1137 return 0;
1138 case XFS_SBS_IFREE:
1139 lcounter = (long long)mp->m_sb.sb_ifree;
1140 lcounter += delta;
1141 if (lcounter < 0) {
1142 ASSERT(0);
1143 return -EINVAL;
1144 }
1145 mp->m_sb.sb_ifree = lcounter;
1146 return 0;
1147 case XFS_SBS_FDBLOCKS:
1148 lcounter = (long long)
1149 mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1150 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1151
1152 if (delta > 0) { /* Putting blocks back */
1153 if (res_used > delta) {
1154 mp->m_resblks_avail += delta;
1155 } else {
1156 rem = delta - res_used;
1157 mp->m_resblks_avail = mp->m_resblks;
1158 lcounter += rem;
1159 }
1160 } else { /* Taking blocks away */
1161 lcounter += delta;
1162 if (lcounter >= 0) {
1163 mp->m_sb.sb_fdblocks = lcounter +
1164 XFS_ALLOC_SET_ASIDE(mp);
1165 return 0;
1166 }
1167
1168 /*
1169 * We are out of blocks, use any available reserved
1170 * blocks if were allowed to.
1171 */
1172 if (!rsvd)
1173 return -ENOSPC;
1174
1175 lcounter = (long long)mp->m_resblks_avail + delta;
1176 if (lcounter >= 0) {
1177 mp->m_resblks_avail = lcounter;
1178 return 0;
1179 }
1180 printk_once(KERN_WARNING
1181 "Filesystem \"%s\": reserve blocks depleted! "
1182 "Consider increasing reserve pool size.",
1183 mp->m_fsname);
1184 return -ENOSPC;
1185 }
1186
1187 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1188 return 0;
1189 case XFS_SBS_FREXTENTS:
1190 lcounter = (long long)mp->m_sb.sb_frextents;
1191 lcounter += delta;
1192 if (lcounter < 0) {
1193 return -ENOSPC;
1194 }
1195 mp->m_sb.sb_frextents = lcounter;
1196 return 0;
1197 case XFS_SBS_DBLOCKS:
1198 lcounter = (long long)mp->m_sb.sb_dblocks;
1199 lcounter += delta;
1200 if (lcounter < 0) {
1201 ASSERT(0);
1202 return -EINVAL;
1203 }
1204 mp->m_sb.sb_dblocks = lcounter;
1205 return 0;
1206 case XFS_SBS_AGCOUNT:
1207 scounter = mp->m_sb.sb_agcount;
1208 scounter += delta;
1209 if (scounter < 0) {
1210 ASSERT(0);
1211 return -EINVAL;
1212 }
1213 mp->m_sb.sb_agcount = scounter;
1214 return 0;
1215 case XFS_SBS_IMAX_PCT:
1216 scounter = mp->m_sb.sb_imax_pct;
1217 scounter += delta;
1218 if (scounter < 0) {
1219 ASSERT(0);
1220 return -EINVAL;
1221 }
1222 mp->m_sb.sb_imax_pct = scounter;
1223 return 0;
1224 case XFS_SBS_REXTSIZE:
1225 scounter = mp->m_sb.sb_rextsize;
1226 scounter += delta;
1227 if (scounter < 0) {
1228 ASSERT(0);
1229 return -EINVAL;
1230 }
1231 mp->m_sb.sb_rextsize = scounter;
1232 return 0;
1233 case XFS_SBS_RBMBLOCKS:
1234 scounter = mp->m_sb.sb_rbmblocks;
1235 scounter += delta;
1236 if (scounter < 0) {
1237 ASSERT(0);
1238 return -EINVAL;
1239 }
1240 mp->m_sb.sb_rbmblocks = scounter;
1241 return 0;
1242 case XFS_SBS_RBLOCKS:
1243 lcounter = (long long)mp->m_sb.sb_rblocks;
1244 lcounter += delta;
1245 if (lcounter < 0) {
1246 ASSERT(0);
1247 return -EINVAL;
1248 }
1249 mp->m_sb.sb_rblocks = lcounter;
1250 return 0;
1251 case XFS_SBS_REXTENTS:
1252 lcounter = (long long)mp->m_sb.sb_rextents;
1253 lcounter += delta;
1254 if (lcounter < 0) {
1255 ASSERT(0);
1256 return -EINVAL;
1257 }
1258 mp->m_sb.sb_rextents = lcounter;
1259 return 0;
1260 case XFS_SBS_REXTSLOG:
1261 scounter = mp->m_sb.sb_rextslog;
1262 scounter += delta;
1263 if (scounter < 0) {
1264 ASSERT(0);
1265 return -EINVAL;
1266 }
1267 mp->m_sb.sb_rextslog = scounter;
1268 return 0;
1269 default:
1270 ASSERT(0); 1095 ASSERT(0);
1096 percpu_counter_add(&mp->m_icount, -delta);
1271 return -EINVAL; 1097 return -EINVAL;
1272 } 1098 }
1099 return 0;
1273} 1100}
1274 1101
1275/*
1276 * xfs_mod_incore_sb() is used to change a field in the in-core
1277 * superblock structure by the specified delta. This modification
1278 * is protected by the m_sb_lock. Just use the xfs_mod_incore_sb_unlocked()
1279 * routine to do the work.
1280 */
1281int 1102int
1282xfs_mod_incore_sb( 1103xfs_mod_ifree(
1283 struct xfs_mount *mp, 1104 struct xfs_mount *mp,
1284 xfs_sb_field_t field, 1105 int64_t delta)
1285 int64_t delta,
1286 int rsvd)
1287{ 1106{
1288 int status; 1107 percpu_counter_add(&mp->m_ifree, delta);
1289 1108 if (percpu_counter_compare(&mp->m_ifree, 0) < 0) {
1290#ifdef HAVE_PERCPU_SB 1109 ASSERT(0);
1291 ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); 1110 percpu_counter_add(&mp->m_ifree, -delta);
1292#endif 1111 return -EINVAL;
1293 spin_lock(&mp->m_sb_lock); 1112 }
1294 status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); 1113 return 0;
1295 spin_unlock(&mp->m_sb_lock);
1296
1297 return status;
1298} 1114}
1299 1115
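Both new helpers follow the same optimistic pattern: apply the delta, then check for underflow and roll it back. Modeled on a plain integer below; the percpu versions behave the same way, except that percpu_counter_compare() is only approximately ordered against concurrent updates, which is why the underflow path is an ASSERT-and-undo rather than a hard guarantee:

/* standalone model of the xfs_mod_icount()/xfs_mod_ifree() pattern */
static int mod_counter(long long *cnt, long long delta)
{
	*cnt += delta;
	if (*cnt < 0) {
		*cnt -= delta;	/* undo the update */
		return -22;	/* -EINVAL, as in the helpers above */
	}
	return 0;
}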
1300/*
1301 * Change more than one field in the in-core superblock structure at a time.
1302 *
1303 * The fields and changes to those fields are specified in the array of
1304 * xfs_mod_sb structures passed in. Either all of the specified deltas
1305 * will be applied or none of them will. If any modified field dips below 0,
1306 * then all modifications will be backed out and EINVAL will be returned.
1307 *
1308 * Note that this function may not be used for the superblock values that
1309 * are tracked with the in-memory per-cpu counters - a direct call to
1310 * xfs_icsb_modify_counters is required for these.
1311 */
1312int 1116int
1313xfs_mod_incore_sb_batch( 1117xfs_mod_fdblocks(
1314 struct xfs_mount *mp, 1118 struct xfs_mount *mp,
1315 xfs_mod_sb_t *msb, 1119 int64_t delta,
1316 uint nmsb, 1120 bool rsvd)
1317 int rsvd)
1318{ 1121{
1319 xfs_mod_sb_t *msbp; 1122 int64_t lcounter;
1320 int error = 0; 1123 long long res_used;
1124 s32 batch;
1125
1126 if (delta > 0) {
1127 /*
1128 * If the reserve pool is depleted, put blocks back into it
1129 * first. Most of the time the pool is full.
1130 */
1131 if (likely(mp->m_resblks == mp->m_resblks_avail)) {
1132 percpu_counter_add(&mp->m_fdblocks, delta);
1133 return 0;
1134 }
1135
1136 spin_lock(&mp->m_sb_lock);
1137 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1138
1139 if (res_used > delta) {
1140 mp->m_resblks_avail += delta;
1141 } else {
1142 delta -= res_used;
1143 mp->m_resblks_avail = mp->m_resblks;
1144 percpu_counter_add(&mp->m_fdblocks, delta);
1145 }
1146 spin_unlock(&mp->m_sb_lock);
1147 return 0;
1148 }
1321 1149
1322 /* 1150 /*
1323 * Loop through the array of mod structures and apply each individually. 1151 * Taking blocks away, need to be more accurate the closer we
1324 * If any fail, then back out all those which have already been applied. 1152 * are to zero.
1325 * Do all of this within the scope of the m_sb_lock so that all of the 1153 *
1326 * changes will be atomic. 1154 * batch size is set to a maximum of 1024 blocks - if we are
 1155 * allocating or freeing extents larger than this then we aren't
1156 * going to be hammering the counter lock so a lock per update
1157 * is not a problem.
1158 *
1159 * If the counter has a value of less than 2 * max batch size,
 1160 * then make everything serialise as we are really close to
1161 * ENOSPC.
1162 */
1163#define __BATCH 1024
1164 if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0)
1165 batch = 1;
1166 else
1167 batch = __BATCH;
1168#undef __BATCH
1169
1170 __percpu_counter_add(&mp->m_fdblocks, delta, batch);
1171 if (percpu_counter_compare(&mp->m_fdblocks,
1172 XFS_ALLOC_SET_ASIDE(mp)) >= 0) {
1173 /* we had space! */
1174 return 0;
1175 }
1176
1177 /*
1178 * lock up the sb for dipping into reserves before releasing the space
1179 * that took us to ENOSPC.
1327 */ 1180 */
1328 spin_lock(&mp->m_sb_lock); 1181 spin_lock(&mp->m_sb_lock);
1329 for (msbp = msb; msbp < (msb + nmsb); msbp++) { 1182 percpu_counter_add(&mp->m_fdblocks, -delta);
1330 ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || 1183 if (!rsvd)
1331 msbp->msb_field > XFS_SBS_FDBLOCKS); 1184 goto fdblocks_enospc;
1332 1185
1333 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, 1186 lcounter = (long long)mp->m_resblks_avail + delta;
1334 msbp->msb_delta, rsvd); 1187 if (lcounter >= 0) {
1335 if (error) 1188 mp->m_resblks_avail = lcounter;
1336 goto unwind; 1189 spin_unlock(&mp->m_sb_lock);
1190 return 0;
1337 } 1191 }
1192 printk_once(KERN_WARNING
1193 "Filesystem \"%s\": reserve blocks depleted! "
1194 "Consider increasing reserve pool size.",
1195 mp->m_fsname);
1196fdblocks_enospc:
1338 spin_unlock(&mp->m_sb_lock); 1197 spin_unlock(&mp->m_sb_lock);
1339 return 0; 1198 return -ENOSPC;
1199}
1340 1200
1341unwind: 1201int
1342 while (--msbp >= msb) { 1202xfs_mod_frextents(
1343 error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, 1203 struct xfs_mount *mp,
1344 -msbp->msb_delta, rsvd); 1204 int64_t delta)
1345 ASSERT(error == 0); 1205{
1346 } 1206 int64_t lcounter;
1207 int ret = 0;
1208
1209 spin_lock(&mp->m_sb_lock);
1210 lcounter = mp->m_sb.sb_frextents + delta;
1211 if (lcounter < 0)
1212 ret = -ENOSPC;
1213 else
1214 mp->m_sb.sb_frextents = lcounter;
1347 spin_unlock(&mp->m_sb_lock); 1215 spin_unlock(&mp->m_sb_lock);
1348 return error; 1216 return ret;
1349} 1217}
1350 1218
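Two details of xfs_mod_fdblocks() deserve emphasis: frees refill a depleted reserve pool before the free-space counter, and the percpu batch shrinks to 1 near ENOSPC so every update is folded into the global count immediately and the comparison stays accurate. A standalone model of the batch choice:

#define BATCH	1024	/* matches __BATCH in the function above */

/*
 * Near ENOSPC every modification must serialise so the global count
 * stays accurate; far from it, a large batch keeps updates CPU-local.
 */
static int fdblocks_batch(long long fdblocks)
{
	return fdblocks < 2 * BATCH ? 1 : BATCH;
}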
1351/* 1219/*
@@ -1407,573 +1275,3 @@ xfs_dev_is_read_only(
1407 } 1275 }
1408 return 0; 1276 return 0;
1409} 1277}
1410
1411#ifdef HAVE_PERCPU_SB
1412/*
1413 * Per-cpu incore superblock counters
1414 *
1415 * Simple concept, difficult implementation
1416 *
1417 * Basically, replace the incore superblock counters with a distributed per cpu
1418 * counter for contended fields (e.g. free block count).
1419 *
1420 * Difficulties arise in that the incore sb is used for ENOSPC checking, and
1421 * hence needs to be accurately read when we are running low on space. Hence
1422 * there is a method to enable and disable the per-cpu counters based on how
1423 * much "stuff" is available in them.
1424 *
1425 * Basically, a counter is enabled if there is enough free resource to justify
1426 * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
1427 * ENOSPC), then we disable the counters to synchronise all callers and
1428 * re-distribute the available resources.
1429 *
1430 * If, once we redistributed the available resources, we still get a failure,
1431 * we disable the per-cpu counter and go through the slow path.
1432 *
1433 * The slow path is the current xfs_mod_incore_sb() function. This means that
1434 * when we disable a per-cpu counter, we need to drain its resources back to
1435 * the global superblock. We do this after disabling the counter to prevent
1436 * more threads from queueing up on the counter.
1437 *
1438 * Essentially, this means that we still need a lock in the fast path to enable
1439 * synchronisation between the global counters and the per-cpu counters. This
1440 * is not a problem because the lock will be local to a CPU almost all the time
1441 * and have little contention except when we get to ENOSPC conditions.
1442 *
1443 * Basically, this lock becomes a barrier that enables us to lock out the fast
1444 * path while we do things like enabling and disabling counters and
1445 * synchronising the counters.
1446 *
1447 * Locking rules:
1448 *
1449 * 1. m_sb_lock before picking up per-cpu locks
1450 * 2. per-cpu locks always picked up via for_each_online_cpu() order
1451 * 3. accurate counter sync requires m_sb_lock + per cpu locks
1452 * 4. modifying per-cpu counters requires holding per-cpu lock
1453 * 5. modifying global counters requires holding m_sb_lock
1454 * 6. enabling or disabling a counter requires holding the m_sb_lock
1455 * and _none_ of the per-cpu locks.
1456 *
1457 * Disabled counters are only ever re-enabled by a balance operation
1458 * that results in more free resources per CPU than a given threshold.
1459 * To ensure counters don't remain disabled, they are rebalanced when
1460 * the global resource goes above a higher threshold (i.e. some hysteresis
1461 * is present to prevent thrashing).
1462 */
1463
1464#ifdef CONFIG_HOTPLUG_CPU
1465/*
1466 * hot-plug CPU notifier support.
1467 *
1468 * We need a notifier per filesystem as we need to be able to identify
1469 * the filesystem to balance the counters out. This is achieved by
1470 * having a notifier block embedded in the xfs_mount_t and doing pointer
1471 * magic to get the mount pointer from the notifier block address.
1472 */
1473STATIC int
1474xfs_icsb_cpu_notify(
1475 struct notifier_block *nfb,
1476 unsigned long action,
1477 void *hcpu)
1478{
1479 xfs_icsb_cnts_t *cntp;
1480 xfs_mount_t *mp;
1481
1482 mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
1483 cntp = (xfs_icsb_cnts_t *)
1484 per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
1485 switch (action) {
1486 case CPU_UP_PREPARE:
1487 case CPU_UP_PREPARE_FROZEN:
1488 /* Easy Case - initialize the area and locks, and
1489 * then rebalance when online does everything else for us. */
1490 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1491 break;
1492 case CPU_ONLINE:
1493 case CPU_ONLINE_FROZEN:
1494 xfs_icsb_lock(mp);
1495 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
1496 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
1497 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
1498 xfs_icsb_unlock(mp);
1499 break;
1500 case CPU_DEAD:
1501 case CPU_DEAD_FROZEN:
1502 /* Disable all the counters, then fold the dead cpu's
1503 * count into the total on the global superblock and
1504 * re-enable the counters. */
1505 xfs_icsb_lock(mp);
1506 spin_lock(&mp->m_sb_lock);
1507 xfs_icsb_disable_counter(mp, XFS_SBS_ICOUNT);
1508 xfs_icsb_disable_counter(mp, XFS_SBS_IFREE);
1509 xfs_icsb_disable_counter(mp, XFS_SBS_FDBLOCKS);
1510
1511 mp->m_sb.sb_icount += cntp->icsb_icount;
1512 mp->m_sb.sb_ifree += cntp->icsb_ifree;
1513 mp->m_sb.sb_fdblocks += cntp->icsb_fdblocks;
1514
1515 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1516
1517 xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
1518 xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
1519 xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
1520 spin_unlock(&mp->m_sb_lock);
1521 xfs_icsb_unlock(mp);
1522 break;
1523 }
1524
1525 return NOTIFY_OK;
1526}
1527#endif /* CONFIG_HOTPLUG_CPU */
1528
1529int
1530xfs_icsb_init_counters(
1531 xfs_mount_t *mp)
1532{
1533 xfs_icsb_cnts_t *cntp;
1534 int i;
1535
1536 mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
1537 if (mp->m_sb_cnts == NULL)
1538 return -ENOMEM;
1539
1540 for_each_online_cpu(i) {
1541 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1542 memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
1543 }
1544
1545 mutex_init(&mp->m_icsb_mutex);
1546
1547 /*
1548 * start with all counters disabled so that the
1549 * initial balance kicks us off correctly
1550 */
1551 mp->m_icsb_counters = -1;
1552
1553#ifdef CONFIG_HOTPLUG_CPU
1554 mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
1555 mp->m_icsb_notifier.priority = 0;
1556 register_hotcpu_notifier(&mp->m_icsb_notifier);
1557#endif /* CONFIG_HOTPLUG_CPU */
1558
1559 return 0;
1560}
1561
1562void
1563xfs_icsb_reinit_counters(
1564 xfs_mount_t *mp)
1565{
1566 xfs_icsb_lock(mp);
1567 /*
1568 * start with all counters disabled so that the
1569 * initial balance kicks us off correctly
1570 */
1571 mp->m_icsb_counters = -1;
1572 xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
1573 xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
1574 xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
1575 xfs_icsb_unlock(mp);
1576}
1577
1578void
1579xfs_icsb_destroy_counters(
1580 xfs_mount_t *mp)
1581{
1582 if (mp->m_sb_cnts) {
1583 unregister_hotcpu_notifier(&mp->m_icsb_notifier);
1584 free_percpu(mp->m_sb_cnts);
1585 }
1586 mutex_destroy(&mp->m_icsb_mutex);
1587}
1588
1589STATIC void
1590xfs_icsb_lock_cntr(
1591 xfs_icsb_cnts_t *icsbp)
1592{
1593 while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
1594 ndelay(1000);
1595 }
1596}
1597
1598STATIC void
1599xfs_icsb_unlock_cntr(
1600 xfs_icsb_cnts_t *icsbp)
1601{
1602 clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
1603}
1604
1605
1606STATIC void
1607xfs_icsb_lock_all_counters(
1608 xfs_mount_t *mp)
1609{
1610 xfs_icsb_cnts_t *cntp;
1611 int i;
1612
1613 for_each_online_cpu(i) {
1614 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1615 xfs_icsb_lock_cntr(cntp);
1616 }
1617}
1618
1619STATIC void
1620xfs_icsb_unlock_all_counters(
1621 xfs_mount_t *mp)
1622{
1623 xfs_icsb_cnts_t *cntp;
1624 int i;
1625
1626 for_each_online_cpu(i) {
1627 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1628 xfs_icsb_unlock_cntr(cntp);
1629 }
1630}
1631
1632STATIC void
1633xfs_icsb_count(
1634 xfs_mount_t *mp,
1635 xfs_icsb_cnts_t *cnt,
1636 int flags)
1637{
1638 xfs_icsb_cnts_t *cntp;
1639 int i;
1640
1641 memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
1642
1643 if (!(flags & XFS_ICSB_LAZY_COUNT))
1644 xfs_icsb_lock_all_counters(mp);
1645
1646 for_each_online_cpu(i) {
1647 cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
1648 cnt->icsb_icount += cntp->icsb_icount;
1649 cnt->icsb_ifree += cntp->icsb_ifree;
1650 cnt->icsb_fdblocks += cntp->icsb_fdblocks;
1651 }
1652
1653 if (!(flags & XFS_ICSB_LAZY_COUNT))
1654 xfs_icsb_unlock_all_counters(mp);
1655}
1656
1657STATIC int
1658xfs_icsb_counter_disabled(
1659 xfs_mount_t *mp,
1660 xfs_sb_field_t field)
1661{
1662 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1663 return test_bit(field, &mp->m_icsb_counters);
1664}
1665
1666STATIC void
1667xfs_icsb_disable_counter(
1668 xfs_mount_t *mp,
1669 xfs_sb_field_t field)
1670{
1671 xfs_icsb_cnts_t cnt;
1672
1673 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1674
1675 /*
1676 * If we are already disabled, then there is nothing to do
1677 * here. We check before locking all the counters to avoid
1678 * the expensive lock operation when being called in the
1679 * slow path and the counter is already disabled. This is
1680 * safe because the only time we set or clear this state is under
1681 * the m_icsb_mutex.
1682 */
1683 if (xfs_icsb_counter_disabled(mp, field))
1684 return;
1685
1686 xfs_icsb_lock_all_counters(mp);
1687 if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
1688 /* drain back to superblock */
1689
1690 xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
1691 switch(field) {
1692 case XFS_SBS_ICOUNT:
1693 mp->m_sb.sb_icount = cnt.icsb_icount;
1694 break;
1695 case XFS_SBS_IFREE:
1696 mp->m_sb.sb_ifree = cnt.icsb_ifree;
1697 break;
1698 case XFS_SBS_FDBLOCKS:
1699 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
1700 break;
1701 default:
1702 BUG();
1703 }
1704 }
1705
1706 xfs_icsb_unlock_all_counters(mp);
1707}
1708
1709STATIC void
1710xfs_icsb_enable_counter(
1711 xfs_mount_t *mp,
1712 xfs_sb_field_t field,
1713 uint64_t count,
1714 uint64_t resid)
1715{
1716 xfs_icsb_cnts_t *cntp;
1717 int i;
1718
1719 ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
1720
1721 xfs_icsb_lock_all_counters(mp);
1722 for_each_online_cpu(i) {
1723 cntp = per_cpu_ptr(mp->m_sb_cnts, i);
1724 switch (field) {
1725 case XFS_SBS_ICOUNT:
1726 cntp->icsb_icount = count + resid;
1727 break;
1728 case XFS_SBS_IFREE:
1729 cntp->icsb_ifree = count + resid;
1730 break;
1731 case XFS_SBS_FDBLOCKS:
1732 cntp->icsb_fdblocks = count + resid;
1733 break;
1734 default:
1735 BUG();
1736 break;
1737 }
1738 resid = 0;
1739 }
1740 clear_bit(field, &mp->m_icsb_counters);
1741 xfs_icsb_unlock_all_counters(mp);
1742}
1743
1744void
1745xfs_icsb_sync_counters_locked(
1746 xfs_mount_t *mp,
1747 int flags)
1748{
1749 xfs_icsb_cnts_t cnt;
1750
1751 xfs_icsb_count(mp, &cnt, flags);
1752
1753 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
1754 mp->m_sb.sb_icount = cnt.icsb_icount;
1755 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
1756 mp->m_sb.sb_ifree = cnt.icsb_ifree;
1757 if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
1758 mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
1759}
1760
1761/*
1762 * Accurate update of per-cpu counters to incore superblock
1763 */
1764void
1765xfs_icsb_sync_counters(
1766 xfs_mount_t *mp,
1767 int flags)
1768{
1769 spin_lock(&mp->m_sb_lock);
1770 xfs_icsb_sync_counters_locked(mp, flags);
1771 spin_unlock(&mp->m_sb_lock);
1772}
1773
1774/*
1775 * Balance and enable/disable counters as necessary.
1776 *
1777 * Thresholds for re-enabling counters are somewhat magic. inode counts are
 1778 * chosen to be the same number as a single on-disk allocation chunk per CPU, and
 1779 * free blocks is something far enough from zero that we aren't going to thrash when we
1780 * get near ENOSPC. We also need to supply a minimum we require per cpu to
1781 * prevent looping endlessly when xfs_alloc_space asks for more than will
1782 * be distributed to a single CPU but each CPU has enough blocks to be
1783 * reenabled.
1784 *
1785 * Note that we can be called when counters are already disabled.
1786 * xfs_icsb_disable_counter() optimises the counter locking in this case to
1787 * prevent locking every per-cpu counter needlessly.
1788 */
1789
1790#define XFS_ICSB_INO_CNTR_REENABLE (uint64_t)64
1791#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
1792 (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
1793STATIC void
1794xfs_icsb_balance_counter_locked(
1795 xfs_mount_t *mp,
1796 xfs_sb_field_t field,
1797 int min_per_cpu)
1798{
1799 uint64_t count, resid;
1800 int weight = num_online_cpus();
1801 uint64_t min = (uint64_t)min_per_cpu;
1802
1803 /* disable counter and sync counter */
1804 xfs_icsb_disable_counter(mp, field);
1805
 1806 /* update counters - first CPU gets residual */
1807 switch (field) {
1808 case XFS_SBS_ICOUNT:
1809 count = mp->m_sb.sb_icount;
1810 resid = do_div(count, weight);
1811 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
1812 return;
1813 break;
1814 case XFS_SBS_IFREE:
1815 count = mp->m_sb.sb_ifree;
1816 resid = do_div(count, weight);
1817 if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
1818 return;
1819 break;
1820 case XFS_SBS_FDBLOCKS:
1821 count = mp->m_sb.sb_fdblocks;
1822 resid = do_div(count, weight);
1823 if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
1824 return;
1825 break;
1826 default:
1827 BUG();
1828 count = resid = 0; /* quiet, gcc */
1829 break;
1830 }
1831
1832 xfs_icsb_enable_counter(mp, field, count, resid);
1833}
1834
1835STATIC void
1836xfs_icsb_balance_counter(
1837 xfs_mount_t *mp,
1838 xfs_sb_field_t fields,
1839 int min_per_cpu)
1840{
1841 spin_lock(&mp->m_sb_lock);
1842 xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
1843 spin_unlock(&mp->m_sb_lock);
1844}
1845
1846int
1847xfs_icsb_modify_counters(
1848 xfs_mount_t *mp,
1849 xfs_sb_field_t field,
1850 int64_t delta,
1851 int rsvd)
1852{
1853 xfs_icsb_cnts_t *icsbp;
1854 long long lcounter; /* long counter for 64 bit fields */
1855 int ret = 0;
1856
1857 might_sleep();
1858again:
1859 preempt_disable();
1860 icsbp = this_cpu_ptr(mp->m_sb_cnts);
1861
1862 /*
1863 * if the counter is disabled, go to slow path
1864 */
1865 if (unlikely(xfs_icsb_counter_disabled(mp, field)))
1866 goto slow_path;
1867 xfs_icsb_lock_cntr(icsbp);
1868 if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
1869 xfs_icsb_unlock_cntr(icsbp);
1870 goto slow_path;
1871 }
1872
1873 switch (field) {
1874 case XFS_SBS_ICOUNT:
1875 lcounter = icsbp->icsb_icount;
1876 lcounter += delta;
1877 if (unlikely(lcounter < 0))
1878 goto balance_counter;
1879 icsbp->icsb_icount = lcounter;
1880 break;
1881
1882 case XFS_SBS_IFREE:
1883 lcounter = icsbp->icsb_ifree;
1884 lcounter += delta;
1885 if (unlikely(lcounter < 0))
1886 goto balance_counter;
1887 icsbp->icsb_ifree = lcounter;
1888 break;
1889
1890 case XFS_SBS_FDBLOCKS:
1891 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
1892
1893 lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1894 lcounter += delta;
1895 if (unlikely(lcounter < 0))
1896 goto balance_counter;
1897 icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1898 break;
1899 default:
1900 BUG();
1901 break;
1902 }
1903 xfs_icsb_unlock_cntr(icsbp);
1904 preempt_enable();
1905 return 0;
1906
1907slow_path:
1908 preempt_enable();
1909
1910 /*
1911 * serialise with a mutex so we don't burn lots of cpu on
1912 * the superblock lock. We still need to hold the superblock
1913 * lock, however, when we modify the global structures.
1914 */
1915 xfs_icsb_lock(mp);
1916
1917 /*
1918 * Now running atomically.
1919 *
1920 * If the counter is enabled, someone has beaten us to rebalancing.
1921 * Drop the lock and try again in the fast path....
1922 */
1923 if (!(xfs_icsb_counter_disabled(mp, field))) {
1924 xfs_icsb_unlock(mp);
1925 goto again;
1926 }
1927
1928 /*
1929 * The counter is currently disabled. Because we are
1930 * running atomically here, we know a rebalance cannot
1931 * be in progress. Hence we can go straight to operating
1932 * on the global superblock. We do not call xfs_mod_incore_sb()
1933 * here even though we need to get the m_sb_lock. Doing so
1934 * will cause us to re-enter this function and deadlock.
1935 * Hence we get the m_sb_lock ourselves and then call
1936 * xfs_mod_incore_sb_unlocked() as the unlocked path operates
1937 * directly on the global counters.
1938 */
1939 spin_lock(&mp->m_sb_lock);
1940 ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
1941 spin_unlock(&mp->m_sb_lock);
1942
1943 /*
1944 * Now that we've modified the global superblock, we
1945 * may be able to re-enable the distributed counters
1946 * (e.g. lots of space just got freed). After that
1947 * we are done.
1948 */
1949 if (ret != -ENOSPC)
1950 xfs_icsb_balance_counter(mp, field, 0);
1951 xfs_icsb_unlock(mp);
1952 return ret;
1953
1954balance_counter:
1955 xfs_icsb_unlock_cntr(icsbp);
1956 preempt_enable();
1957
1958 /*
1959 * We may have multiple threads here if multiple per-cpu
1960 * counters run dry at the same time. This will mean we can
1961 * do more balances than strictly necessary but it is not
1962 * the common slowpath case.
1963 */
1964 xfs_icsb_lock(mp);
1965
1966 /*
 1967 * Now running atomically.
1968 *
1969 * This will leave the counter in the correct state for future
1970 * accesses. After the rebalance, we simply try again and our retry
1971 * will either succeed through the fast path or slow path without
1972 * another balance operation being required.
1973 */
1974 xfs_icsb_balance_counter(mp, field, delta);
1975 xfs_icsb_unlock(mp);
1976 goto again;
1977}
1978
1979#endif
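For contrast with the generic percpu counters replacing it, here is a toy model of the deleted balance operation: drain every per-cpu bucket into the global value, then hand each CPU an equal share with the first CPU taking the remainder (per the "first CPU gets residual" comment above). In the toy the global is zeroed afterwards, since the buckets then hold the whole value:

#define NCPU 4

/* toy model of xfs_icsb_balance_counter_locked()/_enable_counter() */
static void icsb_balance(unsigned long long percpu[NCPU],
			 unsigned long long *global)
{
	unsigned long long share, resid;
	int i;

	for (i = 0; i < NCPU; i++) {	/* drain: counter "disabled" */
		*global += percpu[i];
		percpu[i] = 0;
	}
	share = *global / NCPU;
	resid = *global % NCPU;
	for (i = 0; i < NCPU; i++)	/* redistribute, re-enable */
		percpu[i] = share + (i == 0 ? resid : 0);
	*global = 0;
}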
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 0d8abd6364d9..8c995a2ccb6f 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -18,8 +18,6 @@
18#ifndef __XFS_MOUNT_H__ 18#ifndef __XFS_MOUNT_H__
19#define __XFS_MOUNT_H__ 19#define __XFS_MOUNT_H__
20 20
21#ifdef __KERNEL__
22
23struct xlog; 21struct xlog;
24struct xfs_inode; 22struct xfs_inode;
25struct xfs_mru_cache; 23struct xfs_mru_cache;
@@ -29,44 +27,6 @@ struct xfs_quotainfo;
29struct xfs_dir_ops; 27struct xfs_dir_ops;
30struct xfs_da_geometry; 28struct xfs_da_geometry;
31 29
32#ifdef HAVE_PERCPU_SB
33
34/*
35 * Valid per-cpu incore superblock counters. Note that if you add new counters,
36 * you may need to define new counter disabled bit field descriptors as there
37 * are more possible fields in the superblock that can fit in a bitfield on a
 38 * 32 bit platform. The XFS_SBS_* values for the current counters just
39 * fit.
40 */
41typedef struct xfs_icsb_cnts {
42 uint64_t icsb_fdblocks;
43 uint64_t icsb_ifree;
44 uint64_t icsb_icount;
45 unsigned long icsb_flags;
46} xfs_icsb_cnts_t;
47
48#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
49
50#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
51
52extern int xfs_icsb_init_counters(struct xfs_mount *);
53extern void xfs_icsb_reinit_counters(struct xfs_mount *);
54extern void xfs_icsb_destroy_counters(struct xfs_mount *);
55extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
56extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
57extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t,
58 int64_t, int);
59
60#else
61#define xfs_icsb_init_counters(mp) (0)
62#define xfs_icsb_destroy_counters(mp) do { } while (0)
63#define xfs_icsb_reinit_counters(mp) do { } while (0)
64#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
65#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
66#define xfs_icsb_modify_counters(mp, field, delta, rsvd) \
67 xfs_mod_incore_sb(mp, field, delta, rsvd)
68#endif
69
70/* dynamic preallocation free space thresholds, 5% down to 1% */ 30/* dynamic preallocation free space thresholds, 5% down to 1% */
71enum { 31enum {
72 XFS_LOWSP_1_PCNT = 0, 32 XFS_LOWSP_1_PCNT = 0,
@@ -81,8 +41,13 @@ typedef struct xfs_mount {
81 struct super_block *m_super; 41 struct super_block *m_super;
82 xfs_tid_t m_tid; /* next unused tid for fs */ 42 xfs_tid_t m_tid; /* next unused tid for fs */
83 struct xfs_ail *m_ail; /* fs active log item list */ 43 struct xfs_ail *m_ail; /* fs active log item list */
84 xfs_sb_t m_sb; /* copy of fs superblock */ 44
45 struct xfs_sb m_sb; /* copy of fs superblock */
85 spinlock_t m_sb_lock; /* sb counter lock */ 46 spinlock_t m_sb_lock; /* sb counter lock */
47 struct percpu_counter m_icount; /* allocated inodes counter */
48 struct percpu_counter m_ifree; /* free inodes counter */
49 struct percpu_counter m_fdblocks; /* free block counter */
50
86 struct xfs_buf *m_sb_bp; /* buffer for superblock */ 51 struct xfs_buf *m_sb_bp; /* buffer for superblock */
87 char *m_fsname; /* filesystem name */ 52 char *m_fsname; /* filesystem name */
88 int m_fsname_len; /* strlen of fs name */ 53 int m_fsname_len; /* strlen of fs name */
@@ -152,12 +117,6 @@ typedef struct xfs_mount {
152 const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */ 117 const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
153 uint m_chsize; /* size of next field */ 118 uint m_chsize; /* size of next field */
154 atomic_t m_active_trans; /* number trans frozen */ 119 atomic_t m_active_trans; /* number trans frozen */
155#ifdef HAVE_PERCPU_SB
156 xfs_icsb_cnts_t __percpu *m_sb_cnts; /* per-cpu superblock counters */
157 unsigned long m_icsb_counters; /* disabled per-cpu counters */
158 struct notifier_block m_icsb_notifier; /* hotplug cpu notifier */
159 struct mutex m_icsb_mutex; /* balancer sync lock */
160#endif
161 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */ 120 struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
162 struct delayed_work m_reclaim_work; /* background inode reclaim */ 121 struct delayed_work m_reclaim_work; /* background inode reclaim */
163 struct delayed_work m_eofblocks_work; /* background eof blocks 122 struct delayed_work m_eofblocks_work; /* background eof blocks
@@ -301,35 +260,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
301} 260}
302 261
303/* 262/*
304 * Per-cpu superblock locking functions
305 */
306#ifdef HAVE_PERCPU_SB
307static inline void
308xfs_icsb_lock(xfs_mount_t *mp)
309{
310 mutex_lock(&mp->m_icsb_mutex);
311}
312
313static inline void
314xfs_icsb_unlock(xfs_mount_t *mp)
315{
316 mutex_unlock(&mp->m_icsb_mutex);
317}
318#else
319#define xfs_icsb_lock(mp)
320#define xfs_icsb_unlock(mp)
321#endif
322
323/*
324 * This structure is for use by the xfs_mod_incore_sb_batch() routine.
 325 * xfs_growfs can specify a few fields whose deltas exceed the int limit
326 */
327typedef struct xfs_mod_sb {
328 xfs_sb_field_t msb_field; /* Field to modify, see below */
329 int64_t msb_delta; /* Change to make to specified field */
330} xfs_mod_sb_t;
331
332/*
333 * Per-ag incore structure, copies of information in agf and agi, to improve the 263 * Per-ag incore structure, copies of information in agf and agi, to improve the
334 * performance of allocation group selection. 264 * performance of allocation group selection.
335 */ 265 */
@@ -383,11 +313,14 @@ extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
383extern int xfs_mountfs(xfs_mount_t *mp); 313extern int xfs_mountfs(xfs_mount_t *mp);
384extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount, 314extern int xfs_initialize_perag(xfs_mount_t *mp, xfs_agnumber_t agcount,
385 xfs_agnumber_t *maxagi); 315 xfs_agnumber_t *maxagi);
386
387extern void xfs_unmountfs(xfs_mount_t *); 316extern void xfs_unmountfs(xfs_mount_t *);
388extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); 317
389extern int xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *, 318extern int xfs_mod_icount(struct xfs_mount *mp, int64_t delta);
390 uint, int); 319extern int xfs_mod_ifree(struct xfs_mount *mp, int64_t delta);
320extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
321 bool reserved);
322extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
323
391extern int xfs_mount_log_sb(xfs_mount_t *); 324extern int xfs_mount_log_sb(xfs_mount_t *);
392extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int); 325extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
393extern int xfs_readsb(xfs_mount_t *, int); 326extern int xfs_readsb(xfs_mount_t *, int);
@@ -399,6 +332,4 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
399 332
400extern void xfs_set_low_space_thresholds(struct xfs_mount *); 333extern void xfs_set_low_space_thresholds(struct xfs_mount *);
401 334
402#endif /* __KERNEL__ */
403
404#endif /* __XFS_MOUNT_H__ */ 335#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 30ecca3037e3..f8a674d7f092 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -437,7 +437,7 @@ xfs_mru_cache_insert(
437 if (!mru || !mru->lists) 437 if (!mru || !mru->lists)
438 return -EINVAL; 438 return -EINVAL;
439 439
440 if (radix_tree_preload(GFP_KERNEL)) 440 if (radix_tree_preload(GFP_NOFS))
441 return -ENOMEM; 441 return -ENOMEM;
442 442
443 INIT_LIST_HEAD(&elem->list_node); 443 INIT_LIST_HEAD(&elem->list_node);
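
Switching the preload to GFP_NOFS prevents memory reclaim from re-entering the filesystem while the MRU cache insert path holds fs-internal state. For reference, the standard preload idiom the surrounding code follows, sketched in isolation; example_insert() and its tree_lock parameter are placeholders, not XFS code:

#include <linux/radix-tree.h>
#include <linux/spinlock.h>

/*
 * radix_tree_preload() preallocates tree nodes and returns with
 * preemption disabled on success; the insert then runs under the
 * caller's own lock, and radix_tree_preload_end() re-enables
 * preemption.
 */
static int
example_insert(
	struct radix_tree_root	*root,
	spinlock_t		*tree_lock,
	unsigned long		index,
	void			*item)
{
	int			error;

	if (radix_tree_preload(GFP_NOFS))
		return -ENOMEM;

	spin_lock(tree_lock);
	error = radix_tree_insert(root, index, item);
	spin_unlock(tree_lock);
	radix_tree_preload_end();
	return error;
}
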
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index cbb424f4d93a..981a657eca39 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -305,8 +305,10 @@ xfs_fs_commit_blocks(
305 305
306 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); 306 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
307 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0); 307 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
308 if (error) 308 if (error) {
309 xfs_trans_cancel(tp, 0);
309 goto out_drop_iolock; 310 goto out_drop_iolock;
311 }
310 312
311 xfs_ilock(ip, XFS_ILOCK_EXCL); 313 xfs_ilock(ip, XFS_ILOCK_EXCL);
312 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 314 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
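
The added cancel fixes a transaction leak: in this kernel, xfs_trans_alloc() always hands back a transaction, so a failed xfs_trans_reserve() must be answered with xfs_trans_cancel() or the allocation is lost. A hedged sketch of the idiom, using only names from the hunk above; example_start_trans() itself is hypothetical:

static int
example_start_trans(
	struct xfs_mount	*mp,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);	/* nothing joined yet: plain cancel */
		return error;
	}
	*tpp = tp;
	return 0;
}
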
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index c6b22e1e77ed..5538468c7f63 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -841,6 +841,11 @@ xfs_qm_reset_dqcounts(
841 */ 841 */
842 xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, 842 xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
843 "xfs_quotacheck"); 843 "xfs_quotacheck");
844 /*
845 * Reset type in case we are reusing group quota file for
846 * project quotas or vice versa
847 */
848 ddq->d_flags = type;
844 ddq->d_bcount = 0; 849 ddq->d_bcount = 0;
845 ddq->d_icount = 0; 850 ddq->d_icount = 0;
846 ddq->d_rtbcount = 0; 851 ddq->d_rtbcount = 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 02718638dc12..5f357ca97e76 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -966,6 +966,8 @@ xfs_fs_inode_init_once(
966 atomic_set(&ip->i_pincount, 0); 966 atomic_set(&ip->i_pincount, 0);
967 spin_lock_init(&ip->i_flags_lock); 967 spin_lock_init(&ip->i_flags_lock);
968 968
969 mrlock_init(&ip->i_mmaplock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
970 "xfsino", ip->i_ino);
969 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, 971 mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
970 "xfsino", ip->i_ino); 972 "xfsino", ip->i_ino);
971} 973}
@@ -1013,24 +1015,6 @@ xfs_free_fsname(
1013 kfree(mp->m_logname); 1015 kfree(mp->m_logname);
1014} 1016}
1015 1017
1016STATIC void
1017xfs_fs_put_super(
1018 struct super_block *sb)
1019{
1020 struct xfs_mount *mp = XFS_M(sb);
1021
1022 xfs_notice(mp, "Unmounting Filesystem");
1023 xfs_filestream_unmount(mp);
1024 xfs_unmountfs(mp);
1025
1026 xfs_freesb(mp);
1027 xfs_icsb_destroy_counters(mp);
1028 xfs_destroy_mount_workqueues(mp);
1029 xfs_close_devices(mp);
1030 xfs_free_fsname(mp);
1031 kfree(mp);
1032}
1033
1034STATIC int 1018STATIC int
1035xfs_fs_sync_fs( 1019xfs_fs_sync_fs(
1036 struct super_block *sb, 1020 struct super_block *sb,
@@ -1066,6 +1050,9 @@ xfs_fs_statfs(
1066 xfs_sb_t *sbp = &mp->m_sb; 1050 xfs_sb_t *sbp = &mp->m_sb;
1067 struct xfs_inode *ip = XFS_I(dentry->d_inode); 1051 struct xfs_inode *ip = XFS_I(dentry->d_inode);
1068 __uint64_t fakeinos, id; 1052 __uint64_t fakeinos, id;
1053 __uint64_t icount;
1054 __uint64_t ifree;
1055 __uint64_t fdblocks;
1069 xfs_extlen_t lsize; 1056 xfs_extlen_t lsize;
1070 __int64_t ffree; 1057 __int64_t ffree;
1071 1058
@@ -1076,17 +1063,21 @@ xfs_fs_statfs(
1076 statp->f_fsid.val[0] = (u32)id; 1063 statp->f_fsid.val[0] = (u32)id;
1077 statp->f_fsid.val[1] = (u32)(id >> 32); 1064 statp->f_fsid.val[1] = (u32)(id >> 32);
1078 1065
1079 xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT); 1066 icount = percpu_counter_sum(&mp->m_icount);
1067 ifree = percpu_counter_sum(&mp->m_ifree);
1068 fdblocks = percpu_counter_sum(&mp->m_fdblocks);
1080 1069
1081 spin_lock(&mp->m_sb_lock); 1070 spin_lock(&mp->m_sb_lock);
1082 statp->f_bsize = sbp->sb_blocksize; 1071 statp->f_bsize = sbp->sb_blocksize;
1083 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; 1072 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
1084 statp->f_blocks = sbp->sb_dblocks - lsize; 1073 statp->f_blocks = sbp->sb_dblocks - lsize;
1085 statp->f_bfree = statp->f_bavail = 1074 spin_unlock(&mp->m_sb_lock);
1086 sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); 1075
1076 statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1077 statp->f_bavail = statp->f_bfree;
1078
1087 fakeinos = statp->f_bfree << sbp->sb_inopblog; 1079 fakeinos = statp->f_bfree << sbp->sb_inopblog;
1088 statp->f_files = 1080 statp->f_files = MIN(icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
1089 MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
1090 if (mp->m_maxicount) 1081 if (mp->m_maxicount)
1091 statp->f_files = min_t(typeof(statp->f_files), 1082 statp->f_files = min_t(typeof(statp->f_files),
1092 statp->f_files, 1083 statp->f_files,
@@ -1098,10 +1089,9 @@ xfs_fs_statfs(
1098 sbp->sb_icount); 1089 sbp->sb_icount);
1099 1090
1100 /* make sure statp->f_ffree does not underflow */ 1091 /* make sure statp->f_ffree does not underflow */
1101 ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree); 1092 ffree = statp->f_files - (icount - ifree);
1102 statp->f_ffree = max_t(__int64_t, ffree, 0); 1093 statp->f_ffree = max_t(__int64_t, ffree, 0);
1103 1094
1104 spin_unlock(&mp->m_sb_lock);
1105 1095
1106 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && 1096 if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
1107 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) == 1097 ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
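
statfs now snapshots the three counters with percpu_counter_sum() before taking m_sb_lock, so the lock covers only the remaining struct xfs_sb fields. A self-contained sketch of the percpu_counter semantics this relies on, with hypothetical values; example_counter() is not XFS code:

#include <linux/percpu_counter.h>

/*
 * percpu_counter_add() is the cheap per-cpu fastpath used by the
 * xfs_mod_*() helpers; percpu_counter_sum() folds every cpu's delta
 * into an accurate (if instantaneously stale) total, which is what
 * statfs wants.  percpu_counter_read() would be the fast
 * approximate read.
 */
static int
example_counter(void)
{
	struct percpu_counter	c;
	s64			total;
	int			error;

	error = percpu_counter_init(&c, 0, GFP_KERNEL);
	if (error)
		return error;
	percpu_counter_add(&c, 42);
	total = percpu_counter_sum(&c);	/* == 42 */
	percpu_counter_destroy(&c);
	return total == 42 ? 0 : -EINVAL;
}
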
@@ -1388,6 +1378,51 @@ xfs_finish_flags(
1388 return 0; 1378 return 0;
1389} 1379}
1390 1380
1381static int
1382xfs_init_percpu_counters(
1383 struct xfs_mount *mp)
1384{
1385 int error;
1386
1387 error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
1388 if (error)
1389 return -ENOMEM;
1390
1391 error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
1392 if (error)
1393 goto free_icount;
1394
1395 error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
1396 if (error)
1397 goto free_ifree;
1398
1399 return 0;
1400
1401free_ifree:
1402 percpu_counter_destroy(&mp->m_ifree);
1403free_icount:
1404 percpu_counter_destroy(&mp->m_icount);
1405 return -ENOMEM;
1406}
1407
1408void
1409xfs_reinit_percpu_counters(
1410 struct xfs_mount *mp)
1411{
1412 percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
1413 percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
1414 percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
1415}
1416
1417static void
1418xfs_destroy_percpu_counters(
1419 struct xfs_mount *mp)
1420{
1421 percpu_counter_destroy(&mp->m_icount);
1422 percpu_counter_destroy(&mp->m_ifree);
1423 percpu_counter_destroy(&mp->m_fdblocks);
1424}
1425
1391STATIC int 1426STATIC int
1392xfs_fs_fill_super( 1427xfs_fs_fill_super(
1393 struct super_block *sb, 1428 struct super_block *sb,
@@ -1436,7 +1471,7 @@ xfs_fs_fill_super(
1436 if (error) 1471 if (error)
1437 goto out_close_devices; 1472 goto out_close_devices;
1438 1473
1439 error = xfs_icsb_init_counters(mp); 1474 error = xfs_init_percpu_counters(mp);
1440 if (error) 1475 if (error)
1441 goto out_destroy_workqueues; 1476 goto out_destroy_workqueues;
1442 1477
@@ -1494,7 +1529,7 @@ xfs_fs_fill_super(
1494 out_free_sb: 1529 out_free_sb:
1495 xfs_freesb(mp); 1530 xfs_freesb(mp);
1496 out_destroy_counters: 1531 out_destroy_counters:
1497 xfs_icsb_destroy_counters(mp); 1532 xfs_destroy_percpu_counters(mp);
1498out_destroy_workqueues: 1533out_destroy_workqueues:
1499 xfs_destroy_mount_workqueues(mp); 1534 xfs_destroy_mount_workqueues(mp);
1500 out_close_devices: 1535 out_close_devices:
@@ -1511,6 +1546,24 @@ out_destroy_workqueues:
1511 goto out_free_sb; 1546 goto out_free_sb;
1512} 1547}
1513 1548
1549STATIC void
1550xfs_fs_put_super(
1551 struct super_block *sb)
1552{
1553 struct xfs_mount *mp = XFS_M(sb);
1554
1555 xfs_notice(mp, "Unmounting Filesystem");
1556 xfs_filestream_unmount(mp);
1557 xfs_unmountfs(mp);
1558
1559 xfs_freesb(mp);
1560 xfs_destroy_percpu_counters(mp);
1561 xfs_destroy_mount_workqueues(mp);
1562 xfs_close_devices(mp);
1563 xfs_free_fsname(mp);
1564 kfree(mp);
1565}
1566
1514STATIC struct dentry * 1567STATIC struct dentry *
1515xfs_fs_mount( 1568xfs_fs_mount(
1516 struct file_system_type *fs_type, 1569 struct file_system_type *fs_type,
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 2b830c2f322e..499058fea303 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -72,6 +72,8 @@ extern const struct export_operations xfs_export_operations;
72extern const struct xattr_handler *xfs_xattr_handlers[]; 72extern const struct xattr_handler *xfs_xattr_handlers[];
73extern const struct quotactl_ops xfs_quotactl_operations; 73extern const struct quotactl_ops xfs_quotactl_operations;
74 74
75extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
76
75#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) 77#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
76 78
77#endif /* __XFS_SUPER_H__ */ 79#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 7e45fa155ea8..b2a45cc9eceb 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -115,7 +115,7 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
115 __entry->refcount = refcount; 115 __entry->refcount = refcount;
116 __entry->caller_ip = caller_ip; 116 __entry->caller_ip = caller_ip;
117 ), 117 ),
118 TP_printk("dev %d:%d agno %u refcount %d caller %pf", 118 TP_printk("dev %d:%d agno %u refcount %d caller %ps",
119 MAJOR(__entry->dev), MINOR(__entry->dev), 119 MAJOR(__entry->dev), MINOR(__entry->dev),
120 __entry->agno, 120 __entry->agno,
121 __entry->refcount, 121 __entry->refcount,
@@ -239,7 +239,7 @@ TRACE_EVENT(xfs_iext_insert,
239 __entry->caller_ip = caller_ip; 239 __entry->caller_ip = caller_ip;
240 ), 240 ),
241 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 241 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
242 "offset %lld block %lld count %lld flag %d caller %pf", 242 "offset %lld block %lld count %lld flag %d caller %ps",
243 MAJOR(__entry->dev), MINOR(__entry->dev), 243 MAJOR(__entry->dev), MINOR(__entry->dev),
244 __entry->ino, 244 __entry->ino,
245 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 245 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -283,7 +283,7 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
283 __entry->caller_ip = caller_ip; 283 __entry->caller_ip = caller_ip;
284 ), 284 ),
285 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " 285 TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
286 "offset %lld block %lld count %lld flag %d caller %pf", 286 "offset %lld block %lld count %lld flag %d caller %ps",
287 MAJOR(__entry->dev), MINOR(__entry->dev), 287 MAJOR(__entry->dev), MINOR(__entry->dev),
288 __entry->ino, 288 __entry->ino,
289 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), 289 __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
@@ -329,7 +329,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
329 __entry->caller_ip = caller_ip; 329 __entry->caller_ip = caller_ip;
330 ), 330 ),
331 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d " 331 TP_printk("dev %d:%d bno 0x%llx nblks 0x%x hold %d pincount %d "
332 "lock %d flags %s caller %pf", 332 "lock %d flags %s caller %ps",
333 MAJOR(__entry->dev), MINOR(__entry->dev), 333 MAJOR(__entry->dev), MINOR(__entry->dev),
334 (unsigned long long)__entry->bno, 334 (unsigned long long)__entry->bno,
335 __entry->nblks, 335 __entry->nblks,
@@ -402,7 +402,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
402 __entry->caller_ip = caller_ip; 402 __entry->caller_ip = caller_ip;
403 ), 403 ),
404 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 404 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
405 "lock %d flags %s caller %pf", 405 "lock %d flags %s caller %ps",
406 MAJOR(__entry->dev), MINOR(__entry->dev), 406 MAJOR(__entry->dev), MINOR(__entry->dev),
407 (unsigned long long)__entry->bno, 407 (unsigned long long)__entry->bno,
408 __entry->buffer_length, 408 __entry->buffer_length,
@@ -447,7 +447,7 @@ TRACE_EVENT(xfs_buf_ioerror,
447 __entry->caller_ip = caller_ip; 447 __entry->caller_ip = caller_ip;
448 ), 448 ),
449 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 449 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
450 "lock %d error %d flags %s caller %pf", 450 "lock %d error %d flags %s caller %ps",
451 MAJOR(__entry->dev), MINOR(__entry->dev), 451 MAJOR(__entry->dev), MINOR(__entry->dev),
452 (unsigned long long)__entry->bno, 452 (unsigned long long)__entry->bno,
453 __entry->buffer_length, 453 __entry->buffer_length,
@@ -613,7 +613,7 @@ DECLARE_EVENT_CLASS(xfs_lock_class,
613 __entry->lock_flags = lock_flags; 613 __entry->lock_flags = lock_flags;
614 __entry->caller_ip = caller_ip; 614 __entry->caller_ip = caller_ip;
615 ), 615 ),
616 TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", 616 TP_printk("dev %d:%d ino 0x%llx flags %s caller %ps",
617 MAJOR(__entry->dev), MINOR(__entry->dev), 617 MAJOR(__entry->dev), MINOR(__entry->dev),
618 __entry->ino, 618 __entry->ino,
619 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), 619 __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
@@ -686,6 +686,9 @@ DEFINE_INODE_EVENT(xfs_inode_set_eofblocks_tag);
686DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag); 686DEFINE_INODE_EVENT(xfs_inode_clear_eofblocks_tag);
687DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid); 687DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
688 688
689DEFINE_INODE_EVENT(xfs_filemap_fault);
690DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
691
689DECLARE_EVENT_CLASS(xfs_iref_class, 692DECLARE_EVENT_CLASS(xfs_iref_class,
690 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), 693 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
691 TP_ARGS(ip, caller_ip), 694 TP_ARGS(ip, caller_ip),
@@ -703,7 +706,7 @@ DECLARE_EVENT_CLASS(xfs_iref_class,
703 __entry->pincount = atomic_read(&ip->i_pincount); 706 __entry->pincount = atomic_read(&ip->i_pincount);
704 __entry->caller_ip = caller_ip; 707 __entry->caller_ip = caller_ip;
705 ), 708 ),
706 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf", 709 TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %ps",
707 MAJOR(__entry->dev), MINOR(__entry->dev), 710 MAJOR(__entry->dev), MINOR(__entry->dev),
708 __entry->ino, 711 __entry->ino,
709 __entry->count, 712 __entry->count,
@@ -1334,7 +1337,7 @@ TRACE_EVENT(xfs_bunmap,
1334 __entry->flags = flags; 1337 __entry->flags = flags;
1335 ), 1338 ),
1336 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx" 1339 TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
1337 "flags %s caller %pf", 1340 "flags %s caller %ps",
1338 MAJOR(__entry->dev), MINOR(__entry->dev), 1341 MAJOR(__entry->dev), MINOR(__entry->dev),
1339 __entry->ino, 1342 __entry->ino,
1340 __entry->size, 1343 __entry->size,
@@ -1467,7 +1470,7 @@ TRACE_EVENT(xfs_agf,
1467 ), 1470 ),
1468 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u " 1471 TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
1469 "levels b %u c %u flfirst %u fllast %u flcount %u " 1472 "levels b %u c %u flfirst %u fllast %u flcount %u "
1470 "freeblks %u longest %u caller %pf", 1473 "freeblks %u longest %u caller %ps",
1471 MAJOR(__entry->dev), MINOR(__entry->dev), 1474 MAJOR(__entry->dev), MINOR(__entry->dev),
1472 __entry->agno, 1475 __entry->agno,
1473 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS), 1476 __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
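
All of these format-string changes are the same fix: caller_ip in these events is a raw return address (__return_address), not a function pointer, and %pf dereferences a function descriptor first on descriptor-based ABIs such as ia64 and ppc64, mis-resolving plain addresses there. A minimal illustration of the distinction:

/*
 * %ps resolves a plain text address to a symbol name; %pf is only
 * correct for genuine function pointers on descriptor ABIs.  Saved
 * return addresses therefore want %ps:
 */
xfs_notice(mp, "called from %ps", (void *)__return_address);
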
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index eb90cd59a0ec..220ef2c906b2 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -173,7 +173,7 @@ xfs_trans_reserve(
173 uint rtextents) 173 uint rtextents)
174{ 174{
175 int error = 0; 175 int error = 0;
176 int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; 176 bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
177 177
178 /* Mark this thread as being in a transaction */ 178 /* Mark this thread as being in a transaction */
179 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 179 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
@@ -184,8 +184,7 @@ xfs_trans_reserve(
184 * fail if the count would go below zero. 184 * fail if the count would go below zero.
185 */ 185 */
186 if (blocks > 0) { 186 if (blocks > 0) {
187 error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, 187 error = xfs_mod_fdblocks(tp->t_mountp, -((int64_t)blocks), rsvd);
188 -((int64_t)blocks), rsvd);
189 if (error != 0) { 188 if (error != 0) {
190 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 189 current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
191 return -ENOSPC; 190 return -ENOSPC;
@@ -236,8 +235,7 @@ xfs_trans_reserve(
236 * fail if the count would go below zero. 235 * fail if the count would go below zero.
237 */ 236 */
238 if (rtextents > 0) { 237 if (rtextents > 0) {
239 error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, 238 error = xfs_mod_frextents(tp->t_mountp, -((int64_t)rtextents));
240 -((int64_t)rtextents), rsvd);
241 if (error) { 239 if (error) {
242 error = -ENOSPC; 240 error = -ENOSPC;
243 goto undo_log; 241 goto undo_log;
@@ -268,8 +266,7 @@ undo_log:
268 266
269undo_blocks: 267undo_blocks:
270 if (blocks > 0) { 268 if (blocks > 0) {
 271 xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, 269 xfs_mod_fdblocks(tp->t_mountp, (int64_t)blocks, rsvd);
272 (int64_t)blocks, rsvd);
273 tp->t_blk_res = 0; 270 tp->t_blk_res = 0;
274 } 271 }
275 272
@@ -488,6 +485,54 @@ xfs_trans_apply_sb_deltas(
488 sizeof(sbp->sb_frextents) - 1); 485 sizeof(sbp->sb_frextents) - 1);
489} 486}
490 487
488STATIC int
489xfs_sb_mod8(
490 uint8_t *field,
491 int8_t delta)
492{
493 int8_t counter = *field;
494
495 counter += delta;
496 if (counter < 0) {
497 ASSERT(0);
498 return -EINVAL;
499 }
500 *field = counter;
501 return 0;
502}
503
504STATIC int
505xfs_sb_mod32(
506 uint32_t *field,
507 int32_t delta)
508{
509 int32_t counter = *field;
510
511 counter += delta;
512 if (counter < 0) {
513 ASSERT(0);
514 return -EINVAL;
515 }
516 *field = counter;
517 return 0;
518}
519
520STATIC int
521xfs_sb_mod64(
522 uint64_t *field,
523 int64_t delta)
524{
525 int64_t counter = *field;
526
527 counter += delta;
528 if (counter < 0) {
529 ASSERT(0);
530 return -EINVAL;
531 }
532 *field = counter;
533 return 0;
534}
535
491/* 536/*
492 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations 537 * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations
493 * and apply superblock counter changes to the in-core superblock. The 538 * and apply superblock counter changes to the in-core superblock. The
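
The three xfs_sb_mod* helpers above share one contract: apply the delta only if the result stays non-negative, otherwise assert and return -EINVAL with the field left unmodified. That no-partial-update property is what lets the unwind labels further down re-apply negated deltas for exactly the steps that succeeded. A small illustration with hypothetical values; example_sb_mod() is not XFS code:

static void
example_sb_mod(void)
{
	uint32_t	agcount = 4;
	int		error;

	error = xfs_sb_mod32(&agcount, 2);	/* 0; agcount is now 6 */
	error = xfs_sb_mod32(&agcount, -10);	/* -EINVAL; agcount still 6
						 * (trips the internal ASSERT
						 * on DEBUG kernels) */
}
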
@@ -495,13 +540,6 @@ xfs_trans_apply_sb_deltas(
495 * applied to the in-core superblock. The idea is that that has already been 540 * applied to the in-core superblock. The idea is that that has already been
496 * done. 541 * done.
497 * 542 *
498 * This is done efficiently with a single call to xfs_mod_incore_sb_batch().
499 * However, we have to ensure that we only modify each superblock field only
500 * once because the application of the delta values may not be atomic. That can
 501 * lead to ENOSPC races occurring if we have two separate modifications of the
502 * free space counter to put back the entire reservation and then take away
503 * what we used.
504 *
505 * If we are not logging superblock counters, then the inode allocated/free and 543 * If we are not logging superblock counters, then the inode allocated/free and
506 * used block counts are not updated in the on disk superblock. In this case, 544 * used block counts are not updated in the on disk superblock. In this case,
507 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we 545 * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
@@ -509,21 +547,15 @@ xfs_trans_apply_sb_deltas(
509 */ 547 */
510void 548void
511xfs_trans_unreserve_and_mod_sb( 549xfs_trans_unreserve_and_mod_sb(
512 xfs_trans_t *tp) 550 struct xfs_trans *tp)
513{ 551{
514 xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ 552 struct xfs_mount *mp = tp->t_mountp;
515 xfs_mod_sb_t *msbp; 553 bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
516 xfs_mount_t *mp = tp->t_mountp; 554 int64_t blkdelta = 0;
517 /* REFERENCED */ 555 int64_t rtxdelta = 0;
518 int error; 556 int64_t idelta = 0;
519 int rsvd; 557 int64_t ifreedelta = 0;
520 int64_t blkdelta = 0; 558 int error;
521 int64_t rtxdelta = 0;
522 int64_t idelta = 0;
523 int64_t ifreedelta = 0;
524
525 msbp = msb;
526 rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
527 559
528 /* calculate deltas */ 560 /* calculate deltas */
529 if (tp->t_blk_res > 0) 561 if (tp->t_blk_res > 0)
@@ -547,97 +579,115 @@ xfs_trans_unreserve_and_mod_sb(
547 579
548 /* apply the per-cpu counters */ 580 /* apply the per-cpu counters */
549 if (blkdelta) { 581 if (blkdelta) {
550 error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, 582 error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
551 blkdelta, rsvd);
552 if (error) 583 if (error)
553 goto out; 584 goto out;
554 } 585 }
555 586
556 if (idelta) { 587 if (idelta) {
557 error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, 588 error = xfs_mod_icount(mp, idelta);
558 idelta, rsvd);
559 if (error) 589 if (error)
560 goto out_undo_fdblocks; 590 goto out_undo_fdblocks;
561 } 591 }
562 592
563 if (ifreedelta) { 593 if (ifreedelta) {
564 error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, 594 error = xfs_mod_ifree(mp, ifreedelta);
565 ifreedelta, rsvd);
566 if (error) 595 if (error)
567 goto out_undo_icount; 596 goto out_undo_icount;
568 } 597 }
569 598
599 if (rtxdelta == 0 && !(tp->t_flags & XFS_TRANS_SB_DIRTY))
600 return;
601
570 /* apply remaining deltas */ 602 /* apply remaining deltas */
571 if (rtxdelta != 0) { 603 spin_lock(&mp->m_sb_lock);
572 msbp->msb_field = XFS_SBS_FREXTENTS; 604 if (rtxdelta) {
573 msbp->msb_delta = rtxdelta; 605 error = xfs_sb_mod64(&mp->m_sb.sb_frextents, rtxdelta);
574 msbp++; 606 if (error)
607 goto out_undo_ifree;
575 } 608 }
576 609
577 if (tp->t_flags & XFS_TRANS_SB_DIRTY) { 610 if (tp->t_dblocks_delta != 0) {
578 if (tp->t_dblocks_delta != 0) { 611 error = xfs_sb_mod64(&mp->m_sb.sb_dblocks, tp->t_dblocks_delta);
579 msbp->msb_field = XFS_SBS_DBLOCKS; 612 if (error)
580 msbp->msb_delta = tp->t_dblocks_delta; 613 goto out_undo_frextents;
581 msbp++;
582 }
583 if (tp->t_agcount_delta != 0) {
584 msbp->msb_field = XFS_SBS_AGCOUNT;
585 msbp->msb_delta = tp->t_agcount_delta;
586 msbp++;
587 }
588 if (tp->t_imaxpct_delta != 0) {
589 msbp->msb_field = XFS_SBS_IMAX_PCT;
590 msbp->msb_delta = tp->t_imaxpct_delta;
591 msbp++;
592 }
593 if (tp->t_rextsize_delta != 0) {
594 msbp->msb_field = XFS_SBS_REXTSIZE;
595 msbp->msb_delta = tp->t_rextsize_delta;
596 msbp++;
597 }
598 if (tp->t_rbmblocks_delta != 0) {
599 msbp->msb_field = XFS_SBS_RBMBLOCKS;
600 msbp->msb_delta = tp->t_rbmblocks_delta;
601 msbp++;
602 }
603 if (tp->t_rblocks_delta != 0) {
604 msbp->msb_field = XFS_SBS_RBLOCKS;
605 msbp->msb_delta = tp->t_rblocks_delta;
606 msbp++;
607 }
608 if (tp->t_rextents_delta != 0) {
609 msbp->msb_field = XFS_SBS_REXTENTS;
610 msbp->msb_delta = tp->t_rextents_delta;
611 msbp++;
612 }
613 if (tp->t_rextslog_delta != 0) {
614 msbp->msb_field = XFS_SBS_REXTSLOG;
615 msbp->msb_delta = tp->t_rextslog_delta;
616 msbp++;
617 }
618 } 614 }
619 615 if (tp->t_agcount_delta != 0) {
620 /* 616 error = xfs_sb_mod32(&mp->m_sb.sb_agcount, tp->t_agcount_delta);
621 * If we need to change anything, do it.
622 */
623 if (msbp > msb) {
624 error = xfs_mod_incore_sb_batch(tp->t_mountp, msb,
625 (uint)(msbp - msb), rsvd);
626 if (error) 617 if (error)
627 goto out_undo_ifreecount; 618 goto out_undo_dblocks;
628 } 619 }
629 620 if (tp->t_imaxpct_delta != 0) {
621 error = xfs_sb_mod8(&mp->m_sb.sb_imax_pct, tp->t_imaxpct_delta);
622 if (error)
623 goto out_undo_agcount;
624 }
625 if (tp->t_rextsize_delta != 0) {
626 error = xfs_sb_mod32(&mp->m_sb.sb_rextsize,
627 tp->t_rextsize_delta);
628 if (error)
629 goto out_undo_imaxpct;
630 }
631 if (tp->t_rbmblocks_delta != 0) {
632 error = xfs_sb_mod32(&mp->m_sb.sb_rbmblocks,
633 tp->t_rbmblocks_delta);
634 if (error)
635 goto out_undo_rextsize;
636 }
637 if (tp->t_rblocks_delta != 0) {
638 error = xfs_sb_mod64(&mp->m_sb.sb_rblocks, tp->t_rblocks_delta);
639 if (error)
640 goto out_undo_rbmblocks;
641 }
642 if (tp->t_rextents_delta != 0) {
643 error = xfs_sb_mod64(&mp->m_sb.sb_rextents,
644 tp->t_rextents_delta);
645 if (error)
646 goto out_undo_rblocks;
647 }
648 if (tp->t_rextslog_delta != 0) {
649 error = xfs_sb_mod8(&mp->m_sb.sb_rextslog,
650 tp->t_rextslog_delta);
651 if (error)
652 goto out_undo_rextents;
653 }
654 spin_unlock(&mp->m_sb_lock);
630 return; 655 return;
631 656
632out_undo_ifreecount: 657out_undo_rextents:
658 if (tp->t_rextents_delta)
659 xfs_sb_mod64(&mp->m_sb.sb_rextents, -tp->t_rextents_delta);
660out_undo_rblocks:
661 if (tp->t_rblocks_delta)
662 xfs_sb_mod64(&mp->m_sb.sb_rblocks, -tp->t_rblocks_delta);
663out_undo_rbmblocks:
664 if (tp->t_rbmblocks_delta)
665 xfs_sb_mod32(&mp->m_sb.sb_rbmblocks, -tp->t_rbmblocks_delta);
666out_undo_rextsize:
667 if (tp->t_rextsize_delta)
668 xfs_sb_mod32(&mp->m_sb.sb_rextsize, -tp->t_rextsize_delta);
669out_undo_imaxpct:
 670 if (tp->t_imaxpct_delta)
671 xfs_sb_mod8(&mp->m_sb.sb_imax_pct, -tp->t_imaxpct_delta);
672out_undo_agcount:
673 if (tp->t_agcount_delta)
674 xfs_sb_mod32(&mp->m_sb.sb_agcount, -tp->t_agcount_delta);
675out_undo_dblocks:
676 if (tp->t_dblocks_delta)
677 xfs_sb_mod64(&mp->m_sb.sb_dblocks, -tp->t_dblocks_delta);
678out_undo_frextents:
679 if (rtxdelta)
680 xfs_sb_mod64(&mp->m_sb.sb_frextents, -rtxdelta);
681out_undo_ifree:
682 spin_unlock(&mp->m_sb_lock);
633 if (ifreedelta) 683 if (ifreedelta)
634 xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); 684 xfs_mod_ifree(mp, -ifreedelta);
635out_undo_icount: 685out_undo_icount:
636 if (idelta) 686 if (idelta)
637 xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); 687 xfs_mod_icount(mp, -idelta);
638out_undo_fdblocks: 688out_undo_fdblocks:
639 if (blkdelta) 689 if (blkdelta)
640 xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); 690 xfs_mod_fdblocks(mp, -blkdelta, rsvd);
641out: 691out:
642 ASSERT(error == 0); 692 ASSERT(error == 0);
643 return; 693 return;