summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-11 23:18:48 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-11 23:18:48 -0500
commit5d50ac70fe98518dbf620bfba8184254663125eb (patch)
treebc32329b72516ece58142444bbfd520d27a7f6ca /fs
parent31c1febd7a45229edb3e5d86f354e3c1df543cbb (diff)
parent4e14e49a91e18098fd8ef30743972e0c3cb727c1 (diff)
Merge tag 'xfs-for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull xfs updates from Dave Chinner: "There is nothing really major here - the only significant addition is the per-mount operation statistics infrastructure. Otherwises there's various ACL, xattr, DAX, AIO and logging fixes, and a smattering of small cleanups and fixes elsewhere. Summary: - per-mount operational statistics in sysfs - fixes for concurrent aio append write submission - various logging fixes - detection of zeroed logs and invalid log sequence numbers on v5 filesystems - memory allocation failure message improvements - a bunch of xattr/ACL fixes - fdatasync optimisation - miscellaneous other fixes and cleanups" * tag 'xfs-for-linus-4.4' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (39 commits) xfs: give all workqueues rescuer threads xfs: fix log recovery op header validation assert xfs: Fix error path in xfs_get_acl xfs: optimise away log forces on timestamp updates for fdatasync xfs: don't leak uuid table on rmmod xfs: invalidate cached acl if set via ioctl xfs: Plug memory leak in xfs_attrmulti_attr_set xfs: Validate the length of on-disk ACLs xfs: invalidate cached acl if set directly via xattr xfs: xfs_filemap_pmd_fault treats read faults as write faults xfs: add ->pfn_mkwrite support for DAX xfs: DAX does not use IO completion callbacks xfs: Don't use unwritten extents for DAX xfs: introduce BMAPI_ZERO for allocating zeroed extents xfs: fix inode size update overflow in xfs_map_direct() xfs: clear PF_NOFREEZE for xfsaild kthread xfs: fix an error code in xfs_fs_fill_super() xfs: stats are no longer dependent on CONFIG_PROC_FS xfs: simplify /proc teardown & error handling xfs: per-filesystem stats counter implementation ...
Diffstat (limited to 'fs')
-rw-r--r--fs/dax.c5
-rw-r--r--fs/xfs/Makefile2
-rw-r--r--fs/xfs/kmem.c10
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c30
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h8
-rw-r--r--fs/xfs/libxfs/xfs_attr.c6
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c3
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c65
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h13
-rw-r--r--fs/xfs/libxfs/xfs_btree.c21
-rw-r--r--fs/xfs/libxfs/xfs_btree.h39
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c4
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c6
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c3
-rw-r--r--fs/xfs/libxfs/xfs_format.h18
-rw-r--r--fs/xfs/libxfs/xfs_fs.h10
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c10
-rw-r--r--fs/xfs/libxfs/xfs_sb.c10
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c7
-rw-r--r--fs/xfs/xfs_acl.c14
-rw-r--r--fs/xfs/xfs_acl.h4
-rw-r--r--fs/xfs/xfs_aops.c119
-rw-r--r--fs/xfs/xfs_aops.h3
-rw-r--r--fs/xfs/xfs_attr_list.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c38
-rw-r--r--fs/xfs/xfs_buf.c21
-rw-r--r--fs/xfs/xfs_dir2_readdir.c2
-rw-r--r--fs/xfs/xfs_dquot.c14
-rw-r--r--fs/xfs/xfs_file.c114
-rw-r--r--fs/xfs/xfs_icache.c18
-rw-r--r--fs/xfs/xfs_inode.c8
-rw-r--r--fs/xfs/xfs_inode_item.c1
-rw-r--r--fs/xfs/xfs_inode_item.h1
-rw-r--r--fs/xfs/xfs_ioctl.c23
-rw-r--r--fs/xfs/xfs_ioctl32.c2
-rw-r--r--fs/xfs/xfs_iomap.c70
-rw-r--r--fs/xfs/xfs_iops.c4
-rw-r--r--fs/xfs/xfs_linux.h7
-rw-r--r--fs/xfs/xfs_log.c93
-rw-r--r--fs/xfs/xfs_log.h1
-rw-r--r--fs/xfs/xfs_log_priv.h51
-rw-r--r--fs/xfs/xfs_log_recover.c14
-rw-r--r--fs/xfs/xfs_message.c7
-rw-r--r--fs/xfs/xfs_mount.c21
-rw-r--r--fs/xfs/xfs_mount.h5
-rw-r--r--fs/xfs/xfs_pnfs.c5
-rw-r--r--fs/xfs/xfs_qm.c14
-rw-r--r--fs/xfs/xfs_stats.c93
-rw-r--r--fs/xfs/xfs_stats.h36
-rw-r--r--fs/xfs/xfs_super.c57
-rw-r--r--fs/xfs/xfs_sysctl.c15
-rw-r--r--fs/xfs/xfs_sysfs.c185
-rw-r--r--fs/xfs/xfs_sysfs.h1
-rw-r--r--fs/xfs/xfs_trace.h2
-rw-r--r--fs/xfs/xfs_trans.c6
-rw-r--r--fs/xfs/xfs_trans_ail.c13
-rw-r--r--fs/xfs/xfs_trans_inode.c9
-rw-r--r--fs/xfs/xfs_xattr.c31
62 files changed, 994 insertions, 411 deletions
diff --git a/fs/dax.c b/fs/dax.c
index a86d3cc2b389..131fd35ae39d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -29,6 +29,11 @@
29#include <linux/uio.h> 29#include <linux/uio.h>
30#include <linux/vmstat.h> 30#include <linux/vmstat.h>
31 31
32/*
33 * dax_clear_blocks() is called from within transaction context from XFS,
34 * and hence this means the stack from this point must follow GFP_NOFS
35 * semantics for all operations.
36 */
32int dax_clear_blocks(struct inode *inode, sector_t block, long size) 37int dax_clear_blocks(struct inode *inode, sector_t block, long size)
33{ 38{
34 struct block_device *bdev = inode->i_sb->s_bdev; 39 struct block_device *bdev = inode->i_sb->s_bdev;
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index a096841bd06c..f64639176670 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -84,6 +84,7 @@ xfs-y += xfs_aops.o \
84 xfs_message.o \ 84 xfs_message.o \
85 xfs_mount.o \ 85 xfs_mount.o \
86 xfs_mru_cache.o \ 86 xfs_mru_cache.o \
87 xfs_stats.o \
87 xfs_super.o \ 88 xfs_super.o \
88 xfs_symlink.o \ 89 xfs_symlink.o \
89 xfs_sysfs.o \ 90 xfs_sysfs.o \
@@ -118,7 +119,6 @@ xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
118xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o 119xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
119 120
120xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o 121xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
121xfs-$(CONFIG_PROC_FS) += xfs_stats.o
122xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o 122xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
123xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o 123xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
124xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o 124xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index a7a3a63bb360..686ba6fb20dd 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -55,8 +55,9 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
55 return ptr; 55 return ptr;
56 if (!(++retries % 100)) 56 if (!(++retries % 100))
57 xfs_err(NULL, 57 xfs_err(NULL,
58 "possible memory allocation deadlock in %s (mode:0x%x)", 58 "%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
59 __func__, lflags); 59 current->comm, current->pid,
60 (unsigned int)size, __func__, lflags);
60 congestion_wait(BLK_RW_ASYNC, HZ/50); 61 congestion_wait(BLK_RW_ASYNC, HZ/50);
61 } while (1); 62 } while (1);
62} 63}
@@ -120,8 +121,9 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags)
120 return ptr; 121 return ptr;
121 if (!(++retries % 100)) 122 if (!(++retries % 100))
122 xfs_err(NULL, 123 xfs_err(NULL,
123 "possible memory allocation deadlock in %s (mode:0x%x)", 124 "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
124 __func__, lflags); 125 current->comm, current->pid,
126 __func__, lflags);
125 congestion_wait(BLK_RW_ASYNC, HZ/50); 127 congestion_wait(BLK_RW_ASYNC, HZ/50);
126 } while (1); 128 } while (1);
127} 129}
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index ffad7f20342f..3479294c1d58 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -482,7 +482,9 @@ xfs_agfl_verify(
482 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) 482 be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks)
483 return false; 483 return false;
484 } 484 }
485 return true; 485
486 return xfs_log_check_lsn(mp,
487 be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn));
486} 488}
487 489
488static void 490static void
@@ -651,8 +653,8 @@ xfs_alloc_ag_vextent(
651 -((long)(args->len))); 653 -((long)(args->len)));
652 } 654 }
653 655
654 XFS_STATS_INC(xs_allocx); 656 XFS_STATS_INC(args->mp, xs_allocx);
655 XFS_STATS_ADD(xs_allocb, args->len); 657 XFS_STATS_ADD(args->mp, xs_allocb, args->len);
656 return error; 658 return error;
657} 659}
658 660
@@ -1808,8 +1810,8 @@ xfs_free_ag_extent(
1808 1810
1809 if (!isfl) 1811 if (!isfl)
1810 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); 1812 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1811 XFS_STATS_INC(xs_freex); 1813 XFS_STATS_INC(mp, xs_freex);
1812 XFS_STATS_ADD(xs_freeb, len); 1814 XFS_STATS_ADD(mp, xs_freeb, len);
1813 1815
1814 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); 1816 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
1815 1817
@@ -2259,9 +2261,13 @@ xfs_agf_verify(
2259 { 2261 {
2260 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); 2262 struct xfs_agf *agf = XFS_BUF_TO_AGF(bp);
2261 2263
2262 if (xfs_sb_version_hascrc(&mp->m_sb) && 2264 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2263 !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid)) 2265 if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
2264 return false; 2266 return false;
2267 if (!xfs_log_check_lsn(mp,
2268 be64_to_cpu(XFS_BUF_TO_AGF(bp)->agf_lsn)))
2269 return false;
2270 }
2265 2271
2266 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && 2272 if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2267 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2273 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
@@ -2503,7 +2509,7 @@ xfs_alloc_vextent(
2503 * Try near allocation first, then anywhere-in-ag after 2509 * Try near allocation first, then anywhere-in-ag after
2504 * the first a.g. fails. 2510 * the first a.g. fails.
2505 */ 2511 */
2506 if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) && 2512 if ((args->userdata & XFS_ALLOC_INITIAL_USER_DATA) &&
2507 (mp->m_flags & XFS_MOUNT_32BITINODES)) { 2513 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2508 args->fsbno = XFS_AGB_TO_FSB(mp, 2514 args->fsbno = XFS_AGB_TO_FSB(mp,
2509 ((mp->m_agfrotor / rotorstep) % 2515 ((mp->m_agfrotor / rotorstep) %
@@ -2634,6 +2640,14 @@ xfs_alloc_vextent(
2634 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), 2640 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2635 args->len); 2641 args->len);
2636#endif 2642#endif
2643
2644 /* Zero the extent if we were asked to do so */
2645 if (args->userdata & XFS_ALLOC_USERDATA_ZERO) {
2646 error = xfs_zero_extent(args->ip, args->fsbno, args->len);
2647 if (error)
2648 goto error0;
2649 }
2650
2637 } 2651 }
2638 xfs_perag_put(args->pag); 2652 xfs_perag_put(args->pag);
2639 return 0; 2653 return 0;
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index ca1c8168373a..0ecde4d5cac8 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -101,6 +101,7 @@ typedef struct xfs_alloc_arg {
101 struct xfs_mount *mp; /* file system mount point */ 101 struct xfs_mount *mp; /* file system mount point */
102 struct xfs_buf *agbp; /* buffer for a.g. freelist header */ 102 struct xfs_buf *agbp; /* buffer for a.g. freelist header */
103 struct xfs_perag *pag; /* per-ag struct for this agno */ 103 struct xfs_perag *pag; /* per-ag struct for this agno */
104 struct xfs_inode *ip; /* for userdata zeroing method */
104 xfs_fsblock_t fsbno; /* file system block number */ 105 xfs_fsblock_t fsbno; /* file system block number */
105 xfs_agnumber_t agno; /* allocation group number */ 106 xfs_agnumber_t agno; /* allocation group number */
106 xfs_agblock_t agbno; /* allocation group-relative block # */ 107 xfs_agblock_t agbno; /* allocation group-relative block # */
@@ -120,15 +121,16 @@ typedef struct xfs_alloc_arg {
120 char wasdel; /* set if allocation was prev delayed */ 121 char wasdel; /* set if allocation was prev delayed */
121 char wasfromfl; /* set if allocation is from freelist */ 122 char wasfromfl; /* set if allocation is from freelist */
122 char isfl; /* set if is freelist blocks - !acctg */ 123 char isfl; /* set if is freelist blocks - !acctg */
123 char userdata; /* set if this is user data */ 124 char userdata; /* mask defining userdata treatment */
124 xfs_fsblock_t firstblock; /* io first block allocated */ 125 xfs_fsblock_t firstblock; /* io first block allocated */
125} xfs_alloc_arg_t; 126} xfs_alloc_arg_t;
126 127
127/* 128/*
128 * Defines for userdata 129 * Defines for userdata
129 */ 130 */
130#define XFS_ALLOC_USERDATA 1 /* allocation is for user data*/ 131#define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/
131#define XFS_ALLOC_INITIAL_USER_DATA 2 /* special case start of file */ 132#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */
133#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */
132 134
133xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, 135xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp,
134 struct xfs_perag *pag, xfs_extlen_t need); 136 struct xfs_perag *pag, xfs_extlen_t need);
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index ff065578969f..f949818fa1c7 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -125,7 +125,7 @@ xfs_attr_get(
125 uint lock_mode; 125 uint lock_mode;
126 int error; 126 int error;
127 127
128 XFS_STATS_INC(xs_attr_get); 128 XFS_STATS_INC(ip->i_mount, xs_attr_get);
129 129
130 if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 130 if (XFS_FORCED_SHUTDOWN(ip->i_mount))
131 return -EIO; 131 return -EIO;
@@ -209,7 +209,7 @@ xfs_attr_set(
209 int rsvd = (flags & ATTR_ROOT) != 0; 209 int rsvd = (flags & ATTR_ROOT) != 0;
210 int error, err2, committed, local; 210 int error, err2, committed, local;
211 211
212 XFS_STATS_INC(xs_attr_set); 212 XFS_STATS_INC(mp, xs_attr_set);
213 213
214 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 214 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
215 return -EIO; 215 return -EIO;
@@ -412,7 +412,7 @@ xfs_attr_remove(
412 xfs_fsblock_t firstblock; 412 xfs_fsblock_t firstblock;
413 int error; 413 int error;
414 414
415 XFS_STATS_INC(xs_attr_remove); 415 XFS_STATS_INC(mp, xs_attr_remove);
416 416
417 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 417 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
418 return -EIO; 418 return -EIO;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 33df52d97ec7..aa187f7ba2dd 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -41,6 +41,7 @@
41#include "xfs_buf_item.h" 41#include "xfs_buf_item.h"
42#include "xfs_cksum.h" 42#include "xfs_cksum.h"
43#include "xfs_dir2.h" 43#include "xfs_dir2.h"
44#include "xfs_log.h"
44 45
45 46
46/* 47/*
@@ -266,6 +267,8 @@ xfs_attr3_leaf_verify(
266 return false; 267 return false;
267 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 268 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
268 return false; 269 return false;
270 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
271 return false;
269 } else { 272 } else {
270 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC) 273 if (ichdr.magic != XFS_ATTR_LEAF_MAGIC)
271 return false; 274 return false;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index f38f9bd81557..5ab95ffa4ae9 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -107,7 +107,7 @@ xfs_attr3_rmt_verify(
107 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt)) 107 if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))
108 return false; 108 return false;
109 if (be32_to_cpu(rmt->rm_offset) + 109 if (be32_to_cpu(rmt->rm_offset) +
110 be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX) 110 be32_to_cpu(rmt->rm_bytes) > XFS_XATTR_SIZE_MAX)
111 return false; 111 return false;
112 if (rmt->rm_owner == 0) 112 if (rmt->rm_owner == 0)
113 return false; 113 return false;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 8e2010d53b07..119c2422aac7 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -948,14 +948,16 @@ xfs_bmap_local_to_extents(
948 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0); 948 bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
949 949
950 /* 950 /*
951 * Initialise the block and copy the data 951 * Initialize the block, copy the data and log the remote buffer.
952 * 952 *
953 * Note: init_fn must set the buffer log item type correctly! 953 * The callout is responsible for logging because the remote format
954 * might differ from the local format and thus we don't know how much to
955 * log here. Note that init_fn must also set the buffer log item type
956 * correctly.
954 */ 957 */
955 init_fn(tp, bp, ip, ifp); 958 init_fn(tp, bp, ip, ifp);
956 959
957 /* account for the change in fork size and log everything */ 960 /* account for the change in fork size */
958 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
959 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); 961 xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
960 xfs_bmap_local_to_extents_empty(ip, whichfork); 962 xfs_bmap_local_to_extents_empty(ip, whichfork);
961 flags |= XFS_ILOG_CORE; 963 flags |= XFS_ILOG_CORE;
@@ -1435,7 +1437,7 @@ xfs_bmap_search_extents(
1435 xfs_ifork_t *ifp; /* inode fork pointer */ 1437 xfs_ifork_t *ifp; /* inode fork pointer */
1436 xfs_bmbt_rec_host_t *ep; /* extent record pointer */ 1438 xfs_bmbt_rec_host_t *ep; /* extent record pointer */
1437 1439
1438 XFS_STATS_INC(xs_look_exlist); 1440 XFS_STATS_INC(ip->i_mount, xs_look_exlist);
1439 ifp = XFS_IFORK_PTR(ip, fork); 1441 ifp = XFS_IFORK_PTR(ip, fork);
1440 1442
1441 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp); 1443 ep = xfs_bmap_search_multi_extents(ifp, bno, eofp, lastxp, gotp, prevp);
@@ -1732,7 +1734,7 @@ xfs_bmap_add_extent_delay_real(
1732 ASSERT(!bma->cur || 1734 ASSERT(!bma->cur ||
1733 (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 1735 (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
1734 1736
1735 XFS_STATS_INC(xs_add_exlist); 1737 XFS_STATS_INC(mp, xs_add_exlist);
1736 1738
1737#define LEFT r[0] 1739#define LEFT r[0]
1738#define RIGHT r[1] 1740#define RIGHT r[1]
@@ -2286,7 +2288,7 @@ xfs_bmap_add_extent_unwritten_real(
2286 ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec)); 2288 ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
2287 ASSERT(!isnullstartblock(new->br_startblock)); 2289 ASSERT(!isnullstartblock(new->br_startblock));
2288 2290
2289 XFS_STATS_INC(xs_add_exlist); 2291 XFS_STATS_INC(mp, xs_add_exlist);
2290 2292
2291#define LEFT r[0] 2293#define LEFT r[0]
2292#define RIGHT r[1] 2294#define RIGHT r[1]
@@ -2946,7 +2948,7 @@ xfs_bmap_add_extent_hole_real(
2946 ASSERT(!bma->cur || 2948 ASSERT(!bma->cur ||
2947 !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL)); 2949 !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
2948 2950
2949 XFS_STATS_INC(xs_add_exlist); 2951 XFS_STATS_INC(mp, xs_add_exlist);
2950 2952
2951 state = 0; 2953 state = 0;
2952 if (whichfork == XFS_ATTR_FORK) 2954 if (whichfork == XFS_ATTR_FORK)
@@ -3800,8 +3802,13 @@ xfs_bmap_btalloc(
3800 args.wasdel = ap->wasdel; 3802 args.wasdel = ap->wasdel;
3801 args.isfl = 0; 3803 args.isfl = 0;
3802 args.userdata = ap->userdata; 3804 args.userdata = ap->userdata;
3803 if ((error = xfs_alloc_vextent(&args))) 3805 if (ap->userdata & XFS_ALLOC_USERDATA_ZERO)
3806 args.ip = ap->ip;
3807
3808 error = xfs_alloc_vextent(&args);
3809 if (error)
3804 return error; 3810 return error;
3811
3805 if (tryagain && args.fsbno == NULLFSBLOCK) { 3812 if (tryagain && args.fsbno == NULLFSBLOCK) {
3806 /* 3813 /*
3807 * Exact allocation failed. Now try with alignment 3814 * Exact allocation failed. Now try with alignment
@@ -4036,7 +4043,7 @@ xfs_bmapi_read(
4036 if (XFS_FORCED_SHUTDOWN(mp)) 4043 if (XFS_FORCED_SHUTDOWN(mp))
4037 return -EIO; 4044 return -EIO;
4038 4045
4039 XFS_STATS_INC(xs_blk_mapr); 4046 XFS_STATS_INC(mp, xs_blk_mapr);
4040 4047
4041 ifp = XFS_IFORK_PTR(ip, whichfork); 4048 ifp = XFS_IFORK_PTR(ip, whichfork);
4042 4049
@@ -4221,7 +4228,7 @@ xfs_bmapi_delay(
4221 if (XFS_FORCED_SHUTDOWN(mp)) 4228 if (XFS_FORCED_SHUTDOWN(mp))
4222 return -EIO; 4229 return -EIO;
4223 4230
4224 XFS_STATS_INC(xs_blk_mapw); 4231 XFS_STATS_INC(mp, xs_blk_mapw);
4225 4232
4226 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 4233 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
4227 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK); 4234 error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
@@ -4300,11 +4307,14 @@ xfs_bmapi_allocate(
4300 4307
4301 /* 4308 /*
4302 * Indicate if this is the first user data in the file, or just any 4309 * Indicate if this is the first user data in the file, or just any
4303 * user data. 4310 * user data. And if it is userdata, indicate whether it needs to
4311 * be initialised to zero during allocation.
4304 */ 4312 */
4305 if (!(bma->flags & XFS_BMAPI_METADATA)) { 4313 if (!(bma->flags & XFS_BMAPI_METADATA)) {
4306 bma->userdata = (bma->offset == 0) ? 4314 bma->userdata = (bma->offset == 0) ?
4307 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA; 4315 XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
4316 if (bma->flags & XFS_BMAPI_ZERO)
4317 bma->userdata |= XFS_ALLOC_USERDATA_ZERO;
4308 } 4318 }
4309 4319
4310 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1; 4320 bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
@@ -4419,6 +4429,17 @@ xfs_bmapi_convert_unwritten(
4419 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4429 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4420 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN; 4430 ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
4421 4431
4432 /*
4433 * Before insertion into the bmbt, zero the range being converted
4434 * if required.
4435 */
4436 if (flags & XFS_BMAPI_ZERO) {
4437 error = xfs_zero_extent(bma->ip, mval->br_startblock,
4438 mval->br_blockcount);
4439 if (error)
4440 return error;
4441 }
4442
4422 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, 4443 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
4423 &bma->cur, mval, bma->firstblock, bma->flist, 4444 &bma->cur, mval, bma->firstblock, bma->flist,
4424 &tmp_logflags); 4445 &tmp_logflags);
@@ -4512,6 +4533,18 @@ xfs_bmapi_write(
4512 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); 4533 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
4513 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4534 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4514 4535
4536 /* zeroing is for currently only for data extents, not metadata */
4537 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
4538 (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO));
4539 /*
4540 * we can allocate unwritten extents or pre-zero allocated blocks,
4541 * but it makes no sense to do both at once. This would result in
4542 * zeroing the unwritten extent twice, but it still being an
4543 * unwritten extent....
4544 */
4545 ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
4546 (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
4547
4515 if (unlikely(XFS_TEST_ERROR( 4548 if (unlikely(XFS_TEST_ERROR(
4516 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && 4549 (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
4517 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), 4550 XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
@@ -4525,7 +4558,7 @@ xfs_bmapi_write(
4525 4558
4526 ifp = XFS_IFORK_PTR(ip, whichfork); 4559 ifp = XFS_IFORK_PTR(ip, whichfork);
4527 4560
4528 XFS_STATS_INC(xs_blk_mapw); 4561 XFS_STATS_INC(mp, xs_blk_mapw);
4529 4562
4530 if (*firstblock == NULLFSBLOCK) { 4563 if (*firstblock == NULLFSBLOCK) {
4531 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE) 4564 if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
@@ -4718,12 +4751,12 @@ xfs_bmap_del_extent(
4718 xfs_filblks_t temp2; /* for indirect length calculations */ 4751 xfs_filblks_t temp2; /* for indirect length calculations */
4719 int state = 0; 4752 int state = 0;
4720 4753
4721 XFS_STATS_INC(xs_del_exlist); 4754 mp = ip->i_mount;
4755 XFS_STATS_INC(mp, xs_del_exlist);
4722 4756
4723 if (whichfork == XFS_ATTR_FORK) 4757 if (whichfork == XFS_ATTR_FORK)
4724 state |= BMAP_ATTRFORK; 4758 state |= BMAP_ATTRFORK;
4725 4759
4726 mp = ip->i_mount;
4727 ifp = XFS_IFORK_PTR(ip, whichfork); 4760 ifp = XFS_IFORK_PTR(ip, whichfork);
4728 ASSERT((*idx >= 0) && (*idx < ifp->if_bytes / 4761 ASSERT((*idx >= 0) && (*idx < ifp->if_bytes /
4729 (uint)sizeof(xfs_bmbt_rec_t))); 4762 (uint)sizeof(xfs_bmbt_rec_t)));
@@ -5070,7 +5103,7 @@ xfs_bunmapi(
5070 *done = 1; 5103 *done = 1;
5071 return 0; 5104 return 0;
5072 } 5105 }
5073 XFS_STATS_INC(xs_blk_unmap); 5106 XFS_STATS_INC(mp, xs_blk_unmap);
5074 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip); 5107 isrt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
5075 start = bno; 5108 start = bno;
5076 bno = start + len - 1; 5109 bno = start + len - 1;
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 6aaa0c1c7200..a160f8a5a3fc 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -52,9 +52,9 @@ struct xfs_bmalloca {
52 xfs_extlen_t minleft; /* amount must be left after alloc */ 52 xfs_extlen_t minleft; /* amount must be left after alloc */
53 bool eof; /* set if allocating past last extent */ 53 bool eof; /* set if allocating past last extent */
54 bool wasdel; /* replacing a delayed allocation */ 54 bool wasdel; /* replacing a delayed allocation */
55 bool userdata;/* set if is user data */
56 bool aeof; /* allocated space at eof */ 55 bool aeof; /* allocated space at eof */
57 bool conv; /* overwriting unwritten extents */ 56 bool conv; /* overwriting unwritten extents */
57 char userdata;/* userdata mask */
58 int flags; 58 int flags;
59}; 59};
60 60
@@ -109,6 +109,14 @@ typedef struct xfs_bmap_free
109 */ 109 */
110#define XFS_BMAPI_CONVERT 0x040 110#define XFS_BMAPI_CONVERT 0x040
111 111
112/*
113 * allocate zeroed extents - this requires all newly allocated user data extents
114 * to be initialised to zero. It will be ignored if XFS_BMAPI_METADATA is set.
115 * Use in conjunction with XFS_BMAPI_CONVERT to convert unwritten extents found
116 * during the allocation range to zeroed written extents.
117 */
118#define XFS_BMAPI_ZERO 0x080
119
112#define XFS_BMAPI_FLAGS \ 120#define XFS_BMAPI_FLAGS \
113 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \ 121 { XFS_BMAPI_ENTIRE, "ENTIRE" }, \
114 { XFS_BMAPI_METADATA, "METADATA" }, \ 122 { XFS_BMAPI_METADATA, "METADATA" }, \
@@ -116,7 +124,8 @@ typedef struct xfs_bmap_free
116 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \ 124 { XFS_BMAPI_PREALLOC, "PREALLOC" }, \
117 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \ 125 { XFS_BMAPI_IGSTATE, "IGSTATE" }, \
118 { XFS_BMAPI_CONTIG, "CONTIG" }, \ 126 { XFS_BMAPI_CONTIG, "CONTIG" }, \
119 { XFS_BMAPI_CONVERT, "CONVERT" } 127 { XFS_BMAPI_CONVERT, "CONVERT" }, \
128 { XFS_BMAPI_ZERO, "ZERO" }
120 129
121 130
122static inline int xfs_bmapi_aflag(int w) 131static inline int xfs_bmapi_aflag(int w)
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index f7d7ee7a2607..af1bbee5586e 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -32,6 +32,7 @@
32#include "xfs_trace.h" 32#include "xfs_trace.h"
33#include "xfs_cksum.h" 33#include "xfs_cksum.h"
34#include "xfs_alloc.h" 34#include "xfs_alloc.h"
35#include "xfs_log.h"
35 36
36/* 37/*
37 * Cursor allocation zone. 38 * Cursor allocation zone.
@@ -222,7 +223,7 @@ xfs_btree_check_ptr(
222 * long-form btree header. 223 * long-form btree header.
223 * 224 *
224 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 225 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
225 * it into the buffer so recovery knows what the last modifcation was that made 226 * it into the buffer so recovery knows what the last modification was that made
226 * it to disk. 227 * it to disk.
227 */ 228 */
228void 229void
@@ -243,8 +244,14 @@ bool
243xfs_btree_lblock_verify_crc( 244xfs_btree_lblock_verify_crc(
244 struct xfs_buf *bp) 245 struct xfs_buf *bp)
245{ 246{
246 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 247 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
248 struct xfs_mount *mp = bp->b_target->bt_mount;
249
250 if (xfs_sb_version_hascrc(&mp->m_sb)) {
251 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.l.bb_lsn)))
252 return false;
247 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); 253 return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
254 }
248 255
249 return true; 256 return true;
250} 257}
@@ -254,7 +261,7 @@ xfs_btree_lblock_verify_crc(
254 * short-form btree header. 261 * short-form btree header.
255 * 262 *
256 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put 263 * Prior to calculting the CRC, pull the LSN out of the buffer log item and put
257 * it into the buffer so recovery knows what the last modifcation was that made 264 * it into the buffer so recovery knows what the last modification was that made
258 * it to disk. 265 * it to disk.
259 */ 266 */
260void 267void
@@ -275,8 +282,14 @@ bool
275xfs_btree_sblock_verify_crc( 282xfs_btree_sblock_verify_crc(
276 struct xfs_buf *bp) 283 struct xfs_buf *bp)
277{ 284{
278 if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) 285 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
286 struct xfs_mount *mp = bp->b_target->bt_mount;
287
288 if (xfs_sb_version_hascrc(&mp->m_sb)) {
289 if (!xfs_log_check_lsn(mp, be64_to_cpu(block->bb_u.s.bb_lsn)))
290 return false;
279 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); 291 return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
292 }
280 293
281 return true; 294 return true;
282} 295}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 8f18bab73ea5..992dec0638f3 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -84,31 +84,38 @@ union xfs_btree_rec {
84/* 84/*
85 * Generic stats interface 85 * Generic stats interface
86 */ 86 */
87#define __XFS_BTREE_STATS_INC(type, stat) \ 87#define __XFS_BTREE_STATS_INC(mp, type, stat) \
88 XFS_STATS_INC(xs_ ## type ## _2_ ## stat) 88 XFS_STATS_INC(mp, xs_ ## type ## _2_ ## stat)
89#define XFS_BTREE_STATS_INC(cur, stat) \ 89#define XFS_BTREE_STATS_INC(cur, stat) \
90do { \ 90do { \
91 struct xfs_mount *__mp = cur->bc_mp; \
91 switch (cur->bc_btnum) { \ 92 switch (cur->bc_btnum) { \
92 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(abtb, stat); break; \ 93 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_INC(__mp, abtb, stat); break; \
93 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break; \ 94 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(__mp, abtc, stat); break; \
94 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break; \ 95 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \
95 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break; \ 96 case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \
96 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break; \ 97 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \
97 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 98 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
98 } \ 99 } \
99} while (0) 100} while (0)
100 101
101#define __XFS_BTREE_STATS_ADD(type, stat, val) \ 102#define __XFS_BTREE_STATS_ADD(mp, type, stat, val) \
102 XFS_STATS_ADD(xs_ ## type ## _2_ ## stat, val) 103 XFS_STATS_ADD(mp, xs_ ## type ## _2_ ## stat, val)
103#define XFS_BTREE_STATS_ADD(cur, stat, val) \ 104#define XFS_BTREE_STATS_ADD(cur, stat, val) \
104do { \ 105do { \
106 struct xfs_mount *__mp = cur->bc_mp; \
105 switch (cur->bc_btnum) { \ 107 switch (cur->bc_btnum) { \
106 case XFS_BTNUM_BNO: __XFS_BTREE_STATS_ADD(abtb, stat, val); break; \ 108 case XFS_BTNUM_BNO: \
107 case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \ 109 __XFS_BTREE_STATS_ADD(__mp, abtb, stat, val); break; \
108 case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \ 110 case XFS_BTNUM_CNT: \
109 case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ 111 __XFS_BTREE_STATS_ADD(__mp, abtc, stat, val); break; \
110 case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \ 112 case XFS_BTNUM_BMAP: \
111 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ 113 __XFS_BTREE_STATS_ADD(__mp, bmbt, stat, val); break; \
114 case XFS_BTNUM_INO: \
115 __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \
116 case XFS_BTNUM_FINO: \
117 __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \
118 case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \
112 } \ 119 } \
113} while (0) 120} while (0)
114 121
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index be43248a5822..e89a0f8f827c 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -39,6 +39,7 @@
39#include "xfs_trace.h" 39#include "xfs_trace.h"
40#include "xfs_cksum.h" 40#include "xfs_cksum.h"
41#include "xfs_buf_item.h" 41#include "xfs_buf_item.h"
42#include "xfs_log.h"
42 43
43/* 44/*
44 * xfs_da_btree.c 45 * xfs_da_btree.c
@@ -150,6 +151,8 @@ xfs_da3_node_verify(
150 return false; 151 return false;
151 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn) 152 if (be64_to_cpu(hdr3->info.blkno) != bp->b_bn)
152 return false; 153 return false;
154 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->info.lsn)))
155 return false;
153 } else { 156 } else {
154 if (ichdr.magic != XFS_DA_NODE_MAGIC) 157 if (ichdr.magic != XFS_DA_NODE_MAGIC)
155 return false; 158 return false;
@@ -322,6 +325,7 @@ xfs_da3_node_create(
322 if (xfs_sb_version_hascrc(&mp->m_sb)) { 325 if (xfs_sb_version_hascrc(&mp->m_sb)) {
323 struct xfs_da3_node_hdr *hdr3 = bp->b_addr; 326 struct xfs_da3_node_hdr *hdr3 = bp->b_addr;
324 327
328 memset(hdr3, 0, sizeof(struct xfs_da3_node_hdr));
325 ichdr.magic = XFS_DA3_NODE_MAGIC; 329 ichdr.magic = XFS_DA3_NODE_MAGIC;
326 hdr3->info.blkno = cpu_to_be64(bp->b_bn); 330 hdr3->info.blkno = cpu_to_be64(bp->b_bn);
327 hdr3->info.owner = cpu_to_be64(args->dp->i_ino); 331 hdr3->info.owner = cpu_to_be64(args->dp->i_ino);
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 9de401d297e5..2fb53a5c0a74 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -271,7 +271,7 @@ xfs_dir_createname(
271 rval = xfs_dir_ino_validate(tp->t_mountp, inum); 271 rval = xfs_dir_ino_validate(tp->t_mountp, inum);
272 if (rval) 272 if (rval)
273 return rval; 273 return rval;
274 XFS_STATS_INC(xs_dir_create); 274 XFS_STATS_INC(dp->i_mount, xs_dir_create);
275 } 275 }
276 276
277 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 277 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
@@ -365,7 +365,7 @@ xfs_dir_lookup(
365 int lock_mode; 365 int lock_mode;
366 366
367 ASSERT(S_ISDIR(dp->i_d.di_mode)); 367 ASSERT(S_ISDIR(dp->i_d.di_mode));
368 XFS_STATS_INC(xs_dir_lookup); 368 XFS_STATS_INC(dp->i_mount, xs_dir_lookup);
369 369
370 /* 370 /*
371 * We need to use KM_NOFS here so that lockdep will not throw false 371 * We need to use KM_NOFS here so that lockdep will not throw false
@@ -444,7 +444,7 @@ xfs_dir_removename(
444 int v; /* type-checking value */ 444 int v; /* type-checking value */
445 445
446 ASSERT(S_ISDIR(dp->i_d.di_mode)); 446 ASSERT(S_ISDIR(dp->i_d.di_mode));
447 XFS_STATS_INC(xs_dir_remove); 447 XFS_STATS_INC(dp->i_mount, xs_dir_remove);
448 448
449 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); 449 args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
450 if (!args) 450 if (!args)
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 4778d1dd511a..9c10e2b8cfcb 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -34,6 +34,7 @@
34#include "xfs_error.h" 34#include "xfs_error.h"
35#include "xfs_trace.h" 35#include "xfs_trace.h"
36#include "xfs_cksum.h" 36#include "xfs_cksum.h"
37#include "xfs_log.h"
37 38
38/* 39/*
39 * Local function prototypes. 40 * Local function prototypes.
@@ -71,6 +72,8 @@ xfs_dir3_block_verify(
71 return false; 72 return false;
72 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 73 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
73 return false; 74 return false;
75 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
76 return false;
74 } else { 77 } else {
75 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) 78 if (hdr3->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))
76 return false; 79 return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 824131e71bc5..af71a84f343c 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -31,6 +31,7 @@
31#include "xfs_trans.h" 31#include "xfs_trans.h"
32#include "xfs_buf_item.h" 32#include "xfs_buf_item.h"
33#include "xfs_cksum.h" 33#include "xfs_cksum.h"
34#include "xfs_log.h"
34 35
35/* 36/*
36 * Check the consistency of the data block. 37 * Check the consistency of the data block.
@@ -224,6 +225,8 @@ xfs_dir3_data_verify(
224 return false; 225 return false;
225 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 226 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
226 return false; 227 return false;
228 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
229 return false;
227 } else { 230 } else {
228 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC)) 231 if (hdr3->magic != cpu_to_be32(XFS_DIR2_DATA_MAGIC))
229 return false; 232 return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index f300240ebb8d..3923e1f94697 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -33,6 +33,7 @@
33#include "xfs_trans.h" 33#include "xfs_trans.h"
34#include "xfs_buf_item.h" 34#include "xfs_buf_item.h"
35#include "xfs_cksum.h" 35#include "xfs_cksum.h"
36#include "xfs_log.h"
36 37
37/* 38/*
38 * Local function declarations. 39 * Local function declarations.
@@ -164,6 +165,8 @@ xfs_dir3_leaf_verify(
164 return false; 165 return false;
165 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn) 166 if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
166 return false; 167 return false;
168 if (!xfs_log_check_lsn(mp, be64_to_cpu(leaf3->info.lsn)))
169 return false;
167 } else { 170 } else {
168 if (leaf->hdr.info.magic != cpu_to_be16(magic)) 171 if (leaf->hdr.info.magic != cpu_to_be16(magic))
169 return false; 172 return false;
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index cc28e924545b..70b0cb2fd556 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -33,6 +33,7 @@
33#include "xfs_trans.h" 33#include "xfs_trans.h"
34#include "xfs_buf_item.h" 34#include "xfs_buf_item.h"
35#include "xfs_cksum.h" 35#include "xfs_cksum.h"
36#include "xfs_log.h"
36 37
37/* 38/*
38 * Function declarations. 39 * Function declarations.
@@ -97,6 +98,8 @@ xfs_dir3_free_verify(
97 return false; 98 return false;
98 if (be64_to_cpu(hdr3->blkno) != bp->b_bn) 99 if (be64_to_cpu(hdr3->blkno) != bp->b_bn)
99 return false; 100 return false;
101 if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr3->lsn)))
102 return false;
100 } else { 103 } else {
101 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)) 104 if (hdr->magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC))
102 return false; 105 return false;
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 9590a069e556..8774498ce0ff 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -60,6 +60,14 @@ struct xfs_ifork;
60#define XFS_SB_VERSION_MOREBITSBIT 0x8000 60#define XFS_SB_VERSION_MOREBITSBIT 0x8000
61 61
62/* 62/*
63 * The size of a single extended attribute on disk is limited by
64 * the size of index values within the attribute entries themselves.
65 * These are be16 fields, so we can only support attribute data
66 * sizes up to 2^16 bytes in length.
67 */
68#define XFS_XATTR_SIZE_MAX (1 << 16)
69
70/*
63 * Supported feature bit list is just all bits in the versionnum field because 71 * Supported feature bit list is just all bits in the versionnum field because
64 * we've used them all up and understand them all. Except, of course, for the 72 * we've used them all up and understand them all. Except, of course, for the
65 * shared superblock bit, which nobody knows what it does and so is unsupported. 73 * shared superblock bit, which nobody knows what it does and so is unsupported.
@@ -1483,13 +1491,17 @@ struct xfs_acl {
1483 */ 1491 */
1484#define XFS_ACL_MAX_ENTRIES(mp) \ 1492#define XFS_ACL_MAX_ENTRIES(mp) \
1485 (xfs_sb_version_hascrc(&mp->m_sb) \ 1493 (xfs_sb_version_hascrc(&mp->m_sb) \
1486 ? (XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \ 1494 ? (XFS_XATTR_SIZE_MAX - sizeof(struct xfs_acl)) / \
1487 sizeof(struct xfs_acl_entry) \ 1495 sizeof(struct xfs_acl_entry) \
1488 : 25) 1496 : 25)
1489 1497
1490#define XFS_ACL_MAX_SIZE(mp) \ 1498#define XFS_ACL_SIZE(cnt) \
1491 (sizeof(struct xfs_acl) + \ 1499 (sizeof(struct xfs_acl) + \
1492 sizeof(struct xfs_acl_entry) * XFS_ACL_MAX_ENTRIES((mp))) 1500 sizeof(struct xfs_acl_entry) * cnt)
1501
1502#define XFS_ACL_MAX_SIZE(mp) \
1503 XFS_ACL_SIZE(XFS_ACL_MAX_ENTRIES((mp)))
1504
1493 1505
1494/* On-disk XFS extended attribute names */ 1506/* On-disk XFS extended attribute names */
1495#define SGI_ACL_FILE "SGI_ACL_FILE" 1507#define SGI_ACL_FILE "SGI_ACL_FILE"
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 89689c6a43e2..b2b73a998d42 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -490,6 +490,16 @@ typedef struct xfs_swapext
490#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */ 490#define XFS_FSOP_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
491 491
492/* 492/*
493 * ioctl limits
494 */
495#ifdef XATTR_LIST_MAX
496# define XFS_XATTR_LIST_MAX XATTR_LIST_MAX
497#else
498# define XFS_XATTR_LIST_MAX 65536
499#endif
500
501
502/*
493 * ioctl commands that are used by Linux filesystems 503 * ioctl commands that are used by Linux filesystems
494 */ 504 */
495#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS 505#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 54deb2d12ac6..70c1db99f6a7 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -38,6 +38,7 @@
38#include "xfs_icreate_item.h" 38#include "xfs_icreate_item.h"
39#include "xfs_icache.h" 39#include "xfs_icache.h"
40#include "xfs_trace.h" 40#include "xfs_trace.h"
41#include "xfs_log.h"
41 42
42 43
43/* 44/*
@@ -2500,9 +2501,14 @@ xfs_agi_verify(
2500 struct xfs_mount *mp = bp->b_target->bt_mount; 2501 struct xfs_mount *mp = bp->b_target->bt_mount;
2501 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); 2502 struct xfs_agi *agi = XFS_BUF_TO_AGI(bp);
2502 2503
2503 if (xfs_sb_version_hascrc(&mp->m_sb) && 2504 if (xfs_sb_version_hascrc(&mp->m_sb)) {
2504 !uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid)) 2505 if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
2506 return false;
2507 if (!xfs_log_check_lsn(mp,
2508 be64_to_cpu(XFS_BUF_TO_AGI(bp)->agi_lsn)))
2505 return false; 2509 return false;
2510 }
2511
2506 /* 2512 /*
2507 * Validate the magic number of the agi block. 2513 * Validate the magic number of the agi block.
2508 */ 2514 */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 47425140f343..a0b071d881a0 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -35,6 +35,7 @@
35#include "xfs_bmap_btree.h" 35#include "xfs_bmap_btree.h"
36#include "xfs_alloc_btree.h" 36#include "xfs_alloc_btree.h"
37#include "xfs_ialloc_btree.h" 37#include "xfs_ialloc_btree.h"
38#include "xfs_log.h"
38 39
39/* 40/*
40 * Physical superblock buffer manipulations. Shared with libxfs in userspace. 41 * Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -163,6 +164,15 @@ xfs_mount_validate_sb(
163"Filesystem can not be safely mounted by this kernel."); 164"Filesystem can not be safely mounted by this kernel.");
164 return -EINVAL; 165 return -EINVAL;
165 } 166 }
167 } else if (xfs_sb_version_hascrc(sbp)) {
168 /*
169 * We can't read verify the sb LSN because the read verifier is
170 * called before the log is allocated and processed. We know the
171 * log is set up before write verifier (!check_version) calls,
172 * so just check it here.
173 */
174 if (!xfs_log_check_lsn(mp, sbp->sb_lsn))
175 return -EFSCORRUPTED;
166 } 176 }
167 177
168 if (xfs_sb_version_has_pquotino(sbp)) { 178 if (xfs_sb_version_has_pquotino(sbp)) {
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index 8f8af05b3f13..cb6fd20a4d3d 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -31,6 +31,7 @@
31#include "xfs_cksum.h" 31#include "xfs_cksum.h"
32#include "xfs_trans.h" 32#include "xfs_trans.h"
33#include "xfs_buf_item.h" 33#include "xfs_buf_item.h"
34#include "xfs_log.h"
34 35
35 36
36/* 37/*
@@ -60,6 +61,7 @@ xfs_symlink_hdr_set(
60 if (!xfs_sb_version_hascrc(&mp->m_sb)) 61 if (!xfs_sb_version_hascrc(&mp->m_sb))
61 return 0; 62 return 0;
62 63
64 memset(dsl, 0, sizeof(struct xfs_dsymlink_hdr));
63 dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC); 65 dsl->sl_magic = cpu_to_be32(XFS_SYMLINK_MAGIC);
64 dsl->sl_offset = cpu_to_be32(offset); 66 dsl->sl_offset = cpu_to_be32(offset);
65 dsl->sl_bytes = cpu_to_be32(size); 67 dsl->sl_bytes = cpu_to_be32(size);
@@ -116,6 +118,8 @@ xfs_symlink_verify(
116 return false; 118 return false;
117 if (dsl->sl_owner == 0) 119 if (dsl->sl_owner == 0)
118 return false; 120 return false;
121 if (!xfs_log_check_lsn(mp, be64_to_cpu(dsl->sl_lsn)))
122 return false;
119 123
120 return true; 124 return true;
121} 125}
@@ -183,6 +187,7 @@ xfs_symlink_local_to_remote(
183 if (!xfs_sb_version_hascrc(&mp->m_sb)) { 187 if (!xfs_sb_version_hascrc(&mp->m_sb)) {
184 bp->b_ops = NULL; 188 bp->b_ops = NULL;
185 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes); 189 memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
190 xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
186 return; 191 return;
187 } 192 }
188 193
@@ -198,4 +203,6 @@ xfs_symlink_local_to_remote(
198 buf = bp->b_addr; 203 buf = bp->b_addr;
199 buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp); 204 buf += xfs_symlink_hdr_set(mp, ip->i_ino, 0, ifp->if_bytes, bp);
200 memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes); 205 memcpy(buf, ifp->if_u1.if_data, ifp->if_bytes);
206 xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsymlink_hdr) +
207 ifp->if_bytes - 1);
201} 208}
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 4b641676f258..6bb470fbb8e8 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -37,16 +37,19 @@
37 37
38STATIC struct posix_acl * 38STATIC struct posix_acl *
39xfs_acl_from_disk( 39xfs_acl_from_disk(
40 struct xfs_acl *aclp, 40 const struct xfs_acl *aclp,
41 int max_entries) 41 int len,
42 int max_entries)
42{ 43{
43 struct posix_acl_entry *acl_e; 44 struct posix_acl_entry *acl_e;
44 struct posix_acl *acl; 45 struct posix_acl *acl;
45 struct xfs_acl_entry *ace; 46 const struct xfs_acl_entry *ace;
46 unsigned int count, i; 47 unsigned int count, i;
47 48
49 if (len < sizeof(*aclp))
50 return ERR_PTR(-EFSCORRUPTED);
48 count = be32_to_cpu(aclp->acl_cnt); 51 count = be32_to_cpu(aclp->acl_cnt);
49 if (count > max_entries) 52 if (count > max_entries || XFS_ACL_SIZE(count) != len)
50 return ERR_PTR(-EFSCORRUPTED); 53 return ERR_PTR(-EFSCORRUPTED);
51 54
52 acl = posix_acl_alloc(count, GFP_KERNEL); 55 acl = posix_acl_alloc(count, GFP_KERNEL);
@@ -160,10 +163,11 @@ xfs_get_acl(struct inode *inode, int type)
160 */ 163 */
161 if (error == -ENOATTR) 164 if (error == -ENOATTR)
162 goto out_update_cache; 165 goto out_update_cache;
166 acl = ERR_PTR(error);
163 goto out; 167 goto out;
164 } 168 }
165 169
166 acl = xfs_acl_from_disk(xfs_acl, XFS_ACL_MAX_ENTRIES(ip->i_mount)); 170 acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount));
167 if (IS_ERR(acl)) 171 if (IS_ERR(acl))
168 goto out; 172 goto out;
169 173
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 3841b07f27bf..52f8255d6bdf 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -20,7 +20,6 @@
20 20
21struct inode; 21struct inode;
22struct posix_acl; 22struct posix_acl;
23struct xfs_inode;
24 23
25#ifdef CONFIG_XFS_POSIX_ACL 24#ifdef CONFIG_XFS_POSIX_ACL
26extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); 25extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
@@ -36,4 +35,7 @@ static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
36# define posix_acl_access_exists(inode) 0 35# define posix_acl_access_exists(inode) 0
37# define posix_acl_default_exists(inode) 0 36# define posix_acl_default_exists(inode) 0
38#endif /* CONFIG_XFS_POSIX_ACL */ 37#endif /* CONFIG_XFS_POSIX_ACL */
38
39extern void xfs_forget_acl(struct inode *inode, const char *name, int xflags);
40
39#endif /* __XFS_ACL_H__ */ 41#endif /* __XFS_ACL_H__ */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 50ab2879b9da..29e7e5dd5178 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -172,6 +172,12 @@ xfs_setfilesize_ioend(
172 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 172 current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
173 __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); 173 __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
174 174
175 /* we abort the update if there was an IO error */
176 if (ioend->io_error) {
177 xfs_trans_cancel(tp);
178 return ioend->io_error;
179 }
180
175 return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); 181 return xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
176} 182}
177 183
@@ -212,14 +218,17 @@ xfs_end_io(
212 ioend->io_error = -EIO; 218 ioend->io_error = -EIO;
213 goto done; 219 goto done;
214 } 220 }
215 if (ioend->io_error)
216 goto done;
217 221
218 /* 222 /*
219 * For unwritten extents we need to issue transactions to convert a 223 * For unwritten extents we need to issue transactions to convert a
220 * range to normal written extens after the data I/O has finished. 224 * range to normal written extens after the data I/O has finished.
225 * Detecting and handling completion IO errors is done individually
226 * for each case as different cleanup operations need to be performed
227 * on error.
221 */ 228 */
222 if (ioend->io_type == XFS_IO_UNWRITTEN) { 229 if (ioend->io_type == XFS_IO_UNWRITTEN) {
230 if (ioend->io_error)
231 goto done;
223 error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 232 error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
224 ioend->io_size); 233 ioend->io_size);
225 } else if (ioend->io_append_trans) { 234 } else if (ioend->io_append_trans) {
@@ -1250,13 +1259,28 @@ xfs_vm_releasepage(
1250 * the DIO. There is only going to be one reference to the ioend and its life 1259 * the DIO. There is only going to be one reference to the ioend and its life
1251 * cycle is constrained by the DIO completion code. hence we don't need 1260 * cycle is constrained by the DIO completion code. hence we don't need
1252 * reference counting here. 1261 * reference counting here.
1262 *
1263 * Note that for DIO, an IO to the highest supported file block offset (i.e.
1264 * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
1265 * bit variable. Hence if we see this overflow, we have to assume that the IO is
1266 * extending the file size. We won't know for sure until IO completion is run
1267 * and the actual max write offset is communicated to the IO completion
1268 * routine.
1269 *
1270 * For DAX page faults, we are preparing to never see unwritten extents here,
1271 * nor should we ever extend the inode size. Hence we will soon have nothing to
1272 * do here for this case, ensuring we don't have to provide an IO completion
1273 * callback to free an ioend that we don't actually need for a fault into the
1274 * page at offset (2^63 - 1FSB) bytes.
1253 */ 1275 */
1276
1254static void 1277static void
1255xfs_map_direct( 1278xfs_map_direct(
1256 struct inode *inode, 1279 struct inode *inode,
1257 struct buffer_head *bh_result, 1280 struct buffer_head *bh_result,
1258 struct xfs_bmbt_irec *imap, 1281 struct xfs_bmbt_irec *imap,
1259 xfs_off_t offset) 1282 xfs_off_t offset,
1283 bool dax_fault)
1260{ 1284{
1261 struct xfs_ioend *ioend; 1285 struct xfs_ioend *ioend;
1262 xfs_off_t size = bh_result->b_size; 1286 xfs_off_t size = bh_result->b_size;
@@ -1269,6 +1293,13 @@ xfs_map_direct(
1269 1293
1270 trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); 1294 trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
1271 1295
1296 if (dax_fault) {
1297 ASSERT(type == XFS_IO_OVERWRITE);
1298 trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
1299 imap);
1300 return;
1301 }
1302
1272 if (bh_result->b_private) { 1303 if (bh_result->b_private) {
1273 ioend = bh_result->b_private; 1304 ioend = bh_result->b_private;
1274 ASSERT(ioend->io_size > 0); 1305 ASSERT(ioend->io_size > 0);
@@ -1283,7 +1314,8 @@ xfs_map_direct(
1283 ioend->io_size, ioend->io_type, 1314 ioend->io_size, ioend->io_type,
1284 imap); 1315 imap);
1285 } else if (type == XFS_IO_UNWRITTEN || 1316 } else if (type == XFS_IO_UNWRITTEN ||
1286 offset + size > i_size_read(inode)) { 1317 offset + size > i_size_read(inode) ||
1318 offset + size < 0) {
1287 ioend = xfs_alloc_ioend(inode, type); 1319 ioend = xfs_alloc_ioend(inode, type);
1288 ioend->io_offset = offset; 1320 ioend->io_offset = offset;
1289 ioend->io_size = size; 1321 ioend->io_size = size;
@@ -1345,7 +1377,8 @@ __xfs_get_blocks(
1345 sector_t iblock, 1377 sector_t iblock,
1346 struct buffer_head *bh_result, 1378 struct buffer_head *bh_result,
1347 int create, 1379 int create,
1348 bool direct) 1380 bool direct,
1381 bool dax_fault)
1349{ 1382{
1350 struct xfs_inode *ip = XFS_I(inode); 1383 struct xfs_inode *ip = XFS_I(inode);
1351 struct xfs_mount *mp = ip->i_mount; 1384 struct xfs_mount *mp = ip->i_mount;
@@ -1393,18 +1426,20 @@ __xfs_get_blocks(
1393 if (error) 1426 if (error)
1394 goto out_unlock; 1427 goto out_unlock;
1395 1428
1429 /* for DAX, we convert unwritten extents directly */
1396 if (create && 1430 if (create &&
1397 (!nimaps || 1431 (!nimaps ||
1398 (imap.br_startblock == HOLESTARTBLOCK || 1432 (imap.br_startblock == HOLESTARTBLOCK ||
1399 imap.br_startblock == DELAYSTARTBLOCK))) { 1433 imap.br_startblock == DELAYSTARTBLOCK) ||
1434 (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
1400 if (direct || xfs_get_extsz_hint(ip)) { 1435 if (direct || xfs_get_extsz_hint(ip)) {
1401 /* 1436 /*
1402 * Drop the ilock in preparation for starting the block 1437 * xfs_iomap_write_direct() expects the shared lock. It
1403 * allocation transaction. It will be retaken 1438 * is unlocked on return.
1404 * exclusively inside xfs_iomap_write_direct for the
1405 * actual allocation.
1406 */ 1439 */
1407 xfs_iunlock(ip, lockmode); 1440 if (lockmode == XFS_ILOCK_EXCL)
1441 xfs_ilock_demote(ip, lockmode);
1442
1408 error = xfs_iomap_write_direct(ip, offset, size, 1443 error = xfs_iomap_write_direct(ip, offset, size,
1409 &imap, nimaps); 1444 &imap, nimaps);
1410 if (error) 1445 if (error)
@@ -1441,6 +1476,12 @@ __xfs_get_blocks(
1441 goto out_unlock; 1476 goto out_unlock;
1442 } 1477 }
1443 1478
1479 if (IS_DAX(inode) && create) {
1480 ASSERT(!ISUNWRITTEN(&imap));
1481 /* zeroing is not needed at a higher layer */
1482 new = 0;
1483 }
1484
1444 /* trim mapping down to size requested */ 1485 /* trim mapping down to size requested */
1445 if (direct || size > (1 << inode->i_blkbits)) 1486 if (direct || size > (1 << inode->i_blkbits))
1446 xfs_map_trim_size(inode, iblock, bh_result, 1487 xfs_map_trim_size(inode, iblock, bh_result,
@@ -1458,7 +1499,8 @@ __xfs_get_blocks(
1458 set_buffer_unwritten(bh_result); 1499 set_buffer_unwritten(bh_result);
1459 /* direct IO needs special help */ 1500 /* direct IO needs special help */
1460 if (create && direct) 1501 if (create && direct)
1461 xfs_map_direct(inode, bh_result, &imap, offset); 1502 xfs_map_direct(inode, bh_result, &imap, offset,
1503 dax_fault);
1462 } 1504 }
1463 1505
1464 /* 1506 /*
@@ -1505,7 +1547,7 @@ xfs_get_blocks(
1505 struct buffer_head *bh_result, 1547 struct buffer_head *bh_result,
1506 int create) 1548 int create)
1507{ 1549{
1508 return __xfs_get_blocks(inode, iblock, bh_result, create, false); 1550 return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
1509} 1551}
1510 1552
1511int 1553int
@@ -1515,7 +1557,17 @@ xfs_get_blocks_direct(
1515 struct buffer_head *bh_result, 1557 struct buffer_head *bh_result,
1516 int create) 1558 int create)
1517{ 1559{
1518 return __xfs_get_blocks(inode, iblock, bh_result, create, true); 1560 return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
1561}
1562
1563int
1564xfs_get_blocks_dax_fault(
1565 struct inode *inode,
1566 sector_t iblock,
1567 struct buffer_head *bh_result,
1568 int create)
1569{
1570 return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
1519} 1571}
1520 1572
1521static void 1573static void
@@ -1614,45 +1666,6 @@ xfs_end_io_direct_write(
1614 __xfs_end_io_direct_write(inode, ioend, offset, size); 1666 __xfs_end_io_direct_write(inode, ioend, offset, size);
1615} 1667}
1616 1668
1617/*
1618 * For DAX we need a mapping buffer callback for unwritten extent conversion
1619 * when page faults allocate blocks and then zero them. Note that in this
1620 * case the mapping indicated by the ioend may extend beyond EOF. We most
1621 * definitely do not want to extend EOF here, so we trim back the ioend size to
1622 * EOF.
1623 */
1624#ifdef CONFIG_FS_DAX
1625void
1626xfs_end_io_dax_write(
1627 struct buffer_head *bh,
1628 int uptodate)
1629{
1630 struct xfs_ioend *ioend = bh->b_private;
1631 struct inode *inode = ioend->io_inode;
1632 ssize_t size = ioend->io_size;
1633
1634 ASSERT(IS_DAX(ioend->io_inode));
1635
1636 /* if there was an error zeroing, then don't convert it */
1637 if (!uptodate)
1638 ioend->io_error = -EIO;
1639
1640 /*
1641 * Trim update to EOF, so we don't extend EOF during unwritten extent
1642 * conversion of partial EOF blocks.
1643 */
1644 spin_lock(&XFS_I(inode)->i_flags_lock);
1645 if (ioend->io_offset + size > i_size_read(inode))
1646 size = i_size_read(inode) - ioend->io_offset;
1647 spin_unlock(&XFS_I(inode)->i_flags_lock);
1648
1649 __xfs_end_io_direct_write(inode, ioend, ioend->io_offset, size);
1650
1651}
1652#else
1653void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate) { }
1654#endif
1655
1656static inline ssize_t 1669static inline ssize_t
1657xfs_vm_do_dio( 1670xfs_vm_do_dio(
1658 struct inode *inode, 1671 struct inode *inode,
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 86afd1ac7895..f6ffc9ae5ceb 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -58,7 +58,8 @@ int xfs_get_blocks(struct inode *inode, sector_t offset,
58 struct buffer_head *map_bh, int create); 58 struct buffer_head *map_bh, int create);
59int xfs_get_blocks_direct(struct inode *inode, sector_t offset, 59int xfs_get_blocks_direct(struct inode *inode, sector_t offset,
60 struct buffer_head *map_bh, int create); 60 struct buffer_head *map_bh, int create);
61void xfs_end_io_dax_write(struct buffer_head *bh, int uptodate); 61int xfs_get_blocks_dax_fault(struct inode *inode, sector_t offset,
62 struct buffer_head *map_bh, int create);
62 63
63extern void xfs_count_page_state(struct page *, int *, int *); 64extern void xfs_count_page_state(struct page *, int *, int *);
64 65
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 65fb37a18e92..0ef7c2ed3f8a 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -511,7 +511,7 @@ xfs_attr_list_int(
511 xfs_inode_t *dp = context->dp; 511 xfs_inode_t *dp = context->dp;
512 uint lock_mode; 512 uint lock_mode;
513 513
514 XFS_STATS_INC(xs_attr_list); 514 XFS_STATS_INC(dp->i_mount, xs_attr_list);
515 515
516 if (XFS_FORCED_SHUTDOWN(dp->i_mount)) 516 if (XFS_FORCED_SHUTDOWN(dp->i_mount))
517 return -EIO; 517 return -EIO;
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 3bf4ad0d19e4..dbae6490a79a 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -57,6 +57,35 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
57} 57}
58 58
59/* 59/*
60 * Routine to zero an extent on disk allocated to the specific inode.
61 *
62 * The VFS functions take a linearised filesystem block offset, so we have to
63 * convert the sparse xfs fsb to the right format first.
64 * VFS types are real funky, too.
65 */
66int
67xfs_zero_extent(
68 struct xfs_inode *ip,
69 xfs_fsblock_t start_fsb,
70 xfs_off_t count_fsb)
71{
72 struct xfs_mount *mp = ip->i_mount;
73 xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
74 sector_t block = XFS_BB_TO_FSBT(mp, sector);
75 ssize_t size = XFS_FSB_TO_B(mp, count_fsb);
76
77 if (IS_DAX(VFS_I(ip)))
78 return dax_clear_blocks(VFS_I(ip), block, size);
79
80 /*
81 * let the block layer decide on the fastest method of
82 * implementing the zeroing.
83 */
84 return sb_issue_zeroout(mp->m_super, block, count_fsb, GFP_NOFS);
85
86}
87
88/*
60 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi 89 * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
61 * caller. Frees all the extents that need freeing, which must be done 90 * caller. Frees all the extents that need freeing, which must be done
62 * last due to locking considerations. We never free any extents in 91 * last due to locking considerations. We never free any extents in
@@ -229,6 +258,13 @@ xfs_bmap_rtalloc(
229 xfs_trans_mod_dquot_byino(ap->tp, ap->ip, 258 xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
230 ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT : 259 ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
231 XFS_TRANS_DQ_RTBCOUNT, (long) ralen); 260 XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
261
262 /* Zero the extent if we were asked to do so */
263 if (ap->userdata & XFS_ALLOC_USERDATA_ZERO) {
264 error = xfs_zero_extent(ap->ip, ap->blkno, ap->length);
265 if (error)
266 return error;
267 }
232 } else { 268 } else {
233 ap->length = 0; 269 ap->length = 0;
234 } 270 }
@@ -1027,7 +1063,7 @@ xfs_alloc_file_space(
1027 xfs_bmap_init(&free_list, &firstfsb); 1063 xfs_bmap_init(&free_list, &firstfsb);
1028 error = xfs_bmapi_write(tp, ip, startoffset_fsb, 1064 error = xfs_bmapi_write(tp, ip, startoffset_fsb,
1029 allocatesize_fsb, alloc_type, &firstfsb, 1065 allocatesize_fsb, alloc_type, &firstfsb,
1030 0, imapp, &nimaps, &free_list); 1066 resblks, imapp, &nimaps, &free_list);
1031 if (error) { 1067 if (error) {
1032 goto error0; 1068 goto error0;
1033 } 1069 }
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8ecffb35935b..3243cdf97f33 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -201,7 +201,7 @@ _xfs_buf_alloc(
201 atomic_set(&bp->b_pin_count, 0); 201 atomic_set(&bp->b_pin_count, 0);
202 init_waitqueue_head(&bp->b_waiters); 202 init_waitqueue_head(&bp->b_waiters);
203 203
204 XFS_STATS_INC(xb_create); 204 XFS_STATS_INC(target->bt_mount, xb_create);
205 trace_xfs_buf_init(bp, _RET_IP_); 205 trace_xfs_buf_init(bp, _RET_IP_);
206 206
207 return bp; 207 return bp;
@@ -354,15 +354,16 @@ retry:
354 */ 354 */
355 if (!(++retries % 100)) 355 if (!(++retries % 100))
356 xfs_err(NULL, 356 xfs_err(NULL,
357 "possible memory allocation deadlock in %s (mode:0x%x)", 357 "%s(%u) possible memory allocation deadlock in %s (mode:0x%x)",
358 current->comm, current->pid,
358 __func__, gfp_mask); 359 __func__, gfp_mask);
359 360
360 XFS_STATS_INC(xb_page_retries); 361 XFS_STATS_INC(bp->b_target->bt_mount, xb_page_retries);
361 congestion_wait(BLK_RW_ASYNC, HZ/50); 362 congestion_wait(BLK_RW_ASYNC, HZ/50);
362 goto retry; 363 goto retry;
363 } 364 }
364 365
365 XFS_STATS_INC(xb_page_found); 366 XFS_STATS_INC(bp->b_target->bt_mount, xb_page_found);
366 367
367 nbytes = min_t(size_t, size, PAGE_SIZE - offset); 368 nbytes = min_t(size_t, size, PAGE_SIZE - offset);
368 size -= nbytes; 369 size -= nbytes;
@@ -516,7 +517,7 @@ _xfs_buf_find(
516 new_bp->b_pag = pag; 517 new_bp->b_pag = pag;
517 spin_unlock(&pag->pag_buf_lock); 518 spin_unlock(&pag->pag_buf_lock);
518 } else { 519 } else {
519 XFS_STATS_INC(xb_miss_locked); 520 XFS_STATS_INC(btp->bt_mount, xb_miss_locked);
520 spin_unlock(&pag->pag_buf_lock); 521 spin_unlock(&pag->pag_buf_lock);
521 xfs_perag_put(pag); 522 xfs_perag_put(pag);
522 } 523 }
@@ -529,11 +530,11 @@ found:
529 if (!xfs_buf_trylock(bp)) { 530 if (!xfs_buf_trylock(bp)) {
530 if (flags & XBF_TRYLOCK) { 531 if (flags & XBF_TRYLOCK) {
531 xfs_buf_rele(bp); 532 xfs_buf_rele(bp);
532 XFS_STATS_INC(xb_busy_locked); 533 XFS_STATS_INC(btp->bt_mount, xb_busy_locked);
533 return NULL; 534 return NULL;
534 } 535 }
535 xfs_buf_lock(bp); 536 xfs_buf_lock(bp);
536 XFS_STATS_INC(xb_get_locked_waited); 537 XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited);
537 } 538 }
538 539
539 /* 540 /*
@@ -549,7 +550,7 @@ found:
549 } 550 }
550 551
551 trace_xfs_buf_find(bp, flags, _RET_IP_); 552 trace_xfs_buf_find(bp, flags, _RET_IP_);
552 XFS_STATS_INC(xb_get_locked); 553 XFS_STATS_INC(btp->bt_mount, xb_get_locked);
553 return bp; 554 return bp;
554} 555}
555 556
@@ -603,7 +604,7 @@ found:
603 } 604 }
604 } 605 }
605 606
606 XFS_STATS_INC(xb_get); 607 XFS_STATS_INC(target->bt_mount, xb_get);
607 trace_xfs_buf_get(bp, flags, _RET_IP_); 608 trace_xfs_buf_get(bp, flags, _RET_IP_);
608 return bp; 609 return bp;
609} 610}
@@ -643,7 +644,7 @@ xfs_buf_read_map(
643 trace_xfs_buf_read(bp, flags, _RET_IP_); 644 trace_xfs_buf_read(bp, flags, _RET_IP_);
644 645
645 if (!XFS_BUF_ISDONE(bp)) { 646 if (!XFS_BUF_ISDONE(bp)) {
646 XFS_STATS_INC(xb_get_read); 647 XFS_STATS_INC(target->bt_mount, xb_get_read);
647 bp->b_ops = ops; 648 bp->b_ops = ops;
648 _xfs_buf_read(bp, flags); 649 _xfs_buf_read(bp, flags);
649 } else if (flags & XBF_ASYNC) { 650 } else if (flags & XBF_ASYNC) {
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index a989a9c7edb7..642d55d10075 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -666,7 +666,7 @@ xfs_readdir(
666 return -EIO; 666 return -EIO;
667 667
668 ASSERT(S_ISDIR(dp->i_d.di_mode)); 668 ASSERT(S_ISDIR(dp->i_d.di_mode));
669 XFS_STATS_INC(xs_dir_getdents); 669 XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
670 670
671 args.dp = dp; 671 args.dp = dp;
672 args.geo = dp->i_mount->m_dir_geo; 672 args.geo = dp->i_mount->m_dir_geo;
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 30cb3afb67f0..7ac6c5c586cb 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -75,9 +75,9 @@ xfs_qm_dqdestroy(
75 ASSERT(list_empty(&dqp->q_lru)); 75 ASSERT(list_empty(&dqp->q_lru));
76 76
77 mutex_destroy(&dqp->q_qlock); 77 mutex_destroy(&dqp->q_qlock);
78 kmem_zone_free(xfs_qm_dqzone, dqp);
79 78
80 XFS_STATS_DEC(xs_qm_dquot); 79 XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
80 kmem_zone_free(xfs_qm_dqzone, dqp);
81} 81}
82 82
83/* 83/*
@@ -605,7 +605,7 @@ xfs_qm_dqread(
605 break; 605 break;
606 } 606 }
607 607
608 XFS_STATS_INC(xs_qm_dquot); 608 XFS_STATS_INC(mp, xs_qm_dquot);
609 609
610 trace_xfs_dqread(dqp); 610 trace_xfs_dqread(dqp);
611 611
@@ -747,12 +747,12 @@ restart:
747 mutex_unlock(&qi->qi_tree_lock); 747 mutex_unlock(&qi->qi_tree_lock);
748 748
749 trace_xfs_dqget_hit(dqp); 749 trace_xfs_dqget_hit(dqp);
750 XFS_STATS_INC(xs_qm_dqcachehits); 750 XFS_STATS_INC(mp, xs_qm_dqcachehits);
751 *O_dqpp = dqp; 751 *O_dqpp = dqp;
752 return 0; 752 return 0;
753 } 753 }
754 mutex_unlock(&qi->qi_tree_lock); 754 mutex_unlock(&qi->qi_tree_lock);
755 XFS_STATS_INC(xs_qm_dqcachemisses); 755 XFS_STATS_INC(mp, xs_qm_dqcachemisses);
756 756
757 /* 757 /*
758 * Dquot cache miss. We don't want to keep the inode lock across 758 * Dquot cache miss. We don't want to keep the inode lock across
@@ -806,7 +806,7 @@ restart:
806 mutex_unlock(&qi->qi_tree_lock); 806 mutex_unlock(&qi->qi_tree_lock);
807 trace_xfs_dqget_dup(dqp); 807 trace_xfs_dqget_dup(dqp);
808 xfs_qm_dqdestroy(dqp); 808 xfs_qm_dqdestroy(dqp);
809 XFS_STATS_INC(xs_qm_dquot_dups); 809 XFS_STATS_INC(mp, xs_qm_dquot_dups);
810 goto restart; 810 goto restart;
811 } 811 }
812 812
@@ -846,7 +846,7 @@ xfs_qm_dqput(
846 trace_xfs_dqput_free(dqp); 846 trace_xfs_dqput_free(dqp);
847 847
848 if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) 848 if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
849 XFS_STATS_INC(xs_qm_dquot_unused); 849 XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused);
850 } 850 }
851 xfs_dqunlock(dqp); 851 xfs_dqunlock(dqp);
852} 852}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f80e90f95ad8..f5392ab2def1 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -242,19 +242,30 @@ xfs_file_fsync(
242 } 242 }
243 243
244 /* 244 /*
245 * All metadata updates are logged, which means that we just have 245 * All metadata updates are logged, which means that we just have to
246 * to flush the log up to the latest LSN that touched the inode. 246 * flush the log up to the latest LSN that touched the inode. If we have
247 * concurrent fsync/fdatasync() calls, we need them to all block on the
248 * log force before we clear the ili_fsync_fields field. This ensures
249 * that we don't get a racing sync operation that does not wait for the
250 * metadata to hit the journal before returning. If we race with
251 * clearing the ili_fsync_fields, then all that will happen is the log
252 * force will do nothing as the lsn will already be on disk. We can't
253 * race with setting ili_fsync_fields because that is done under
254 * XFS_ILOCK_EXCL, and that can't happen because we hold the lock shared
255 * until after the ili_fsync_fields is cleared.
247 */ 256 */
248 xfs_ilock(ip, XFS_ILOCK_SHARED); 257 xfs_ilock(ip, XFS_ILOCK_SHARED);
249 if (xfs_ipincount(ip)) { 258 if (xfs_ipincount(ip)) {
250 if (!datasync || 259 if (!datasync ||
251 (ip->i_itemp->ili_fields & ~XFS_ILOG_TIMESTAMP)) 260 (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
252 lsn = ip->i_itemp->ili_last_lsn; 261 lsn = ip->i_itemp->ili_last_lsn;
253 } 262 }
254 xfs_iunlock(ip, XFS_ILOCK_SHARED);
255 263
256 if (lsn) 264 if (lsn) {
257 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed); 265 error = _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
266 ip->i_itemp->ili_fsync_fields = 0;
267 }
268 xfs_iunlock(ip, XFS_ILOCK_SHARED);
258 269
259 /* 270 /*
260 * If we only have a single device, and the log force about was 271 * If we only have a single device, and the log force about was
@@ -287,7 +298,7 @@ xfs_file_read_iter(
287 xfs_fsize_t n; 298 xfs_fsize_t n;
288 loff_t pos = iocb->ki_pos; 299 loff_t pos = iocb->ki_pos;
289 300
290 XFS_STATS_INC(xs_read_calls); 301 XFS_STATS_INC(mp, xs_read_calls);
291 302
292 if (unlikely(iocb->ki_flags & IOCB_DIRECT)) 303 if (unlikely(iocb->ki_flags & IOCB_DIRECT))
293 ioflags |= XFS_IO_ISDIRECT; 304 ioflags |= XFS_IO_ISDIRECT;
@@ -365,7 +376,7 @@ xfs_file_read_iter(
365 376
366 ret = generic_file_read_iter(iocb, to); 377 ret = generic_file_read_iter(iocb, to);
367 if (ret > 0) 378 if (ret > 0)
368 XFS_STATS_ADD(xs_read_bytes, ret); 379 XFS_STATS_ADD(mp, xs_read_bytes, ret);
369 380
370 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 381 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
371 return ret; 382 return ret;
@@ -383,7 +394,7 @@ xfs_file_splice_read(
383 int ioflags = 0; 394 int ioflags = 0;
384 ssize_t ret; 395 ssize_t ret;
385 396
386 XFS_STATS_INC(xs_read_calls); 397 XFS_STATS_INC(ip->i_mount, xs_read_calls);
387 398
388 if (infilp->f_mode & FMODE_NOCMTIME) 399 if (infilp->f_mode & FMODE_NOCMTIME)
389 ioflags |= XFS_IO_INVIS; 400 ioflags |= XFS_IO_INVIS;
@@ -401,7 +412,7 @@ xfs_file_splice_read(
401 else 412 else
402 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags); 413 ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
403 if (ret > 0) 414 if (ret > 0)
404 XFS_STATS_ADD(xs_read_bytes, ret); 415 XFS_STATS_ADD(ip->i_mount, xs_read_bytes, ret);
405 416
406 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED); 417 xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
407 return ret; 418 return ret;
@@ -482,6 +493,8 @@ xfs_zero_eof(
482 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); 493 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
483 ASSERT(offset > isize); 494 ASSERT(offset > isize);
484 495
496 trace_xfs_zero_eof(ip, isize, offset - isize);
497
485 /* 498 /*
486 * First handle zeroing the block on which isize resides. 499 * First handle zeroing the block on which isize resides.
487 * 500 *
@@ -574,6 +587,7 @@ xfs_file_aio_write_checks(
574 struct xfs_inode *ip = XFS_I(inode); 587 struct xfs_inode *ip = XFS_I(inode);
575 ssize_t error = 0; 588 ssize_t error = 0;
576 size_t count = iov_iter_count(from); 589 size_t count = iov_iter_count(from);
590 bool drained_dio = false;
577 591
578restart: 592restart:
579 error = generic_write_checks(iocb, from); 593 error = generic_write_checks(iocb, from);
@@ -611,12 +625,13 @@ restart:
611 bool zero = false; 625 bool zero = false;
612 626
613 spin_unlock(&ip->i_flags_lock); 627 spin_unlock(&ip->i_flags_lock);
614 if (*iolock == XFS_IOLOCK_SHARED) { 628 if (!drained_dio) {
615 xfs_rw_iunlock(ip, *iolock); 629 if (*iolock == XFS_IOLOCK_SHARED) {
616 *iolock = XFS_IOLOCK_EXCL; 630 xfs_rw_iunlock(ip, *iolock);
617 xfs_rw_ilock(ip, *iolock); 631 *iolock = XFS_IOLOCK_EXCL;
618 iov_iter_reexpand(from, count); 632 xfs_rw_ilock(ip, *iolock);
619 633 iov_iter_reexpand(from, count);
634 }
620 /* 635 /*
621 * We now have an IO submission barrier in place, but 636 * We now have an IO submission barrier in place, but
622 * AIO can do EOF updates during IO completion and hence 637 * AIO can do EOF updates during IO completion and hence
@@ -626,6 +641,7 @@ restart:
626 * no-op. 641 * no-op.
627 */ 642 */
628 inode_dio_wait(inode); 643 inode_dio_wait(inode);
644 drained_dio = true;
629 goto restart; 645 goto restart;
630 } 646 }
631 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero); 647 error = xfs_zero_eof(ip, iocb->ki_pos, i_size_read(inode), &zero);
@@ -867,7 +883,7 @@ xfs_file_write_iter(
867 ssize_t ret; 883 ssize_t ret;
868 size_t ocount = iov_iter_count(from); 884 size_t ocount = iov_iter_count(from);
869 885
870 XFS_STATS_INC(xs_write_calls); 886 XFS_STATS_INC(ip->i_mount, xs_write_calls);
871 887
872 if (ocount == 0) 888 if (ocount == 0)
873 return 0; 889 return 0;
@@ -883,7 +899,7 @@ xfs_file_write_iter(
883 if (ret > 0) { 899 if (ret > 0) {
884 ssize_t err; 900 ssize_t err;
885 901
886 XFS_STATS_ADD(xs_write_bytes, ret); 902 XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
887 903
888 /* Handle various SYNC-type writes */ 904 /* Handle various SYNC-type writes */
889 err = generic_write_sync(file, iocb->ki_pos - ret, ret); 905 err = generic_write_sync(file, iocb->ki_pos - ret, ret);
@@ -1477,7 +1493,7 @@ xfs_file_llseek(
1477 * 1493 *
1478 * mmap_sem (MM) 1494 * mmap_sem (MM)
1479 * sb_start_pagefault(vfs, freeze) 1495 * sb_start_pagefault(vfs, freeze)
1480 * i_mmap_lock (XFS - truncate serialisation) 1496 * i_mmaplock (XFS - truncate serialisation)
1481 * page_lock (MM) 1497 * page_lock (MM)
1482 * i_lock (XFS - extent map serialisation) 1498 * i_lock (XFS - extent map serialisation)
1483 */ 1499 */
@@ -1503,8 +1519,7 @@ xfs_filemap_page_mkwrite(
1503 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1519 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1504 1520
1505 if (IS_DAX(inode)) { 1521 if (IS_DAX(inode)) {
1506 ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_direct, 1522 ret = __dax_mkwrite(vma, vmf, xfs_get_blocks_dax_fault, NULL);
1507 xfs_end_io_dax_write);
1508 } else { 1523 } else {
1509 ret = block_page_mkwrite(vma, vmf, xfs_get_blocks); 1524 ret = block_page_mkwrite(vma, vmf, xfs_get_blocks);
1510 ret = block_page_mkwrite_return(ret); 1525 ret = block_page_mkwrite_return(ret);
@@ -1538,7 +1553,7 @@ xfs_filemap_fault(
1538 * changes to xfs_get_blocks_direct() to map unwritten extent 1553 * changes to xfs_get_blocks_direct() to map unwritten extent
1539 * ioend for conversion on read-only mappings. 1554 * ioend for conversion on read-only mappings.
1540 */ 1555 */
1541 ret = __dax_fault(vma, vmf, xfs_get_blocks_direct, NULL); 1556 ret = __dax_fault(vma, vmf, xfs_get_blocks_dax_fault, NULL);
1542 } else 1557 } else
1543 ret = filemap_fault(vma, vmf); 1558 ret = filemap_fault(vma, vmf);
1544 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1559 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@ -1546,6 +1561,13 @@ xfs_filemap_fault(
1546 return ret; 1561 return ret;
1547} 1562}
1548 1563
1564/*
1565 * Similar to xfs_filemap_fault(), the DAX fault path can call into here on
1566 * both read and write faults. Hence we need to handle both cases. There is no
1567 * ->pmd_mkwrite callout for huge pages, so we have a single function here to
1568 * handle both cases here. @flags carries the information on the type of fault
1569 * occuring.
1570 */
1549STATIC int 1571STATIC int
1550xfs_filemap_pmd_fault( 1572xfs_filemap_pmd_fault(
1551 struct vm_area_struct *vma, 1573 struct vm_area_struct *vma,
@@ -1562,15 +1584,54 @@ xfs_filemap_pmd_fault(
1562 1584
1563 trace_xfs_filemap_pmd_fault(ip); 1585 trace_xfs_filemap_pmd_fault(ip);
1564 1586
1565 sb_start_pagefault(inode->i_sb); 1587 if (flags & FAULT_FLAG_WRITE) {
1566 file_update_time(vma->vm_file); 1588 sb_start_pagefault(inode->i_sb);
1589 file_update_time(vma->vm_file);
1590 }
1591
1567 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1592 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1568 ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_direct, 1593 ret = __dax_pmd_fault(vma, addr, pmd, flags, xfs_get_blocks_dax_fault,
1569 xfs_end_io_dax_write); 1594 NULL);
1570 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1595 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
1571 sb_end_pagefault(inode->i_sb);
1572 1596
1597 if (flags & FAULT_FLAG_WRITE)
1598 sb_end_pagefault(inode->i_sb);
1599
1600 return ret;
1601}
1602
1603/*
1604 * pfn_mkwrite was originally inteneded to ensure we capture time stamp
1605 * updates on write faults. In reality, it's need to serialise against
1606 * truncate similar to page_mkwrite. Hence we open-code dax_pfn_mkwrite()
1607 * here and cycle the XFS_MMAPLOCK_SHARED to ensure we serialise the fault
1608 * barrier in place.
1609 */
1610static int
1611xfs_filemap_pfn_mkwrite(
1612 struct vm_area_struct *vma,
1613 struct vm_fault *vmf)
1614{
1615
1616 struct inode *inode = file_inode(vma->vm_file);
1617 struct xfs_inode *ip = XFS_I(inode);
1618 int ret = VM_FAULT_NOPAGE;
1619 loff_t size;
1620
1621 trace_xfs_filemap_pfn_mkwrite(ip);
1622
1623 sb_start_pagefault(inode->i_sb);
1624 file_update_time(vma->vm_file);
1625
1626 /* check if the faulting page hasn't raced with truncate */
1627 xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
1628 size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
1629 if (vmf->pgoff >= size)
1630 ret = VM_FAULT_SIGBUS;
1631 xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
1632 sb_end_pagefault(inode->i_sb);
1573 return ret; 1633 return ret;
1634
1574} 1635}
1575 1636
1576static const struct vm_operations_struct xfs_file_vm_ops = { 1637static const struct vm_operations_struct xfs_file_vm_ops = {
@@ -1578,6 +1639,7 @@ static const struct vm_operations_struct xfs_file_vm_ops = {
1578 .pmd_fault = xfs_filemap_pmd_fault, 1639 .pmd_fault = xfs_filemap_pmd_fault,
1579 .map_pages = filemap_map_pages, 1640 .map_pages = filemap_map_pages,
1580 .page_mkwrite = xfs_filemap_page_mkwrite, 1641 .page_mkwrite = xfs_filemap_page_mkwrite,
1642 .pfn_mkwrite = xfs_filemap_pfn_mkwrite,
1581}; 1643};
1582 1644
1583STATIC int 1645STATIC int
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0a326bd64d4e..d7a490f24ead 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -63,7 +63,7 @@ xfs_inode_alloc(
63 return NULL; 63 return NULL;
64 } 64 }
65 65
66 XFS_STATS_INC(vn_active); 66 XFS_STATS_INC(mp, vn_active);
67 ASSERT(atomic_read(&ip->i_pincount) == 0); 67 ASSERT(atomic_read(&ip->i_pincount) == 0);
68 ASSERT(!spin_is_locked(&ip->i_flags_lock)); 68 ASSERT(!spin_is_locked(&ip->i_flags_lock));
69 ASSERT(!xfs_isiflocked(ip)); 69 ASSERT(!xfs_isiflocked(ip));
@@ -129,7 +129,7 @@ xfs_inode_free(
129 /* asserts to verify all state is correct here */ 129 /* asserts to verify all state is correct here */
130 ASSERT(atomic_read(&ip->i_pincount) == 0); 130 ASSERT(atomic_read(&ip->i_pincount) == 0);
131 ASSERT(!xfs_isiflocked(ip)); 131 ASSERT(!xfs_isiflocked(ip));
132 XFS_STATS_DEC(vn_active); 132 XFS_STATS_DEC(ip->i_mount, vn_active);
133 133
134 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback); 134 call_rcu(&VFS_I(ip)->i_rcu, xfs_inode_free_callback);
135} 135}
@@ -159,7 +159,7 @@ xfs_iget_cache_hit(
159 spin_lock(&ip->i_flags_lock); 159 spin_lock(&ip->i_flags_lock);
160 if (ip->i_ino != ino) { 160 if (ip->i_ino != ino) {
161 trace_xfs_iget_skip(ip); 161 trace_xfs_iget_skip(ip);
162 XFS_STATS_INC(xs_ig_frecycle); 162 XFS_STATS_INC(mp, xs_ig_frecycle);
163 error = -EAGAIN; 163 error = -EAGAIN;
164 goto out_error; 164 goto out_error;
165 } 165 }
@@ -177,7 +177,7 @@ xfs_iget_cache_hit(
177 */ 177 */
178 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) { 178 if (ip->i_flags & (XFS_INEW|XFS_IRECLAIM)) {
179 trace_xfs_iget_skip(ip); 179 trace_xfs_iget_skip(ip);
180 XFS_STATS_INC(xs_ig_frecycle); 180 XFS_STATS_INC(mp, xs_ig_frecycle);
181 error = -EAGAIN; 181 error = -EAGAIN;
182 goto out_error; 182 goto out_error;
183 } 183 }
@@ -259,7 +259,7 @@ xfs_iget_cache_hit(
259 xfs_ilock(ip, lock_flags); 259 xfs_ilock(ip, lock_flags);
260 260
261 xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE); 261 xfs_iflags_clear(ip, XFS_ISTALE | XFS_IDONTCACHE);
262 XFS_STATS_INC(xs_ig_found); 262 XFS_STATS_INC(mp, xs_ig_found);
263 263
264 return 0; 264 return 0;
265 265
@@ -342,7 +342,7 @@ xfs_iget_cache_miss(
342 error = radix_tree_insert(&pag->pag_ici_root, agino, ip); 342 error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
343 if (unlikely(error)) { 343 if (unlikely(error)) {
344 WARN_ON(error != -EEXIST); 344 WARN_ON(error != -EEXIST);
345 XFS_STATS_INC(xs_ig_dup); 345 XFS_STATS_INC(mp, xs_ig_dup);
346 error = -EAGAIN; 346 error = -EAGAIN;
347 goto out_preload_end; 347 goto out_preload_end;
348 } 348 }
@@ -412,7 +412,7 @@ xfs_iget(
412 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) 412 if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
413 return -EINVAL; 413 return -EINVAL;
414 414
415 XFS_STATS_INC(xs_ig_attempts); 415 XFS_STATS_INC(mp, xs_ig_attempts);
416 416
417 /* get the perag structure and ensure that it's inode capable */ 417 /* get the perag structure and ensure that it's inode capable */
418 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); 418 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino));
@@ -429,7 +429,7 @@ again:
429 goto out_error_or_again; 429 goto out_error_or_again;
430 } else { 430 } else {
431 rcu_read_unlock(); 431 rcu_read_unlock();
432 XFS_STATS_INC(xs_ig_missed); 432 XFS_STATS_INC(mp, xs_ig_missed);
433 433
434 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip, 434 error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
435 flags, lock_flags); 435 flags, lock_flags);
@@ -965,7 +965,7 @@ reclaim:
965 xfs_ifunlock(ip); 965 xfs_ifunlock(ip);
966 xfs_iunlock(ip, XFS_ILOCK_EXCL); 966 xfs_iunlock(ip, XFS_ILOCK_EXCL);
967 967
968 XFS_STATS_INC(xs_ig_reclaims); 968 XFS_STATS_INC(ip->i_mount, xs_ig_reclaims);
969 /* 969 /*
970 * Remove the inode from the per-AG radix tree. 970 * Remove the inode from the per-AG radix tree.
971 * 971 *
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index dc40a6d5ae0d..8ee393996b7d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2365,6 +2365,7 @@ retry:
2365 2365
2366 iip->ili_last_fields = iip->ili_fields; 2366 iip->ili_last_fields = iip->ili_fields;
2367 iip->ili_fields = 0; 2367 iip->ili_fields = 0;
2368 iip->ili_fsync_fields = 0;
2368 iip->ili_logged = 1; 2369 iip->ili_logged = 1;
2369 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 2370 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
2370 &iip->ili_item.li_lsn); 2371 &iip->ili_item.li_lsn);
@@ -3271,8 +3272,8 @@ xfs_iflush_cluster(
3271 } 3272 }
3272 3273
3273 if (clcount) { 3274 if (clcount) {
3274 XFS_STATS_INC(xs_icluster_flushcnt); 3275 XFS_STATS_INC(mp, xs_icluster_flushcnt);
3275 XFS_STATS_ADD(xs_icluster_flushinode, clcount); 3276 XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
3276 } 3277 }
3277 3278
3278out_free: 3279out_free:
@@ -3345,7 +3346,7 @@ xfs_iflush(
3345 struct xfs_dinode *dip; 3346 struct xfs_dinode *dip;
3346 int error; 3347 int error;
3347 3348
3348 XFS_STATS_INC(xs_iflush_count); 3349 XFS_STATS_INC(mp, xs_iflush_count);
3349 3350
3350 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); 3351 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
3351 ASSERT(xfs_isiflocked(ip)); 3352 ASSERT(xfs_isiflocked(ip));
@@ -3560,6 +3561,7 @@ xfs_iflush_int(
3560 */ 3561 */
3561 iip->ili_last_fields = iip->ili_fields; 3562 iip->ili_last_fields = iip->ili_fields;
3562 iip->ili_fields = 0; 3563 iip->ili_fields = 0;
3564 iip->ili_fsync_fields = 0;
3563 iip->ili_logged = 1; 3565 iip->ili_logged = 1;
3564 3566
3565 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn, 3567 xfs_trans_ail_copy_lsn(mp->m_ail, &iip->ili_flush_lsn,
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 62bd80f4edd9..d14b12b8cfef 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -719,6 +719,7 @@ xfs_iflush_abort(
719 * attempted. 719 * attempted.
720 */ 720 */
721 iip->ili_fields = 0; 721 iip->ili_fields = 0;
722 iip->ili_fsync_fields = 0;
722 } 723 }
723 /* 724 /*
724 * Release the inode's flush lock since we're done with it. 725 * Release the inode's flush lock since we're done with it.
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 488d81254e28..4c7722e325b3 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -34,6 +34,7 @@ typedef struct xfs_inode_log_item {
34 unsigned short ili_logged; /* flushed logged data */ 34 unsigned short ili_logged; /* flushed logged data */
35 unsigned int ili_last_fields; /* fields when flushed */ 35 unsigned int ili_last_fields; /* fields when flushed */
36 unsigned int ili_fields; /* fields to be logged */ 36 unsigned int ili_fields; /* fields to be logged */
37 unsigned int ili_fsync_fields; /* logged since last fsync */
37} xfs_inode_log_item_t; 38} xfs_inode_log_item_t;
38 39
39static inline int xfs_inode_clean(xfs_inode_t *ip) 40static inline int xfs_inode_clean(xfs_inode_t *ip)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index ea7d85af5310..d42738deec6d 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -40,6 +40,7 @@
40#include "xfs_symlink.h" 40#include "xfs_symlink.h"
41#include "xfs_trans.h" 41#include "xfs_trans.h"
42#include "xfs_pnfs.h" 42#include "xfs_pnfs.h"
43#include "xfs_acl.h"
43 44
44#include <linux/capability.h> 45#include <linux/capability.h>
45#include <linux/dcache.h> 46#include <linux/dcache.h>
@@ -411,7 +412,7 @@ xfs_attrlist_by_handle(
411 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) 412 if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
412 return -EFAULT; 413 return -EFAULT;
413 if (al_hreq.buflen < sizeof(struct attrlist) || 414 if (al_hreq.buflen < sizeof(struct attrlist) ||
414 al_hreq.buflen > XATTR_LIST_MAX) 415 al_hreq.buflen > XFS_XATTR_LIST_MAX)
415 return -EINVAL; 416 return -EINVAL;
416 417
417 /* 418 /*
@@ -455,7 +456,7 @@ xfs_attrmulti_attr_get(
455 unsigned char *kbuf; 456 unsigned char *kbuf;
456 int error = -EFAULT; 457 int error = -EFAULT;
457 458
458 if (*len > XATTR_SIZE_MAX) 459 if (*len > XFS_XATTR_SIZE_MAX)
459 return -EINVAL; 460 return -EINVAL;
460 kbuf = kmem_zalloc_large(*len, KM_SLEEP); 461 kbuf = kmem_zalloc_large(*len, KM_SLEEP);
461 if (!kbuf) 462 if (!kbuf)
@@ -482,17 +483,22 @@ xfs_attrmulti_attr_set(
482 __uint32_t flags) 483 __uint32_t flags)
483{ 484{
484 unsigned char *kbuf; 485 unsigned char *kbuf;
486 int error;
485 487
486 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 488 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
487 return -EPERM; 489 return -EPERM;
488 if (len > XATTR_SIZE_MAX) 490 if (len > XFS_XATTR_SIZE_MAX)
489 return -EINVAL; 491 return -EINVAL;
490 492
491 kbuf = memdup_user(ubuf, len); 493 kbuf = memdup_user(ubuf, len);
492 if (IS_ERR(kbuf)) 494 if (IS_ERR(kbuf))
493 return PTR_ERR(kbuf); 495 return PTR_ERR(kbuf);
494 496
495 return xfs_attr_set(XFS_I(inode), name, kbuf, len, flags); 497 error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
498 if (!error)
499 xfs_forget_acl(inode, name, flags);
500 kfree(kbuf);
501 return error;
496} 502}
497 503
498int 504int
@@ -501,9 +507,14 @@ xfs_attrmulti_attr_remove(
501 unsigned char *name, 507 unsigned char *name,
502 __uint32_t flags) 508 __uint32_t flags)
503{ 509{
510 int error;
511
504 if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) 512 if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
505 return -EPERM; 513 return -EPERM;
506 return xfs_attr_remove(XFS_I(inode), name, flags); 514 error = xfs_attr_remove(XFS_I(inode), name, flags);
515 if (!error)
516 xfs_forget_acl(inode, name, flags);
517 return error;
507} 518}
508 519
509STATIC int 520STATIC int
@@ -1028,7 +1039,7 @@ xfs_ioctl_setattr_xflags(
1028 xfs_diflags_to_linux(ip); 1039 xfs_diflags_to_linux(ip);
1029 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); 1040 xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
1030 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 1041 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1031 XFS_STATS_INC(xs_ig_attrchg); 1042 XFS_STATS_INC(mp, xs_ig_attrchg);
1032 return 0; 1043 return 0;
1033} 1044}
1034 1045
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index b88bdc85dd3d..1a05d8ae327d 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -356,7 +356,7 @@ xfs_compat_attrlist_by_handle(
356 sizeof(compat_xfs_fsop_attrlist_handlereq_t))) 356 sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
357 return -EFAULT; 357 return -EFAULT;
358 if (al_hreq.buflen < sizeof(struct attrlist) || 358 if (al_hreq.buflen < sizeof(struct attrlist) ||
359 al_hreq.buflen > XATTR_LIST_MAX) 359 al_hreq.buflen > XFS_XATTR_LIST_MAX)
360 return -EINVAL; 360 return -EINVAL;
361 361
362 /* 362 /*
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1f86033171c8..f4f5b43cf647 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -131,20 +131,30 @@ xfs_iomap_write_direct(
131 uint qblocks, resblks, resrtextents; 131 uint qblocks, resblks, resrtextents;
132 int committed; 132 int committed;
133 int error; 133 int error;
134 134 int lockmode;
135 error = xfs_qm_dqattach(ip, 0); 135 int bmapi_flags = XFS_BMAPI_PREALLOC;
136 if (error)
137 return error;
138 136
139 rt = XFS_IS_REALTIME_INODE(ip); 137 rt = XFS_IS_REALTIME_INODE(ip);
140 extsz = xfs_get_extsz_hint(ip); 138 extsz = xfs_get_extsz_hint(ip);
139 lockmode = XFS_ILOCK_SHARED; /* locked by caller */
140
141 ASSERT(xfs_isilocked(ip, lockmode));
141 142
142 offset_fsb = XFS_B_TO_FSBT(mp, offset); 143 offset_fsb = XFS_B_TO_FSBT(mp, offset);
143 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count))); 144 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
144 if ((offset + count) > XFS_ISIZE(ip)) { 145 if ((offset + count) > XFS_ISIZE(ip)) {
146 /*
147 * Assert that the in-core extent list is present since this can
148 * call xfs_iread_extents() and we only have the ilock shared.
149 * This should be safe because the lock was held around a bmapi
150 * call in the caller and we only need it to access the in-core
151 * list.
152 */
153 ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
154 XFS_IFEXTENTS);
145 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb); 155 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
146 if (error) 156 if (error)
147 return error; 157 goto out_unlock;
148 } else { 158 } else {
149 if (nmaps && (imap->br_startblock == HOLESTARTBLOCK)) 159 if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
150 last_fsb = MIN(last_fsb, (xfs_fileoff_t) 160 last_fsb = MIN(last_fsb, (xfs_fileoff_t)
@@ -174,9 +184,35 @@ xfs_iomap_write_direct(
174 } 184 }
175 185
176 /* 186 /*
187 * Drop the shared lock acquired by the caller, attach the dquot if
188 * necessary and move on to transaction setup.
189 */
190 xfs_iunlock(ip, lockmode);
191 error = xfs_qm_dqattach(ip, 0);
192 if (error)
193 return error;
194
195 /*
177 * Allocate and setup the transaction 196 * Allocate and setup the transaction
178 */ 197 */
179 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); 198 tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
199
200 /*
201 * For DAX, we do not allocate unwritten extents, but instead we zero
202 * the block before we commit the transaction. Ideally we'd like to do
203 * this outside the transaction context, but if we commit and then crash
204 * we may not have zeroed the blocks and this will be exposed on
205 * recovery of the allocation. Hence we must zero before commit.
206 * Further, if we are mapping unwritten extents here, we need to zero
207 * and convert them to written so that we don't need an unwritten extent
208 * callback for DAX. This also means that we need to be able to dip into
209 * the reserve block pool if there is no space left but we need to do
210 * unwritten extent conversion.
211 */
212 if (IS_DAX(VFS_I(ip))) {
213 bmapi_flags = XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO;
214 tp->t_flags |= XFS_TRANS_RESERVE;
215 }
180 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, 216 error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
181 resblks, resrtextents); 217 resblks, resrtextents);
182 /* 218 /*
@@ -187,7 +223,8 @@ xfs_iomap_write_direct(
187 return error; 223 return error;
188 } 224 }
189 225
190 xfs_ilock(ip, XFS_ILOCK_EXCL); 226 lockmode = XFS_ILOCK_EXCL;
227 xfs_ilock(ip, lockmode);
191 228
192 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag); 229 error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
193 if (error) 230 if (error)
@@ -202,8 +239,8 @@ xfs_iomap_write_direct(
202 xfs_bmap_init(&free_list, &firstfsb); 239 xfs_bmap_init(&free_list, &firstfsb);
203 nimaps = 1; 240 nimaps = 1;
204 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 241 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
205 XFS_BMAPI_PREALLOC, &firstfsb, 0, 242 bmapi_flags, &firstfsb, resblks, imap,
206 imap, &nimaps, &free_list); 243 &nimaps, &free_list);
207 if (error) 244 if (error)
208 goto out_bmap_cancel; 245 goto out_bmap_cancel;
209 246
@@ -213,6 +250,7 @@ xfs_iomap_write_direct(
213 error = xfs_bmap_finish(&tp, &free_list, &committed); 250 error = xfs_bmap_finish(&tp, &free_list, &committed);
214 if (error) 251 if (error)
215 goto out_bmap_cancel; 252 goto out_bmap_cancel;
253
216 error = xfs_trans_commit(tp); 254 error = xfs_trans_commit(tp);
217 if (error) 255 if (error)
218 goto out_unlock; 256 goto out_unlock;
@@ -229,7 +267,7 @@ xfs_iomap_write_direct(
229 error = xfs_alert_fsblock_zero(ip, imap); 267 error = xfs_alert_fsblock_zero(ip, imap);
230 268
231out_unlock: 269out_unlock:
232 xfs_iunlock(ip, XFS_ILOCK_EXCL); 270 xfs_iunlock(ip, lockmode);
233 return error; 271 return error;
234 272
235out_bmap_cancel: 273out_bmap_cancel:
@@ -670,7 +708,7 @@ xfs_iomap_write_allocate(
670 count_fsb = imap->br_blockcount; 708 count_fsb = imap->br_blockcount;
671 map_start_fsb = imap->br_startoff; 709 map_start_fsb = imap->br_startoff;
672 710
673 XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb)); 711 XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
674 712
675 while (count_fsb != 0) { 713 while (count_fsb != 0) {
676 /* 714 /*
@@ -750,9 +788,9 @@ xfs_iomap_write_allocate(
750 * pointer that the caller gave to us. 788 * pointer that the caller gave to us.
751 */ 789 */
752 error = xfs_bmapi_write(tp, ip, map_start_fsb, 790 error = xfs_bmapi_write(tp, ip, map_start_fsb,
753 count_fsb, 0, 791 count_fsb, 0, &first_block,
754 &first_block, 1, 792 nres, imap, &nimaps,
755 imap, &nimaps, &free_list); 793 &free_list);
756 if (error) 794 if (error)
757 goto trans_cancel; 795 goto trans_cancel;
758 796
@@ -777,7 +815,7 @@ xfs_iomap_write_allocate(
777 if ((offset_fsb >= imap->br_startoff) && 815 if ((offset_fsb >= imap->br_startoff) &&
778 (offset_fsb < (imap->br_startoff + 816 (offset_fsb < (imap->br_startoff +
779 imap->br_blockcount))) { 817 imap->br_blockcount))) {
780 XFS_STATS_INC(xs_xstrat_quick); 818 XFS_STATS_INC(mp, xs_xstrat_quick);
781 return 0; 819 return 0;
782 } 820 }
783 821
@@ -866,8 +904,8 @@ xfs_iomap_write_unwritten(
866 xfs_bmap_init(&free_list, &firstfsb); 904 xfs_bmap_init(&free_list, &firstfsb);
867 nimaps = 1; 905 nimaps = 1;
868 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, 906 error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
869 XFS_BMAPI_CONVERT, &firstfsb, 907 XFS_BMAPI_CONVERT, &firstfsb, resblks,
870 1, &imap, &nimaps, &free_list); 908 &imap, &nimaps, &free_list);
871 if (error) 909 if (error)
872 goto error_on_bmapi_transaction; 910 goto error_on_bmapi_transaction;
873 911
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 8294132e6a3c..245268a0cdf0 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -695,7 +695,7 @@ xfs_setattr_nonsize(
695 695
696 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 696 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
697 697
698 XFS_STATS_INC(xs_ig_attrchg); 698 XFS_STATS_INC(mp, xs_ig_attrchg);
699 699
700 if (mp->m_flags & XFS_MOUNT_WSYNC) 700 if (mp->m_flags & XFS_MOUNT_WSYNC)
701 xfs_trans_set_sync(tp); 701 xfs_trans_set_sync(tp);
@@ -922,7 +922,7 @@ xfs_setattr_size(
922 922
923 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 923 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
924 924
925 XFS_STATS_INC(xs_ig_attrchg); 925 XFS_STATS_INC(mp, xs_ig_attrchg);
926 926
927 if (mp->m_flags & XFS_MOUNT_WSYNC) 927 if (mp->m_flags & XFS_MOUNT_WSYNC)
928 xfs_trans_set_sync(tp); 928 xfs_trans_set_sync(tp);
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 85f883dd6207..ec0e239a0fa9 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -171,6 +171,13 @@ struct xfs_kobj {
171 struct completion complete; 171 struct completion complete;
172}; 172};
173 173
174struct xstats {
175 struct xfsstats __percpu *xs_stats;
176 struct xfs_kobj xs_kobj;
177};
178
179extern struct xstats xfsstats;
180
174/* Kernel uid/gid conversion. These are used to convert to/from the on disk 181/* Kernel uid/gid conversion. These are used to convert to/from the on disk
175 * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally. 182 * uid_t/gid_t types to the kuid_t/kgid_t types that the kernel uses internally.
176 * The conversion here is type only, the value will remain the same since we 183 * The conversion here is type only, the value will remain the same since we
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index aaadee0969c9..f52c72a1a06f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -268,7 +268,7 @@ xlog_grant_head_wait(
268 __set_current_state(TASK_UNINTERRUPTIBLE); 268 __set_current_state(TASK_UNINTERRUPTIBLE);
269 spin_unlock(&head->lock); 269 spin_unlock(&head->lock);
270 270
271 XFS_STATS_INC(xs_sleep_logspace); 271 XFS_STATS_INC(log->l_mp, xs_sleep_logspace);
272 272
273 trace_xfs_log_grant_sleep(log, tic); 273 trace_xfs_log_grant_sleep(log, tic);
274 schedule(); 274 schedule();
@@ -379,7 +379,7 @@ xfs_log_regrant(
379 if (XLOG_FORCED_SHUTDOWN(log)) 379 if (XLOG_FORCED_SHUTDOWN(log))
380 return -EIO; 380 return -EIO;
381 381
382 XFS_STATS_INC(xs_try_logspace); 382 XFS_STATS_INC(mp, xs_try_logspace);
383 383
384 /* 384 /*
385 * This is a new transaction on the ticket, so we need to change the 385 * This is a new transaction on the ticket, so we need to change the
@@ -448,7 +448,7 @@ xfs_log_reserve(
448 if (XLOG_FORCED_SHUTDOWN(log)) 448 if (XLOG_FORCED_SHUTDOWN(log))
449 return -EIO; 449 return -EIO;
450 450
451 XFS_STATS_INC(xs_try_logspace); 451 XFS_STATS_INC(mp, xs_try_logspace);
452 452
453 ASSERT(*ticp == NULL); 453 ASSERT(*ticp == NULL);
454 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 454 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
@@ -1768,7 +1768,7 @@ xlog_sync(
1768 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); 1768 int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
1769 int size; 1769 int size;
1770 1770
1771 XFS_STATS_INC(xs_log_writes); 1771 XFS_STATS_INC(log->l_mp, xs_log_writes);
1772 ASSERT(atomic_read(&iclog->ic_refcnt) == 0); 1772 ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
1773 1773
1774 /* Add for LR header */ 1774 /* Add for LR header */
@@ -1805,7 +1805,7 @@ xlog_sync(
1805 bp = iclog->ic_bp; 1805 bp = iclog->ic_bp;
1806 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); 1806 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
1807 1807
1808 XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); 1808 XFS_STATS_ADD(log->l_mp, xs_log_blocks, BTOBB(count));
1809 1809
1810 /* Do we need to split this write into 2 parts? */ 1810 /* Do we need to split this write into 2 parts? */
1811 if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { 1811 if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
@@ -2422,11 +2422,20 @@ xlog_write(
2422 &partial_copy_len); 2422 &partial_copy_len);
2423 xlog_verify_dest_ptr(log, ptr); 2423 xlog_verify_dest_ptr(log, ptr);
2424 2424
2425 /* copy region */ 2425 /*
2426 * Copy region.
2427 *
2428 * Unmount records just log an opheader, so can have
2429 * empty payloads with no data region to copy. Hence we
2430 * only copy the payload if the vector says it has data
2431 * to copy.
2432 */
2426 ASSERT(copy_len >= 0); 2433 ASSERT(copy_len >= 0);
2427 memcpy(ptr, reg->i_addr + copy_off, copy_len); 2434 if (copy_len > 0) {
2428 xlog_write_adv_cnt(&ptr, &len, &log_offset, copy_len); 2435 memcpy(ptr, reg->i_addr + copy_off, copy_len);
2429 2436 xlog_write_adv_cnt(&ptr, &len, &log_offset,
2437 copy_len);
2438 }
2430 copy_len += start_rec_copy + sizeof(xlog_op_header_t); 2439 copy_len += start_rec_copy + sizeof(xlog_op_header_t);
2431 record_cnt++; 2440 record_cnt++;
2432 data_cnt += contwr ? copy_len : 0; 2441 data_cnt += contwr ? copy_len : 0;
@@ -2913,7 +2922,7 @@ restart:
2913 2922
2914 iclog = log->l_iclog; 2923 iclog = log->l_iclog;
2915 if (iclog->ic_state != XLOG_STATE_ACTIVE) { 2924 if (iclog->ic_state != XLOG_STATE_ACTIVE) {
2916 XFS_STATS_INC(xs_log_noiclogs); 2925 XFS_STATS_INC(log->l_mp, xs_log_noiclogs);
2917 2926
2918 /* Wait for log writes to have flushed */ 2927 /* Wait for log writes to have flushed */
2919 xlog_wait(&log->l_flush_wait, &log->l_icloglock); 2928 xlog_wait(&log->l_flush_wait, &log->l_icloglock);
@@ -3165,11 +3174,19 @@ xlog_state_switch_iclogs(
3165 } 3174 }
3166 3175
3167 if (log->l_curr_block >= log->l_logBBsize) { 3176 if (log->l_curr_block >= log->l_logBBsize) {
3177 /*
3178 * Rewind the current block before the cycle is bumped to make
3179 * sure that the combined LSN never transiently moves forward
3180 * when the log wraps to the next cycle. This is to support the
3181 * unlocked sample of these fields from xlog_valid_lsn(). Most
3182 * other cases should acquire l_icloglock.
3183 */
3184 log->l_curr_block -= log->l_logBBsize;
3185 ASSERT(log->l_curr_block >= 0);
3186 smp_wmb();
3168 log->l_curr_cycle++; 3187 log->l_curr_cycle++;
3169 if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM) 3188 if (log->l_curr_cycle == XLOG_HEADER_MAGIC_NUM)
3170 log->l_curr_cycle++; 3189 log->l_curr_cycle++;
3171 log->l_curr_block -= log->l_logBBsize;
3172 ASSERT(log->l_curr_block >= 0);
3173 } 3190 }
3174 ASSERT(iclog == log->l_iclog); 3191 ASSERT(iclog == log->l_iclog);
3175 log->l_iclog = iclog->ic_next; 3192 log->l_iclog = iclog->ic_next;
@@ -3212,7 +3229,7 @@ _xfs_log_force(
3212 struct xlog_in_core *iclog; 3229 struct xlog_in_core *iclog;
3213 xfs_lsn_t lsn; 3230 xfs_lsn_t lsn;
3214 3231
3215 XFS_STATS_INC(xs_log_force); 3232 XFS_STATS_INC(mp, xs_log_force);
3216 3233
3217 xlog_cil_force(log); 3234 xlog_cil_force(log);
3218 3235
@@ -3297,7 +3314,7 @@ maybe_sleep:
3297 spin_unlock(&log->l_icloglock); 3314 spin_unlock(&log->l_icloglock);
3298 return -EIO; 3315 return -EIO;
3299 } 3316 }
3300 XFS_STATS_INC(xs_log_force_sleep); 3317 XFS_STATS_INC(mp, xs_log_force_sleep);
3301 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); 3318 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3302 /* 3319 /*
3303 * No need to grab the log lock here since we're 3320 * No need to grab the log lock here since we're
@@ -3362,7 +3379,7 @@ _xfs_log_force_lsn(
3362 3379
3363 ASSERT(lsn != 0); 3380 ASSERT(lsn != 0);
3364 3381
3365 XFS_STATS_INC(xs_log_force); 3382 XFS_STATS_INC(mp, xs_log_force);
3366 3383
3367 lsn = xlog_cil_force_lsn(log, lsn); 3384 lsn = xlog_cil_force_lsn(log, lsn);
3368 if (lsn == NULLCOMMITLSN) 3385 if (lsn == NULLCOMMITLSN)
@@ -3411,7 +3428,7 @@ try_again:
3411 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) { 3428 (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
3412 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); 3429 ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
3413 3430
3414 XFS_STATS_INC(xs_log_force_sleep); 3431 XFS_STATS_INC(mp, xs_log_force_sleep);
3415 3432
3416 xlog_wait(&iclog->ic_prev->ic_write_wait, 3433 xlog_wait(&iclog->ic_prev->ic_write_wait,
3417 &log->l_icloglock); 3434 &log->l_icloglock);
@@ -3441,7 +3458,7 @@ try_again:
3441 spin_unlock(&log->l_icloglock); 3458 spin_unlock(&log->l_icloglock);
3442 return -EIO; 3459 return -EIO;
3443 } 3460 }
3444 XFS_STATS_INC(xs_log_force_sleep); 3461 XFS_STATS_INC(mp, xs_log_force_sleep);
3445 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock); 3462 xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
3446 /* 3463 /*
3447 * No need to grab the log lock here since we're 3464 * No need to grab the log lock here since we're
@@ -4023,3 +4040,45 @@ xlog_iclogs_empty(
4023 return 1; 4040 return 1;
4024} 4041}
4025 4042
4043/*
4044 * Verify that an LSN stamped into a piece of metadata is valid. This is
4045 * intended for use in read verifiers on v5 superblocks.
4046 */
4047bool
4048xfs_log_check_lsn(
4049 struct xfs_mount *mp,
4050 xfs_lsn_t lsn)
4051{
4052 struct xlog *log = mp->m_log;
4053 bool valid;
4054
4055 /*
4056 * norecovery mode skips mount-time log processing and unconditionally
4057 * resets the in-core LSN. We can't validate in this mode, but
4058 * modifications are not allowed anyways so just return true.
4059 */
4060 if (mp->m_flags & XFS_MOUNT_NORECOVERY)
4061 return true;
4062
4063 /*
4064 * Some metadata LSNs are initialized to NULL (e.g., the agfl). This is
4065 * handled by recovery and thus safe to ignore here.
4066 */
4067 if (lsn == NULLCOMMITLSN)
4068 return true;
4069
4070 valid = xlog_valid_lsn(mp->m_log, lsn);
4071
4072 /* warn the user about what's gone wrong before verifier failure */
4073 if (!valid) {
4074 spin_lock(&log->l_icloglock);
4075 xfs_warn(mp,
4076"Corruption warning: Metadata has LSN (%d:%d) ahead of current LSN (%d:%d). "
4077"Please unmount and run xfs_repair (>= v4.3) to resolve.",
4078 CYCLE_LSN(lsn), BLOCK_LSN(lsn),
4079 log->l_curr_cycle, log->l_curr_block);
4080 spin_unlock(&log->l_icloglock);
4081 }
4082
4083 return valid;
4084}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 09d91d3166cd..aa533a7d50f2 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -181,5 +181,6 @@ bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
181void xfs_log_work_queue(struct xfs_mount *mp); 181void xfs_log_work_queue(struct xfs_mount *mp);
182void xfs_log_worker(struct work_struct *work); 182void xfs_log_worker(struct work_struct *work);
183void xfs_log_quiesce(struct xfs_mount *mp); 183void xfs_log_quiesce(struct xfs_mount *mp);
184bool xfs_log_check_lsn(struct xfs_mount *, xfs_lsn_t);
184 185
185#endif /* __XFS_LOG_H__ */ 186#endif /* __XFS_LOG_H__ */
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 950f3f94720c..8daba7491b13 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -560,4 +560,55 @@ static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
560 remove_wait_queue(wq, &wait); 560 remove_wait_queue(wq, &wait);
561} 561}
562 562
563/*
564 * The LSN is valid so long as it is behind the current LSN. If it isn't, this
565 * means that the next log record that includes this metadata could have a
566 * smaller LSN. In turn, this means that the modification in the log would not
567 * replay.
568 */
569static inline bool
570xlog_valid_lsn(
571 struct xlog *log,
572 xfs_lsn_t lsn)
573{
574 int cur_cycle;
575 int cur_block;
576 bool valid = true;
577
578 /*
579 * First, sample the current lsn without locking to avoid added
580 * contention from metadata I/O. The current cycle and block are updated
581 * (in xlog_state_switch_iclogs()) and read here in a particular order
582 * to avoid false negatives (e.g., thinking the metadata LSN is valid
583 * when it is not).
584 *
585 * The current block is always rewound before the cycle is bumped in
586 * xlog_state_switch_iclogs() to ensure the current LSN is never seen in
587 * a transiently forward state. Instead, we can see the LSN in a
588 * transiently behind state if we happen to race with a cycle wrap.
589 */
590 cur_cycle = ACCESS_ONCE(log->l_curr_cycle);
591 smp_rmb();
592 cur_block = ACCESS_ONCE(log->l_curr_block);
593
594 if ((CYCLE_LSN(lsn) > cur_cycle) ||
595 (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block)) {
596 /*
597 * If the metadata LSN appears invalid, it's possible the check
598 * above raced with a wrap to the next log cycle. Grab the lock
599 * to check for sure.
600 */
601 spin_lock(&log->l_icloglock);
602 cur_cycle = log->l_curr_cycle;
603 cur_block = log->l_curr_block;
604 spin_unlock(&log->l_icloglock);
605
606 if ((CYCLE_LSN(lsn) > cur_cycle) ||
607 (CYCLE_LSN(lsn) == cur_cycle && BLOCK_LSN(lsn) > cur_block))
608 valid = false;
609 }
610
611 return valid;
612}
613
563#endif /* __XFS_LOG_PRIV_H__ */ 614#endif /* __XFS_LOG_PRIV_H__ */
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 512a0945d52a..c5ecaacdd218 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3431,7 +3431,7 @@ xlog_recover_add_to_cont_trans(
3431 * previous record. Copy the rest of the header. 3431 * previous record. Copy the rest of the header.
3432 */ 3432 */
3433 if (list_empty(&trans->r_itemq)) { 3433 if (list_empty(&trans->r_itemq)) {
3434 ASSERT(len < sizeof(struct xfs_trans_header)); 3434 ASSERT(len <= sizeof(struct xfs_trans_header));
3435 if (len > sizeof(struct xfs_trans_header)) { 3435 if (len > sizeof(struct xfs_trans_header)) {
3436 xfs_warn(log->l_mp, "%s: bad header length", __func__); 3436 xfs_warn(log->l_mp, "%s: bad header length", __func__);
3437 return -EIO; 3437 return -EIO;
@@ -4609,9 +4609,19 @@ xlog_recover(
4609 int error; 4609 int error;
4610 4610
4611 /* find the tail of the log */ 4611 /* find the tail of the log */
4612 if ((error = xlog_find_tail(log, &head_blk, &tail_blk))) 4612 error = xlog_find_tail(log, &head_blk, &tail_blk);
4613 if (error)
4613 return error; 4614 return error;
4614 4615
4616 /*
4617 * The superblock was read before the log was available and thus the LSN
4618 * could not be verified. Check the superblock LSN against the current
4619 * LSN now that it's known.
4620 */
4621 if (xfs_sb_version_hascrc(&log->l_mp->m_sb) &&
4622 !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn))
4623 return -EINVAL;
4624
4615 if (tail_blk != head_blk) { 4625 if (tail_blk != head_blk) {
4616 /* There used to be a comment here: 4626 /* There used to be a comment here:
4617 * 4627 *
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index d8b67547ab34..11792d888e4e 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -17,6 +17,7 @@
17 17
18#include "xfs.h" 18#include "xfs.h"
19#include "xfs_fs.h" 19#include "xfs_fs.h"
20#include "xfs_error.h"
20#include "xfs_format.h" 21#include "xfs_format.h"
21#include "xfs_log_format.h" 22#include "xfs_log_format.h"
22#include "xfs_trans_resv.h" 23#include "xfs_trans_resv.h"
@@ -43,6 +44,7 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \
43{ \ 44{ \
44 struct va_format vaf; \ 45 struct va_format vaf; \
45 va_list args; \ 46 va_list args; \
47 int level; \
46 \ 48 \
47 va_start(args, fmt); \ 49 va_start(args, fmt); \
48 \ 50 \
@@ -51,6 +53,11 @@ void func(const struct xfs_mount *mp, const char *fmt, ...) \
51 \ 53 \
52 __xfs_printk(kern_level, mp, &vaf); \ 54 __xfs_printk(kern_level, mp, &vaf); \
53 va_end(args); \ 55 va_end(args); \
56 \
57 if (!kstrtoint(kern_level, 0, &level) && \
58 level <= LOGLEVEL_ERR && \
59 xfs_error_level >= XFS_ERRLEVEL_HIGH) \
60 xfs_stack_trace(); \
54} \ 61} \
55 62
56define_xfs_printk_level(xfs_emerg, KERN_EMERG); 63define_xfs_printk_level(xfs_emerg, KERN_EMERG);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bf92e0c037c7..bb753b359bee 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -47,6 +47,16 @@ static DEFINE_MUTEX(xfs_uuid_table_mutex);
47static int xfs_uuid_table_size; 47static int xfs_uuid_table_size;
48static uuid_t *xfs_uuid_table; 48static uuid_t *xfs_uuid_table;
49 49
50void
51xfs_uuid_table_free(void)
52{
53 if (xfs_uuid_table_size == 0)
54 return;
55 kmem_free(xfs_uuid_table);
56 xfs_uuid_table = NULL;
57 xfs_uuid_table_size = 0;
58}
59
50/* 60/*
51 * See if the UUID is unique among mounted XFS filesystems. 61 * See if the UUID is unique among mounted XFS filesystems.
52 * Mount fails if UUID is nil or a FS with the same UUID is already mounted. 62 * Mount fails if UUID is nil or a FS with the same UUID is already mounted.
@@ -693,10 +703,15 @@ xfs_mountfs(
693 if (error) 703 if (error)
694 goto out; 704 goto out;
695 705
696 error = xfs_uuid_mount(mp); 706 error = xfs_sysfs_init(&mp->m_stats.xs_kobj, &xfs_stats_ktype,
707 &mp->m_kobj, "stats");
697 if (error) 708 if (error)
698 goto out_remove_sysfs; 709 goto out_remove_sysfs;
699 710
711 error = xfs_uuid_mount(mp);
712 if (error)
713 goto out_del_stats;
714
700 /* 715 /*
701 * Set the minimum read and write sizes 716 * Set the minimum read and write sizes
702 */ 717 */
@@ -971,6 +986,8 @@ xfs_mountfs(
971 xfs_da_unmount(mp); 986 xfs_da_unmount(mp);
972 out_remove_uuid: 987 out_remove_uuid:
973 xfs_uuid_unmount(mp); 988 xfs_uuid_unmount(mp);
989 out_del_stats:
990 xfs_sysfs_del(&mp->m_stats.xs_kobj);
974 out_remove_sysfs: 991 out_remove_sysfs:
975 xfs_sysfs_del(&mp->m_kobj); 992 xfs_sysfs_del(&mp->m_kobj);
976 out: 993 out:
@@ -1047,6 +1064,7 @@ xfs_unmountfs(
1047 xfs_warn(mp, "Unable to update superblock counters. " 1064 xfs_warn(mp, "Unable to update superblock counters. "
1048 "Freespace may not be correct on next mount."); 1065 "Freespace may not be correct on next mount.");
1049 1066
1067
1050 xfs_log_unmount(mp); 1068 xfs_log_unmount(mp);
1051 xfs_da_unmount(mp); 1069 xfs_da_unmount(mp);
1052 xfs_uuid_unmount(mp); 1070 xfs_uuid_unmount(mp);
@@ -1056,6 +1074,7 @@ xfs_unmountfs(
1056#endif 1074#endif
1057 xfs_free_perag(mp); 1075 xfs_free_perag(mp);
1058 1076
1077 xfs_sysfs_del(&mp->m_stats.xs_kobj);
1059 xfs_sysfs_del(&mp->m_kobj); 1078 xfs_sysfs_del(&mp->m_kobj);
1060} 1079}
1061 1080
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7999e91cd49a..b57098481c10 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -127,6 +127,7 @@ typedef struct xfs_mount {
127 int64_t m_low_space[XFS_LOWSP_MAX]; 127 int64_t m_low_space[XFS_LOWSP_MAX];
128 /* low free space thresholds */ 128 /* low free space thresholds */
129 struct xfs_kobj m_kobj; 129 struct xfs_kobj m_kobj;
130 struct xstats m_stats; /* per-fs stats */
130 131
131 struct workqueue_struct *m_buf_workqueue; 132 struct workqueue_struct *m_buf_workqueue;
132 struct workqueue_struct *m_data_workqueue; 133 struct workqueue_struct *m_data_workqueue;
@@ -312,6 +313,7 @@ typedef struct xfs_perag {
312 int pagb_count; /* pagb slots in use */ 313 int pagb_count; /* pagb slots in use */
313} xfs_perag_t; 314} xfs_perag_t;
314 315
316extern void xfs_uuid_table_free(void);
315extern int xfs_log_sbcount(xfs_mount_t *); 317extern int xfs_log_sbcount(xfs_mount_t *);
316extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); 318extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
317extern int xfs_mountfs(xfs_mount_t *mp); 319extern int xfs_mountfs(xfs_mount_t *mp);
@@ -336,4 +338,7 @@ extern int xfs_dev_is_read_only(struct xfs_mount *, char *);
336 338
337extern void xfs_set_low_space_thresholds(struct xfs_mount *); 339extern void xfs_set_low_space_thresholds(struct xfs_mount *);
338 340
341int xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
342 xfs_off_t count_fsb);
343
339#endif /* __XFS_MOUNT_H__ */ 344#endif /* __XFS_MOUNT_H__ */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index ab4a6066f7ca..dc6221942b85 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -181,6 +181,11 @@ xfs_fs_map_blocks(
181 ASSERT(imap.br_startblock != DELAYSTARTBLOCK); 181 ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
182 182
183 if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) { 183 if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
184 /*
185 * xfs_iomap_write_direct() expects to take ownership of
186 * the shared ilock.
187 */
188 xfs_ilock(ip, XFS_ILOCK_SHARED);
184 error = xfs_iomap_write_direct(ip, offset, length, 189 error = xfs_iomap_write_direct(ip, offset, length,
185 &imap, nimaps); 190 &imap, nimaps);
186 if (error) 191 if (error)
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 587174fd4f2c..532ab79d38fe 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -184,7 +184,7 @@ xfs_qm_dqpurge(
184 */ 184 */
185 ASSERT(!list_empty(&dqp->q_lru)); 185 ASSERT(!list_empty(&dqp->q_lru));
186 list_lru_del(&qi->qi_lru, &dqp->q_lru); 186 list_lru_del(&qi->qi_lru, &dqp->q_lru);
187 XFS_STATS_DEC(xs_qm_dquot_unused); 187 XFS_STATS_DEC(mp, xs_qm_dquot_unused);
188 188
189 xfs_qm_dqdestroy(dqp); 189 xfs_qm_dqdestroy(dqp);
190 return 0; 190 return 0;
@@ -448,11 +448,11 @@ xfs_qm_dquot_isolate(
448 */ 448 */
449 if (dqp->q_nrefs) { 449 if (dqp->q_nrefs) {
450 xfs_dqunlock(dqp); 450 xfs_dqunlock(dqp);
451 XFS_STATS_INC(xs_qm_dqwants); 451 XFS_STATS_INC(dqp->q_mount, xs_qm_dqwants);
452 452
453 trace_xfs_dqreclaim_want(dqp); 453 trace_xfs_dqreclaim_want(dqp);
454 list_lru_isolate(lru, &dqp->q_lru); 454 list_lru_isolate(lru, &dqp->q_lru);
455 XFS_STATS_DEC(xs_qm_dquot_unused); 455 XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
456 return LRU_REMOVED; 456 return LRU_REMOVED;
457 } 457 }
458 458
@@ -496,19 +496,19 @@ xfs_qm_dquot_isolate(
496 496
497 ASSERT(dqp->q_nrefs == 0); 497 ASSERT(dqp->q_nrefs == 0);
498 list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose); 498 list_lru_isolate_move(lru, &dqp->q_lru, &isol->dispose);
499 XFS_STATS_DEC(xs_qm_dquot_unused); 499 XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot_unused);
500 trace_xfs_dqreclaim_done(dqp); 500 trace_xfs_dqreclaim_done(dqp);
501 XFS_STATS_INC(xs_qm_dqreclaims); 501 XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims);
502 return LRU_REMOVED; 502 return LRU_REMOVED;
503 503
504out_miss_busy: 504out_miss_busy:
505 trace_xfs_dqreclaim_busy(dqp); 505 trace_xfs_dqreclaim_busy(dqp);
506 XFS_STATS_INC(xs_qm_dqreclaim_misses); 506 XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
507 return LRU_SKIP; 507 return LRU_SKIP;
508 508
509out_unlock_dirty: 509out_unlock_dirty:
510 trace_xfs_dqreclaim_busy(dqp); 510 trace_xfs_dqreclaim_busy(dqp);
511 XFS_STATS_INC(xs_qm_dqreclaim_misses); 511 XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
512 xfs_dqunlock(dqp); 512 xfs_dqunlock(dqp);
513 spin_lock(lru_lock); 513 spin_lock(lru_lock);
514 return LRU_RETRY; 514 return LRU_RETRY;
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index f2240383d4bb..8686df6c7609 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -18,20 +18,21 @@
18#include "xfs.h" 18#include "xfs.h"
19#include <linux/proc_fs.h> 19#include <linux/proc_fs.h>
20 20
21DEFINE_PER_CPU(struct xfsstats, xfsstats); 21struct xstats xfsstats;
22 22
23static int counter_val(int idx) 23static int counter_val(struct xfsstats __percpu *stats, int idx)
24{ 24{
25 int val = 0, cpu; 25 int val = 0, cpu;
26 26
27 for_each_possible_cpu(cpu) 27 for_each_possible_cpu(cpu)
28 val += *(((__u32 *)&per_cpu(xfsstats, cpu) + idx)); 28 val += *(((__u32 *)per_cpu_ptr(stats, cpu) + idx));
29 return val; 29 return val;
30} 30}
31 31
32static int xfs_stat_proc_show(struct seq_file *m, void *v) 32int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
33{ 33{
34 int i, j; 34 int i, j;
35 int len = 0;
35 __uint64_t xs_xstrat_bytes = 0; 36 __uint64_t xs_xstrat_bytes = 0;
36 __uint64_t xs_write_bytes = 0; 37 __uint64_t xs_write_bytes = 0;
37 __uint64_t xs_read_bytes = 0; 38 __uint64_t xs_read_bytes = 0;
@@ -65,54 +66,59 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)
65 }; 66 };
66 67
67 /* Loop over all stats groups */ 68 /* Loop over all stats groups */
69
68 for (i = j = 0; i < ARRAY_SIZE(xstats); i++) { 70 for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
69 seq_printf(m, "%s", xstats[i].desc); 71 len += snprintf(buf + len, PATH_MAX - len, "%s",
72 xstats[i].desc);
70 /* inner loop does each group */ 73 /* inner loop does each group */
71 for (; j < xstats[i].endpoint; j++) 74 for (; j < xstats[i].endpoint; j++)
72 seq_printf(m, " %u", counter_val(j)); 75 len += snprintf(buf + len, PATH_MAX - len, " %u",
73 seq_putc(m, '\n'); 76 counter_val(stats, j));
77 len += snprintf(buf + len, PATH_MAX - len, "\n");
74 } 78 }
75 /* extra precision counters */ 79 /* extra precision counters */
76 for_each_possible_cpu(i) { 80 for_each_possible_cpu(i) {
77 xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes; 81 xs_xstrat_bytes += per_cpu_ptr(stats, i)->xs_xstrat_bytes;
78 xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes; 82 xs_write_bytes += per_cpu_ptr(stats, i)->xs_write_bytes;
79 xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes; 83 xs_read_bytes += per_cpu_ptr(stats, i)->xs_read_bytes;
80 } 84 }
81 85
82 seq_printf(m, "xpc %Lu %Lu %Lu\n", 86 len += snprintf(buf + len, PATH_MAX-len, "xpc %Lu %Lu %Lu\n",
83 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes); 87 xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
84 seq_printf(m, "debug %u\n", 88 len += snprintf(buf + len, PATH_MAX-len, "debug %u\n",
85#if defined(DEBUG) 89#if defined(DEBUG)
86 1); 90 1);
87#else 91#else
88 0); 92 0);
89#endif 93#endif
90 return 0; 94
95 return len;
91} 96}
92 97
93static int xfs_stat_proc_open(struct inode *inode, struct file *file) 98void xfs_stats_clearall(struct xfsstats __percpu *stats)
94{ 99{
95 return single_open(file, xfs_stat_proc_show, NULL); 100 int c;
101 __uint32_t vn_active;
102
103 xfs_notice(NULL, "Clearing xfsstats");
104 for_each_possible_cpu(c) {
105 preempt_disable();
106 /* save vn_active, it's a universal truth! */
107 vn_active = per_cpu_ptr(stats, c)->vn_active;
108 memset(per_cpu_ptr(stats, c), 0, sizeof(*stats));
109 per_cpu_ptr(stats, c)->vn_active = vn_active;
110 preempt_enable();
111 }
96} 112}
97 113
98static const struct file_operations xfs_stat_proc_fops = {
99 .owner = THIS_MODULE,
100 .open = xfs_stat_proc_open,
101 .read = seq_read,
102 .llseek = seq_lseek,
103 .release = single_release,
104};
105
106/* legacy quota interfaces */ 114/* legacy quota interfaces */
107#ifdef CONFIG_XFS_QUOTA 115#ifdef CONFIG_XFS_QUOTA
108static int xqm_proc_show(struct seq_file *m, void *v) 116static int xqm_proc_show(struct seq_file *m, void *v)
109{ 117{
110 /* maximum; incore; ratio free to inuse; freelist */ 118 /* maximum; incore; ratio free to inuse; freelist */
111 seq_printf(m, "%d\t%d\t%d\t%u\n", 119 seq_printf(m, "%d\t%d\t%d\t%u\n",
112 0, 120 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT),
113 counter_val(XFSSTAT_END_XQMSTAT), 121 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1));
114 0,
115 counter_val(XFSSTAT_END_XQMSTAT + 1));
116 return 0; 122 return 0;
117} 123}
118 124
@@ -136,7 +142,7 @@ static int xqmstat_proc_show(struct seq_file *m, void *v)
136 142
137 seq_printf(m, "qm"); 143 seq_printf(m, "qm");
138 for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++) 144 for (j = XFSSTAT_END_IBT_V2; j < XFSSTAT_END_XQMSTAT; j++)
139 seq_printf(m, " %u", counter_val(j)); 145 seq_printf(m, " %u", counter_val(xfsstats.xs_stats, j));
140 seq_putc(m, '\n'); 146 seq_putc(m, '\n');
141 return 0; 147 return 0;
142} 148}
@@ -155,44 +161,35 @@ static const struct file_operations xqmstat_proc_fops = {
155}; 161};
156#endif /* CONFIG_XFS_QUOTA */ 162#endif /* CONFIG_XFS_QUOTA */
157 163
164#ifdef CONFIG_PROC_FS
158int 165int
159xfs_init_procfs(void) 166xfs_init_procfs(void)
160{ 167{
161 if (!proc_mkdir("fs/xfs", NULL)) 168 if (!proc_mkdir("fs/xfs", NULL))
169 return -ENOMEM;
170
171 if (!proc_symlink("fs/xfs/stat", NULL,
172 "/sys/fs/xfs/stats/stats"))
162 goto out; 173 goto out;
163 174
164 if (!proc_create("fs/xfs/stat", 0, NULL,
165 &xfs_stat_proc_fops))
166 goto out_remove_xfs_dir;
167#ifdef CONFIG_XFS_QUOTA 175#ifdef CONFIG_XFS_QUOTA
168 if (!proc_create("fs/xfs/xqmstat", 0, NULL, 176 if (!proc_create("fs/xfs/xqmstat", 0, NULL,
169 &xqmstat_proc_fops)) 177 &xqmstat_proc_fops))
170 goto out_remove_stat_file; 178 goto out;
171 if (!proc_create("fs/xfs/xqm", 0, NULL, 179 if (!proc_create("fs/xfs/xqm", 0, NULL,
172 &xqm_proc_fops)) 180 &xqm_proc_fops))
173 goto out_remove_xqmstat_file; 181 goto out;
174#endif 182#endif
175 return 0; 183 return 0;
176 184
177#ifdef CONFIG_XFS_QUOTA 185out:
178 out_remove_xqmstat_file: 186 remove_proc_subtree("fs/xfs", NULL);
179 remove_proc_entry("fs/xfs/xqmstat", NULL);
180 out_remove_stat_file:
181 remove_proc_entry("fs/xfs/stat", NULL);
182#endif
183 out_remove_xfs_dir:
184 remove_proc_entry("fs/xfs", NULL);
185 out:
186 return -ENOMEM; 187 return -ENOMEM;
187} 188}
188 189
189void 190void
190xfs_cleanup_procfs(void) 191xfs_cleanup_procfs(void)
191{ 192{
192#ifdef CONFIG_XFS_QUOTA 193 remove_proc_subtree("fs/xfs", NULL);
193 remove_proc_entry("fs/xfs/xqm", NULL);
194 remove_proc_entry("fs/xfs/xqmstat", NULL);
195#endif
196 remove_proc_entry("fs/xfs/stat", NULL);
197 remove_proc_entry("fs/xfs", NULL);
198} 194}
195#endif /* CONFIG_PROC_FS */
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index c8f238b8299a..483b0eff1988 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -19,8 +19,6 @@
19#define __XFS_STATS_H__ 19#define __XFS_STATS_H__
20 20
21 21
22#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
23
24#include <linux/percpu.h> 22#include <linux/percpu.h>
25 23
26/* 24/*
@@ -215,15 +213,29 @@ struct xfsstats {
215 __uint64_t xs_read_bytes; 213 __uint64_t xs_read_bytes;
216}; 214};
217 215
218DECLARE_PER_CPU(struct xfsstats, xfsstats); 216int xfs_stats_format(struct xfsstats __percpu *stats, char *buf);
217void xfs_stats_clearall(struct xfsstats __percpu *stats);
218extern struct xstats xfsstats;
219 219
220/* 220#define XFS_STATS_INC(mp, v) \
221 * We don't disable preempt, not too worried about poking the 221do { \
222 * wrong CPU's stat for now (also aggregated before reporting). 222 per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v++; \
223 */ 223 per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v++; \
224#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++) 224} while (0)
225#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) 225
226#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) 226#define XFS_STATS_DEC(mp, v) \
227do { \
228 per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v--; \
229 per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v--; \
230} while (0)
231
232#define XFS_STATS_ADD(mp, v, inc) \
233do { \
234 per_cpu_ptr(xfsstats.xs_stats, current_cpu())->v += (inc); \
235 per_cpu_ptr(mp->m_stats.xs_stats, current_cpu())->v += (inc); \
236} while (0)
237
238#if defined(CONFIG_PROC_FS)
227 239
228extern int xfs_init_procfs(void); 240extern int xfs_init_procfs(void);
229extern void xfs_cleanup_procfs(void); 241extern void xfs_cleanup_procfs(void);
@@ -231,10 +243,6 @@ extern void xfs_cleanup_procfs(void);
231 243
232#else /* !CONFIG_PROC_FS */ 244#else /* !CONFIG_PROC_FS */
233 245
234# define XFS_STATS_INC(count)
235# define XFS_STATS_DEC(count)
236# define XFS_STATS_ADD(count, inc)
237
238static inline int xfs_init_procfs(void) 246static inline int xfs_init_procfs(void)
239{ 247{
240 return 0; 248 return 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 904f637cfa5f..36bd8825bfb0 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -838,17 +838,18 @@ xfs_init_mount_workqueues(
838 goto out_destroy_unwritten; 838 goto out_destroy_unwritten;
839 839
840 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s", 840 mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
841 WQ_FREEZABLE, 0, mp->m_fsname); 841 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
842 if (!mp->m_reclaim_workqueue) 842 if (!mp->m_reclaim_workqueue)
843 goto out_destroy_cil; 843 goto out_destroy_cil;
844 844
845 mp->m_log_workqueue = alloc_workqueue("xfs-log/%s", 845 mp->m_log_workqueue = alloc_workqueue("xfs-log/%s",
846 WQ_FREEZABLE|WQ_HIGHPRI, 0, mp->m_fsname); 846 WQ_MEM_RECLAIM|WQ_FREEZABLE|WQ_HIGHPRI, 0,
847 mp->m_fsname);
847 if (!mp->m_log_workqueue) 848 if (!mp->m_log_workqueue)
848 goto out_destroy_reclaim; 849 goto out_destroy_reclaim;
849 850
850 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s", 851 mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
851 WQ_FREEZABLE, 0, mp->m_fsname); 852 WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
852 if (!mp->m_eofblocks_workqueue) 853 if (!mp->m_eofblocks_workqueue)
853 goto out_destroy_log; 854 goto out_destroy_log;
854 855
@@ -922,7 +923,7 @@ xfs_fs_destroy_inode(
922 923
923 trace_xfs_destroy_inode(ip); 924 trace_xfs_destroy_inode(ip);
924 925
925 XFS_STATS_INC(vn_reclaim); 926 XFS_STATS_INC(ip->i_mount, vn_reclaim);
926 927
927 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0); 928 ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
928 929
@@ -983,8 +984,8 @@ xfs_fs_evict_inode(
983 984
984 truncate_inode_pages_final(&inode->i_data); 985 truncate_inode_pages_final(&inode->i_data);
985 clear_inode(inode); 986 clear_inode(inode);
986 XFS_STATS_INC(vn_rele); 987 XFS_STATS_INC(ip->i_mount, vn_rele);
987 XFS_STATS_INC(vn_remove); 988 XFS_STATS_INC(ip->i_mount, vn_remove);
988 989
989 xfs_inactive(ip); 990 xfs_inactive(ip);
990} 991}
@@ -1474,9 +1475,16 @@ xfs_fs_fill_super(
1474 if (error) 1475 if (error)
1475 goto out_destroy_workqueues; 1476 goto out_destroy_workqueues;
1476 1477
1478 /* Allocate stats memory before we do operations that might use it */
1479 mp->m_stats.xs_stats = alloc_percpu(struct xfsstats);
1480 if (!mp->m_stats.xs_stats) {
1481 error = -ENOMEM;
1482 goto out_destroy_counters;
1483 }
1484
1477 error = xfs_readsb(mp, flags); 1485 error = xfs_readsb(mp, flags);
1478 if (error) 1486 if (error)
1479 goto out_destroy_counters; 1487 goto out_free_stats;
1480 1488
1481 error = xfs_finish_flags(mp); 1489 error = xfs_finish_flags(mp);
1482 if (error) 1490 if (error)
@@ -1545,9 +1553,11 @@ xfs_fs_fill_super(
1545 xfs_filestream_unmount(mp); 1553 xfs_filestream_unmount(mp);
1546 out_free_sb: 1554 out_free_sb:
1547 xfs_freesb(mp); 1555 xfs_freesb(mp);
1556 out_free_stats:
1557 free_percpu(mp->m_stats.xs_stats);
1548 out_destroy_counters: 1558 out_destroy_counters:
1549 xfs_destroy_percpu_counters(mp); 1559 xfs_destroy_percpu_counters(mp);
1550out_destroy_workqueues: 1560 out_destroy_workqueues:
1551 xfs_destroy_mount_workqueues(mp); 1561 xfs_destroy_mount_workqueues(mp);
1552 out_close_devices: 1562 out_close_devices:
1553 xfs_close_devices(mp); 1563 xfs_close_devices(mp);
@@ -1574,6 +1584,7 @@ xfs_fs_put_super(
1574 xfs_unmountfs(mp); 1584 xfs_unmountfs(mp);
1575 1585
1576 xfs_freesb(mp); 1586 xfs_freesb(mp);
1587 free_percpu(mp->m_stats.xs_stats);
1577 xfs_destroy_percpu_counters(mp); 1588 xfs_destroy_percpu_counters(mp);
1578 xfs_destroy_mount_workqueues(mp); 1589 xfs_destroy_mount_workqueues(mp);
1579 xfs_close_devices(mp); 1590 xfs_close_devices(mp);
@@ -1838,19 +1849,32 @@ init_xfs_fs(void)
1838 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj); 1849 xfs_kset = kset_create_and_add("xfs", NULL, fs_kobj);
1839 if (!xfs_kset) { 1850 if (!xfs_kset) {
1840 error = -ENOMEM; 1851 error = -ENOMEM;
1841 goto out_sysctl_unregister;; 1852 goto out_sysctl_unregister;
1842 } 1853 }
1843 1854
1855 xfsstats.xs_kobj.kobject.kset = xfs_kset;
1856
1857 xfsstats.xs_stats = alloc_percpu(struct xfsstats);
1858 if (!xfsstats.xs_stats) {
1859 error = -ENOMEM;
1860 goto out_kset_unregister;
1861 }
1862
1863 error = xfs_sysfs_init(&xfsstats.xs_kobj, &xfs_stats_ktype, NULL,
1864 "stats");
1865 if (error)
1866 goto out_free_stats;
1867
1844#ifdef DEBUG 1868#ifdef DEBUG
1845 xfs_dbg_kobj.kobject.kset = xfs_kset; 1869 xfs_dbg_kobj.kobject.kset = xfs_kset;
1846 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug"); 1870 error = xfs_sysfs_init(&xfs_dbg_kobj, &xfs_dbg_ktype, NULL, "debug");
1847 if (error) 1871 if (error)
1848 goto out_kset_unregister; 1872 goto out_remove_stats_kobj;
1849#endif 1873#endif
1850 1874
1851 error = xfs_qm_init(); 1875 error = xfs_qm_init();
1852 if (error) 1876 if (error)
1853 goto out_remove_kobj; 1877 goto out_remove_dbg_kobj;
1854 1878
1855 error = register_filesystem(&xfs_fs_type); 1879 error = register_filesystem(&xfs_fs_type);
1856 if (error) 1880 if (error)
@@ -1859,11 +1883,15 @@ init_xfs_fs(void)
1859 1883
1860 out_qm_exit: 1884 out_qm_exit:
1861 xfs_qm_exit(); 1885 xfs_qm_exit();
1862 out_remove_kobj: 1886 out_remove_dbg_kobj:
1863#ifdef DEBUG 1887#ifdef DEBUG
1864 xfs_sysfs_del(&xfs_dbg_kobj); 1888 xfs_sysfs_del(&xfs_dbg_kobj);
1865 out_kset_unregister: 1889 out_remove_stats_kobj:
1866#endif 1890#endif
1891 xfs_sysfs_del(&xfsstats.xs_kobj);
1892 out_free_stats:
1893 free_percpu(xfsstats.xs_stats);
1894 out_kset_unregister:
1867 kset_unregister(xfs_kset); 1895 kset_unregister(xfs_kset);
1868 out_sysctl_unregister: 1896 out_sysctl_unregister:
1869 xfs_sysctl_unregister(); 1897 xfs_sysctl_unregister();
@@ -1889,6 +1917,8 @@ exit_xfs_fs(void)
1889#ifdef DEBUG 1917#ifdef DEBUG
1890 xfs_sysfs_del(&xfs_dbg_kobj); 1918 xfs_sysfs_del(&xfs_dbg_kobj);
1891#endif 1919#endif
1920 xfs_sysfs_del(&xfsstats.xs_kobj);
1921 free_percpu(xfsstats.xs_stats);
1892 kset_unregister(xfs_kset); 1922 kset_unregister(xfs_kset);
1893 xfs_sysctl_unregister(); 1923 xfs_sysctl_unregister();
1894 xfs_cleanup_procfs(); 1924 xfs_cleanup_procfs();
@@ -1896,6 +1926,7 @@ exit_xfs_fs(void)
1896 xfs_mru_cache_uninit(); 1926 xfs_mru_cache_uninit();
1897 xfs_destroy_workqueues(); 1927 xfs_destroy_workqueues();
1898 xfs_destroy_zones(); 1928 xfs_destroy_zones();
1929 xfs_uuid_table_free();
1899} 1930}
1900 1931
1901module_init(init_xfs_fs); 1932module_init(init_xfs_fs);
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index a0c8067cea6f..aed74d3f8da9 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -19,6 +19,7 @@
19#include <linux/sysctl.h> 19#include <linux/sysctl.h>
20#include <linux/proc_fs.h> 20#include <linux/proc_fs.h>
21#include "xfs_error.h" 21#include "xfs_error.h"
22#include "xfs_stats.h"
22 23
23static struct ctl_table_header *xfs_table_header; 24static struct ctl_table_header *xfs_table_header;
24 25
@@ -31,22 +32,12 @@ xfs_stats_clear_proc_handler(
31 size_t *lenp, 32 size_t *lenp,
32 loff_t *ppos) 33 loff_t *ppos)
33{ 34{
34 int c, ret, *valp = ctl->data; 35 int ret, *valp = ctl->data;
35 __uint32_t vn_active;
36 36
37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos); 37 ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
38 38
39 if (!ret && write && *valp) { 39 if (!ret && write && *valp) {
40 xfs_notice(NULL, "Clearing xfsstats"); 40 xfs_stats_clearall(xfsstats.xs_stats);
41 for_each_possible_cpu(c) {
42 preempt_disable();
43 /* save vn_active, it's a universal truth! */
44 vn_active = per_cpu(xfsstats, c).vn_active;
45 memset(&per_cpu(xfsstats, c), 0,
46 sizeof(struct xfsstats));
47 per_cpu(xfsstats, c).vn_active = vn_active;
48 preempt_enable();
49 }
50 xfs_stats_clear = 0; 41 xfs_stats_clear = 0;
51 } 42 }
52 43
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index aa03670851d8..ee70f5dec9dc 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -21,11 +21,13 @@
21#include "xfs_log_format.h" 21#include "xfs_log_format.h"
22#include "xfs_log.h" 22#include "xfs_log.h"
23#include "xfs_log_priv.h" 23#include "xfs_log_priv.h"
24#include "xfs_stats.h"
24 25
25struct xfs_sysfs_attr { 26struct xfs_sysfs_attr {
26 struct attribute attr; 27 struct attribute attr;
27 ssize_t (*show)(char *buf, void *data); 28 ssize_t (*show)(struct kobject *kobject, char *buf);
28 ssize_t (*store)(const char *buf, size_t count, void *data); 29 ssize_t (*store)(struct kobject *kobject, const char *buf,
30 size_t count);
29}; 31};
30 32
31static inline struct xfs_sysfs_attr * 33static inline struct xfs_sysfs_attr *
@@ -38,6 +40,8 @@ to_attr(struct attribute *attr)
38 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name) 40 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RW(name)
39#define XFS_SYSFS_ATTR_RO(name) \ 41#define XFS_SYSFS_ATTR_RO(name) \
40 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name) 42 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_RO(name)
43#define XFS_SYSFS_ATTR_WO(name) \
44 static struct xfs_sysfs_attr xfs_sysfs_attr_##name = __ATTR_WO(name)
41 45
42#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr 46#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
43 47
@@ -51,14 +55,42 @@ struct kobj_type xfs_mp_ktype = {
51 .release = xfs_sysfs_release, 55 .release = xfs_sysfs_release,
52}; 56};
53 57
58STATIC ssize_t
59xfs_sysfs_object_show(
60 struct kobject *kobject,
61 struct attribute *attr,
62 char *buf)
63{
64 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
65
66 return xfs_attr->show ? xfs_attr->show(kobject, buf) : 0;
67}
68
69STATIC ssize_t
70xfs_sysfs_object_store(
71 struct kobject *kobject,
72 struct attribute *attr,
73 const char *buf,
74 size_t count)
75{
76 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
77
78 return xfs_attr->store ? xfs_attr->store(kobject, buf, count) : 0;
79}
80
81static const struct sysfs_ops xfs_sysfs_ops = {
82 .show = xfs_sysfs_object_show,
83 .store = xfs_sysfs_object_store,
84};
85
54#ifdef DEBUG 86#ifdef DEBUG
55/* debug */ 87/* debug */
56 88
57STATIC ssize_t 89STATIC ssize_t
58log_recovery_delay_store( 90log_recovery_delay_store(
91 struct kobject *kobject,
59 const char *buf, 92 const char *buf,
60 size_t count, 93 size_t count)
61 void *data)
62{ 94{
63 int ret; 95 int ret;
64 int val; 96 int val;
@@ -77,8 +109,8 @@ log_recovery_delay_store(
77 109
78STATIC ssize_t 110STATIC ssize_t
79log_recovery_delay_show( 111log_recovery_delay_show(
80 char *buf, 112 struct kobject *kobject,
81 void *data) 113 char *buf)
82{ 114{
83 return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay); 115 return snprintf(buf, PAGE_SIZE, "%d\n", xfs_globals.log_recovery_delay);
84} 116}
@@ -89,52 +121,87 @@ static struct attribute *xfs_dbg_attrs[] = {
89 NULL, 121 NULL,
90}; 122};
91 123
124struct kobj_type xfs_dbg_ktype = {
125 .release = xfs_sysfs_release,
126 .sysfs_ops = &xfs_sysfs_ops,
127 .default_attrs = xfs_dbg_attrs,
128};
129
130#endif /* DEBUG */
131
132/* stats */
133
134static inline struct xstats *
135to_xstats(struct kobject *kobject)
136{
137 struct xfs_kobj *kobj = to_kobj(kobject);
138
139 return container_of(kobj, struct xstats, xs_kobj);
140}
141
92STATIC ssize_t 142STATIC ssize_t
93xfs_dbg_show( 143stats_show(
94 struct kobject *kobject, 144 struct kobject *kobject,
95 struct attribute *attr, 145 char *buf)
96 char *buf)
97{ 146{
98 struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 147 struct xstats *stats = to_xstats(kobject);
99 148
100 return xfs_attr->show ? xfs_attr->show(buf, NULL) : 0; 149 return xfs_stats_format(stats->xs_stats, buf);
101} 150}
151XFS_SYSFS_ATTR_RO(stats);
102 152
103STATIC ssize_t 153STATIC ssize_t
104xfs_dbg_store( 154stats_clear_store(
105 struct kobject *kobject, 155 struct kobject *kobject,
106 struct attribute *attr, 156 const char *buf,
107 const char *buf, 157 size_t count)
108 size_t count)
109{ 158{
110 struct xfs_sysfs_attr *xfs_attr = to_attr(attr); 159 int ret;
160 int val;
161 struct xstats *stats = to_xstats(kobject);
162
163 ret = kstrtoint(buf, 0, &val);
164 if (ret)
165 return ret;
111 166
112 return xfs_attr->store ? xfs_attr->store(buf, count, NULL) : 0; 167 if (val != 1)
168 return -EINVAL;
169
170 xfs_stats_clearall(stats->xs_stats);
171 return count;
113} 172}
173XFS_SYSFS_ATTR_WO(stats_clear);
114 174
115static struct sysfs_ops xfs_dbg_ops = { 175static struct attribute *xfs_stats_attrs[] = {
116 .show = xfs_dbg_show, 176 ATTR_LIST(stats),
117 .store = xfs_dbg_store, 177 ATTR_LIST(stats_clear),
178 NULL,
118}; 179};
119 180
120struct kobj_type xfs_dbg_ktype = { 181struct kobj_type xfs_stats_ktype = {
121 .release = xfs_sysfs_release, 182 .release = xfs_sysfs_release,
122 .sysfs_ops = &xfs_dbg_ops, 183 .sysfs_ops = &xfs_sysfs_ops,
123 .default_attrs = xfs_dbg_attrs, 184 .default_attrs = xfs_stats_attrs,
124}; 185};
125 186
126#endif /* DEBUG */
127
128/* xlog */ 187/* xlog */
129 188
189static inline struct xlog *
190to_xlog(struct kobject *kobject)
191{
192 struct xfs_kobj *kobj = to_kobj(kobject);
193
194 return container_of(kobj, struct xlog, l_kobj);
195}
196
130STATIC ssize_t 197STATIC ssize_t
131log_head_lsn_show( 198log_head_lsn_show(
132 char *buf, 199 struct kobject *kobject,
133 void *data) 200 char *buf)
134{ 201{
135 struct xlog *log = data;
136 int cycle; 202 int cycle;
137 int block; 203 int block;
204 struct xlog *log = to_xlog(kobject);
138 205
139 spin_lock(&log->l_icloglock); 206 spin_lock(&log->l_icloglock);
140 cycle = log->l_curr_cycle; 207 cycle = log->l_curr_cycle;
@@ -147,12 +214,12 @@ XFS_SYSFS_ATTR_RO(log_head_lsn);
147 214
148STATIC ssize_t 215STATIC ssize_t
149log_tail_lsn_show( 216log_tail_lsn_show(
150 char *buf, 217 struct kobject *kobject,
151 void *data) 218 char *buf)
152{ 219{
153 struct xlog *log = data;
154 int cycle; 220 int cycle;
155 int block; 221 int block;
222 struct xlog *log = to_xlog(kobject);
156 223
157 xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block); 224 xlog_crack_atomic_lsn(&log->l_tail_lsn, &cycle, &block);
158 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block); 225 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, block);
@@ -161,12 +228,13 @@ XFS_SYSFS_ATTR_RO(log_tail_lsn);
161 228
162STATIC ssize_t 229STATIC ssize_t
163reserve_grant_head_show( 230reserve_grant_head_show(
164 char *buf, 231 struct kobject *kobject,
165 void *data) 232 char *buf)
233
166{ 234{
167 struct xlog *log = data;
168 int cycle; 235 int cycle;
169 int bytes; 236 int bytes;
237 struct xlog *log = to_xlog(kobject);
170 238
171 xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes); 239 xlog_crack_grant_head(&log->l_reserve_head.grant, &cycle, &bytes);
172 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); 240 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
@@ -175,12 +243,12 @@ XFS_SYSFS_ATTR_RO(reserve_grant_head);
175 243
176STATIC ssize_t 244STATIC ssize_t
177write_grant_head_show( 245write_grant_head_show(
178 char *buf, 246 struct kobject *kobject,
179 void *data) 247 char *buf)
180{ 248{
181 struct xlog *log = data;
182 int cycle; 249 int cycle;
183 int bytes; 250 int bytes;
251 struct xlog *log = to_xlog(kobject);
184 252
185 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes); 253 xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &bytes);
186 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes); 254 return snprintf(buf, PAGE_SIZE, "%d:%d\n", cycle, bytes);
@@ -195,45 +263,8 @@ static struct attribute *xfs_log_attrs[] = {
195 NULL, 263 NULL,
196}; 264};
197 265
198static inline struct xlog *
199to_xlog(struct kobject *kobject)
200{
201 struct xfs_kobj *kobj = to_kobj(kobject);
202 return container_of(kobj, struct xlog, l_kobj);
203}
204
205STATIC ssize_t
206xfs_log_show(
207 struct kobject *kobject,
208 struct attribute *attr,
209 char *buf)
210{
211 struct xlog *log = to_xlog(kobject);
212 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
213
214 return xfs_attr->show ? xfs_attr->show(buf, log) : 0;
215}
216
217STATIC ssize_t
218xfs_log_store(
219 struct kobject *kobject,
220 struct attribute *attr,
221 const char *buf,
222 size_t count)
223{
224 struct xlog *log = to_xlog(kobject);
225 struct xfs_sysfs_attr *xfs_attr = to_attr(attr);
226
227 return xfs_attr->store ? xfs_attr->store(buf, count, log) : 0;
228}
229
230static struct sysfs_ops xfs_log_ops = {
231 .show = xfs_log_show,
232 .store = xfs_log_store,
233};
234
235struct kobj_type xfs_log_ktype = { 266struct kobj_type xfs_log_ktype = {
236 .release = xfs_sysfs_release, 267 .release = xfs_sysfs_release,
237 .sysfs_ops = &xfs_log_ops, 268 .sysfs_ops = &xfs_sysfs_ops,
238 .default_attrs = xfs_log_attrs, 269 .default_attrs = xfs_log_attrs,
239}; 270};
diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h
index 240eee35f342..be692e59938d 100644
--- a/fs/xfs/xfs_sysfs.h
+++ b/fs/xfs/xfs_sysfs.h
@@ -22,6 +22,7 @@
22extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ 22extern struct kobj_type xfs_mp_ktype; /* xfs_mount */
23extern struct kobj_type xfs_dbg_ktype; /* debug */ 23extern struct kobj_type xfs_dbg_ktype; /* debug */
24extern struct kobj_type xfs_log_ktype; /* xlog */ 24extern struct kobj_type xfs_log_ktype; /* xlog */
25extern struct kobj_type xfs_stats_ktype; /* stats */
25 26
26static inline struct xfs_kobj * 27static inline struct xfs_kobj *
27to_kobj(struct kobject *kobject) 28to_kobj(struct kobject *kobject)
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 5ed36b1e04c1..877079eb0f8f 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -689,6 +689,7 @@ DEFINE_INODE_EVENT(xfs_inode_free_eofblocks_invalid);
689DEFINE_INODE_EVENT(xfs_filemap_fault); 689DEFINE_INODE_EVENT(xfs_filemap_fault);
690DEFINE_INODE_EVENT(xfs_filemap_pmd_fault); 690DEFINE_INODE_EVENT(xfs_filemap_pmd_fault);
691DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite); 691DEFINE_INODE_EVENT(xfs_filemap_page_mkwrite);
692DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite);
692 693
693DECLARE_EVENT_CLASS(xfs_iref_class, 694DECLARE_EVENT_CLASS(xfs_iref_class,
694 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), 695 TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
@@ -1312,6 +1313,7 @@ DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
1312DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 1313DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
1313DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); 1314DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
1314DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); 1315DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
1316DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
1315 1317
1316DECLARE_EVENT_CLASS(xfs_itrunc_class, 1318DECLARE_EVENT_CLASS(xfs_itrunc_class,
1317 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), 1319 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index a0ab1dae9c31..748b16aff45a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -930,9 +930,9 @@ __xfs_trans_commit(
930 */ 930 */
931 if (sync) { 931 if (sync) {
932 error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); 932 error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);
933 XFS_STATS_INC(xs_trans_sync); 933 XFS_STATS_INC(mp, xs_trans_sync);
934 } else { 934 } else {
935 XFS_STATS_INC(xs_trans_async); 935 XFS_STATS_INC(mp, xs_trans_async);
936 } 936 }
937 937
938 return error; 938 return error;
@@ -955,7 +955,7 @@ out_unreserve:
955 xfs_trans_free_items(tp, NULLCOMMITLSN, !!error); 955 xfs_trans_free_items(tp, NULLCOMMITLSN, !!error);
956 xfs_trans_free(tp); 956 xfs_trans_free(tp);
957 957
958 XFS_STATS_INC(xs_trans_empty); 958 XFS_STATS_INC(mp, xs_trans_empty);
959 return error; 959 return error;
960} 960}
961 961
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 1098cf490189..aa67339b9537 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -349,7 +349,7 @@ xfsaild_push(
349 xfs_ail_min_lsn(ailp))) { 349 xfs_ail_min_lsn(ailp))) {
350 ailp->xa_log_flush = 0; 350 ailp->xa_log_flush = 0;
351 351
352 XFS_STATS_INC(xs_push_ail_flush); 352 XFS_STATS_INC(mp, xs_push_ail_flush);
353 xfs_log_force(mp, XFS_LOG_SYNC); 353 xfs_log_force(mp, XFS_LOG_SYNC);
354 } 354 }
355 355
@@ -371,7 +371,7 @@ xfsaild_push(
371 goto out_done; 371 goto out_done;
372 } 372 }
373 373
374 XFS_STATS_INC(xs_push_ail); 374 XFS_STATS_INC(mp, xs_push_ail);
375 375
376 lsn = lip->li_lsn; 376 lsn = lip->li_lsn;
377 while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) { 377 while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
@@ -385,7 +385,7 @@ xfsaild_push(
385 lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list); 385 lock_result = lip->li_ops->iop_push(lip, &ailp->xa_buf_list);
386 switch (lock_result) { 386 switch (lock_result) {
387 case XFS_ITEM_SUCCESS: 387 case XFS_ITEM_SUCCESS:
388 XFS_STATS_INC(xs_push_ail_success); 388 XFS_STATS_INC(mp, xs_push_ail_success);
389 trace_xfs_ail_push(lip); 389 trace_xfs_ail_push(lip);
390 390
391 ailp->xa_last_pushed_lsn = lsn; 391 ailp->xa_last_pushed_lsn = lsn;
@@ -403,7 +403,7 @@ xfsaild_push(
403 * re-try the flushing relatively soon if most of the 403 * re-try the flushing relatively soon if most of the
 404 * AIL is being flushed. 404 * AIL is being flushed.
405 */ 405 */
406 XFS_STATS_INC(xs_push_ail_flushing); 406 XFS_STATS_INC(mp, xs_push_ail_flushing);
407 trace_xfs_ail_flushing(lip); 407 trace_xfs_ail_flushing(lip);
408 408
409 flushing++; 409 flushing++;
@@ -411,14 +411,14 @@ xfsaild_push(
411 break; 411 break;
412 412
413 case XFS_ITEM_PINNED: 413 case XFS_ITEM_PINNED:
414 XFS_STATS_INC(xs_push_ail_pinned); 414 XFS_STATS_INC(mp, xs_push_ail_pinned);
415 trace_xfs_ail_pinned(lip); 415 trace_xfs_ail_pinned(lip);
416 416
417 stuck++; 417 stuck++;
418 ailp->xa_log_flush++; 418 ailp->xa_log_flush++;
419 break; 419 break;
420 case XFS_ITEM_LOCKED: 420 case XFS_ITEM_LOCKED:
421 XFS_STATS_INC(xs_push_ail_locked); 421 XFS_STATS_INC(mp, xs_push_ail_locked);
422 trace_xfs_ail_locked(lip); 422 trace_xfs_ail_locked(lip);
423 423
424 stuck++; 424 stuck++;
@@ -497,6 +497,7 @@ xfsaild(
497 long tout = 0; /* milliseconds */ 497 long tout = 0; /* milliseconds */
498 498
499 current->flags |= PF_MEMALLOC; 499 current->flags |= PF_MEMALLOC;
500 set_freezable();
500 501
501 while (!kthread_should_stop()) { 502 while (!kthread_should_stop()) {
502 if (tout && tout <= 20) 503 if (tout && tout <= 20)
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 17280cd71934..b97f1df910ab 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -108,6 +108,15 @@ xfs_trans_log_inode(
108 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 108 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
109 109
110 /* 110 /*
111 * Record the specific change for fdatasync optimisation. This
112 * allows fdatasync to skip log forces for inodes that are only
113 * timestamp dirty. We do this before the change count so that
114 * the core being logged in this case does not impact on fdatasync
115 * behaviour.
116 */
117 ip->i_itemp->ili_fsync_fields |= flags;
118
119 /*
111 * First time we log the inode in a transaction, bump the inode change 120 * First time we log the inode in a transaction, bump the inode change
112 * counter if it is configured for this to occur. We don't use 121 * counter if it is configured for this to occur. We don't use
113 * inode_inc_version() because there is no need for extra locking around 122 * inode_inc_version() because there is no need for extra locking around
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index c036815183cb..8294f86441bf 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -53,11 +53,34 @@ xfs_xattr_get(struct dentry *dentry, const char *name,
53 return asize; 53 return asize;
54} 54}
55 55
56void
57xfs_forget_acl(
58 struct inode *inode,
59 const char *name,
60 int xflags)
61{
62 /*
63 * Invalidate any cached ACLs if the user has bypassed the ACL
 64 * interface. We don't validate the content whatsoever so it is the caller's
 65 * responsibility to provide data in valid format and ensure i_mode is
66 * consistent.
67 */
68 if (xflags & ATTR_ROOT) {
69#ifdef CONFIG_XFS_POSIX_ACL
70 if (!strcmp(name, SGI_ACL_FILE))
71 forget_cached_acl(inode, ACL_TYPE_ACCESS);
72 else if (!strcmp(name, SGI_ACL_DEFAULT))
73 forget_cached_acl(inode, ACL_TYPE_DEFAULT);
74#endif
75 }
76}
77
56static int 78static int
57xfs_xattr_set(struct dentry *dentry, const char *name, const void *value, 79xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
58 size_t size, int flags, int xflags) 80 size_t size, int flags, int xflags)
59{ 81{
60 struct xfs_inode *ip = XFS_I(d_inode(dentry)); 82 struct xfs_inode *ip = XFS_I(d_inode(dentry));
83 int error;
61 84
62 if (strcmp(name, "") == 0) 85 if (strcmp(name, "") == 0)
63 return -EINVAL; 86 return -EINVAL;
@@ -70,8 +93,12 @@ xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
70 93
71 if (!value) 94 if (!value)
72 return xfs_attr_remove(ip, (unsigned char *)name, xflags); 95 return xfs_attr_remove(ip, (unsigned char *)name, xflags);
73 return xfs_attr_set(ip, (unsigned char *)name, 96 error = xfs_attr_set(ip, (unsigned char *)name,
74 (void *)value, size, xflags); 97 (void *)value, size, xflags);
98 if (!error)
99 xfs_forget_acl(d_inode(dentry), name, xflags);
100
101 return error;
75} 102}
76 103
77static const struct xattr_handler xfs_xattr_user_handler = { 104static const struct xattr_handler xfs_xattr_user_handler = {