aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c4
-rw-r--r--fs/9p/acl.h2
-rw-r--r--fs/9p/cache.c20
-rw-r--r--fs/9p/cache.h9
-rw-r--r--fs/9p/v9fs.c45
-rw-r--r--fs/9p/v9fs.h29
-rw-r--r--fs/9p/v9fs_vfs.h3
-rw-r--r--fs/9p/vfs_file.c22
-rw-r--r--fs/9p/vfs_inode.c124
-rw-r--r--fs/9p/vfs_inode_dotl.c71
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/affs/file.c8
-rw-r--r--fs/afs/afs_vl.h2
-rw-r--r--fs/afs/internal.h4
-rw-r--r--fs/afs/security.c6
-rw-r--r--fs/afs/write.c18
-rw-r--r--fs/attr.c6
-rw-r--r--fs/bad_inode.c5
-rw-r--r--fs/binfmt_elf.c3
-rw-r--r--fs/binfmt_elf_fdpic.c3
-rw-r--r--fs/binfmt_misc.c3
-rw-r--r--fs/block_dev.c17
-rw-r--r--fs/btrfs/acl.c5
-rw-r--r--fs/btrfs/ctree.h9
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/file.c169
-rw-r--r--fs/btrfs/inode.c25
-rw-r--r--fs/btrfs/ioctl.c16
-rw-r--r--fs/cachefiles/bind.c2
-rw-r--r--fs/ceph/caps.c6
-rw-r--r--fs/ceph/dir.c21
-rw-r--r--fs/ceph/file.c22
-rw-r--r--fs/ceph/inode.c6
-rw-r--r--fs/ceph/super.h5
-rw-r--r--fs/cifs/cifsfs.c11
-rw-r--r--fs/cifs/cifsfs.h4
-rw-r--r--fs/cifs/connect.c5
-rw-r--r--fs/cifs/dir.c14
-rw-r--r--fs/cifs/file.c20
-rw-r--r--fs/cifs/readdir.c2
-rw-r--r--fs/coda/coda_int.h2
-rw-r--r--fs/coda/coda_linux.h2
-rw-r--r--fs/coda/dir.c9
-rw-r--r--fs/coda/file.c8
-rw-r--r--fs/coda/pioctl.c4
-rw-r--r--fs/dcache.c264
-rw-r--r--fs/direct-io.c88
-rw-r--r--fs/dlm/ast.c265
-rw-r--r--fs/dlm/ast.h15
-rw-r--r--fs/dlm/config.c75
-rw-r--r--fs/dlm/config.h2
-rw-r--r--fs/dlm/dlm_internal.h29
-rw-r--r--fs/dlm/lock.c225
-rw-r--r--fs/dlm/lockspace.c177
-rw-r--r--fs/dlm/lowcomms.c9
-rw-r--r--fs/dlm/memory.c22
-rw-r--r--fs/dlm/memory.h2
-rw-r--r--fs/dlm/recoverd.c12
-rw-r--r--fs/dlm/user.c12
-rw-r--r--fs/ecryptfs/file.c7
-rw-r--r--fs/ecryptfs/inode.c37
-rw-r--r--fs/efs/namei.c7
-rw-r--r--fs/exec.c41
-rw-r--r--fs/exofs/file.c10
-rw-r--r--fs/exofs/namei.c7
-rw-r--r--fs/ext2/acl.c4
-rw-r--r--fs/ext2/acl.h2
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/file.c4
-rw-r--r--fs/ext2/inode.c6
-rw-r--r--fs/ext2/namei.c14
-rw-r--r--fs/ext3/acl.c4
-rw-r--r--fs/ext3/acl.h2
-rw-r--r--fs/ext3/fsync.c18
-rw-r--r--fs/ext3/inode.c8
-rw-r--r--fs/ext3/namei.c14
-rw-r--r--fs/ext3/super.c2
-rw-r--r--fs/ext4/acl.c4
-rw-r--r--fs/ext4/acl.h2
-rw-r--r--fs/ext4/ext4.h2
-rw-r--r--fs/ext4/file.c21
-rw-r--r--fs/ext4/fsync.c38
-rw-r--r--fs/ext4/inode.c125
-rw-r--r--fs/ext4/namei.c14
-rw-r--r--fs/fat/fat.h4
-rw-r--r--fs/fat/file.c8
-rw-r--r--fs/fat/inode.c10
-rw-r--r--fs/fat/namei_msdos.c29
-rw-r--r--fs/fat/namei_vfat.c6
-rw-r--r--fs/fs-writeback.c28
-rw-r--r--fs/fscache/page.c14
-rw-r--r--fs/fuse/dir.c28
-rw-r--r--fs/fuse/file.c45
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/generic_acl.c4
-rw-r--r--fs/gfs2/acl.c4
-rw-r--r--fs/gfs2/acl.h2
-rw-r--r--fs/gfs2/bmap.c14
-rw-r--r--fs/gfs2/dir.c221
-rw-r--r--fs/gfs2/dir.h1
-rw-r--r--fs/gfs2/file.c23
-rw-r--r--fs/gfs2/glock.c39
-rw-r--r--fs/gfs2/glock.h6
-rw-r--r--fs/gfs2/glops.c7
-rw-r--r--fs/gfs2/incore.h3
-rw-r--r--fs/gfs2/inode.c35
-rw-r--r--fs/gfs2/inode.h2
-rw-r--r--fs/gfs2/main.c1
-rw-r--r--fs/gfs2/ops_fstype.c1
-rw-r--r--fs/gfs2/rgrp.c52
-rw-r--r--fs/gfs2/rgrp.h4
-rw-r--r--fs/gfs2/super.c2
-rw-r--r--fs/hfs/inode.c15
-rw-r--r--fs/hfsplus/brec.c4
-rw-r--r--fs/hfsplus/catalog.c14
-rw-r--r--fs/hfsplus/dir.c8
-rw-r--r--fs/hfsplus/extents.c50
-rw-r--r--fs/hfsplus/hfsplus_fs.h21
-rw-r--r--fs/hfsplus/inode.c28
-rw-r--r--fs/hfsplus/part_tbl.c32
-rw-r--r--fs/hfsplus/super.c43
-rw-r--r--fs/hfsplus/unicode.c35
-rw-r--r--fs/hfsplus/wrapper.c92
-rw-r--r--fs/hostfs/hostfs_kern.c21
-rw-r--r--fs/hpfs/dir.c4
-rw-r--r--fs/hpfs/file.c7
-rw-r--r--fs/hpfs/hpfs_fn.h2
-rw-r--r--fs/hpfs/namei.c2
-rw-r--r--fs/hppfs/hppfs.c5
-rw-r--r--fs/inode.c129
-rw-r--r--fs/internal.h6
-rw-r--r--fs/isofs/dir.c3
-rw-r--r--fs/isofs/inode.c1
-rw-r--r--fs/isofs/isofs.h1
-rw-r--r--fs/isofs/namei.c13
-rw-r--r--fs/isofs/rock.c3
-rw-r--r--fs/jffs2/acl.c4
-rw-r--r--fs/jffs2/acl.h2
-rw-r--r--fs/jffs2/dir.c9
-rw-r--r--fs/jffs2/file.c9
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jfs/acl.c4
-rw-r--r--fs/jfs/file.c11
-rw-r--r--fs/jfs/inode.c4
-rw-r--r--fs/jfs/jfs_acl.h2
-rw-r--r--fs/jfs/jfs_inode.h2
-rw-r--r--fs/jfs/namei.c49
-rw-r--r--fs/libfs.c26
-rw-r--r--fs/logfs/dir.c4
-rw-r--r--fs/logfs/file.c11
-rw-r--r--fs/logfs/logfs.h2
-rw-r--r--fs/minix/inode.c3
-rw-r--r--fs/namei.c462
-rw-r--r--fs/namespace.c4
-rw-r--r--fs/ncpfs/file.c4
-rw-r--r--fs/nfs/cache_lib.c9
-rw-r--r--fs/nfs/dir.c87
-rw-r--r--fs/nfs/direct.c4
-rw-r--r--fs/nfs/file.c18
-rw-r--r--fs/nfs/inode.c20
-rw-r--r--fs/nfs/nfs4_fs.h10
-rw-r--r--fs/nfs/nfs4proc.c70
-rw-r--r--fs/nfs/nfs4state.c12
-rw-r--r--fs/nfs/pagelist.c4
-rw-r--r--fs/nfs/read.c8
-rw-r--r--fs/nfs/super.c16
-rw-r--r--fs/nfs/write.c22
-rw-r--r--fs/nfsd/nfs4recover.c52
-rw-r--r--fs/nilfs2/file.c12
-rw-r--r--fs/nilfs2/inode.c10
-rw-r--r--fs/nilfs2/namei.c7
-rw-r--r--fs/nilfs2/nilfs.h4
-rw-r--r--fs/ntfs/dir.c10
-rw-r--r--fs/ntfs/file.c13
-rw-r--r--fs/ntfs/inode.c10
-rw-r--r--fs/ocfs2/acl.c4
-rw-r--r--fs/ocfs2/acl.h2
-rw-r--r--fs/ocfs2/aops.c10
-rw-r--r--fs/ocfs2/file.c41
-rw-r--r--fs/ocfs2/file.h2
-rw-r--r--fs/ocfs2/namei.c1
-rw-r--r--fs/ocfs2/refcounttree.c49
-rw-r--r--fs/open.c2
-rw-r--r--fs/proc/array.c2
-rw-r--r--fs/proc/base.c8
-rw-r--r--fs/proc/proc_sysctl.c4
-rw-r--r--fs/read_write.c44
-rw-r--r--fs/reiserfs/dir.c13
-rw-r--r--fs/reiserfs/file.c10
-rw-r--r--fs/reiserfs/inode.c8
-rw-r--r--fs/reiserfs/namei.c4
-rw-r--r--fs/reiserfs/super.c1
-rw-r--r--fs/reiserfs/xattr.c25
-rw-r--r--fs/squashfs/namei.c10
-rw-r--r--fs/super.c176
-rw-r--r--fs/sync.c25
-rw-r--r--fs/sysfs/inode.c6
-rw-r--r--fs/sysfs/sysfs.h2
-rw-r--r--fs/ubifs/commit.c8
-rw-r--r--fs/ubifs/debug.c762
-rw-r--r--fs/ubifs/debug.h241
-rw-r--r--fs/ubifs/dir.c16
-rw-r--r--fs/ubifs/file.c23
-rw-r--r--fs/ubifs/io.c168
-rw-r--r--fs/ubifs/log.c6
-rw-r--r--fs/ubifs/lprops.c8
-rw-r--r--fs/ubifs/lpt.c37
-rw-r--r--fs/ubifs/lpt_commit.c40
-rw-r--r--fs/ubifs/misc.h103
-rw-r--r--fs/ubifs/orphan.c2
-rw-r--r--fs/ubifs/recovery.c43
-rw-r--r--fs/ubifs/replay.c3
-rw-r--r--fs/ubifs/sb.c6
-rw-r--r--fs/ubifs/scan.c4
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/ubifs/tnc.c26
-rw-r--r--fs/ubifs/tnc_commit.c145
-rw-r--r--fs/ubifs/ubifs.h23
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/ufs/namei.c2
-rw-r--r--fs/xfs/Makefile2
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c23
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c79
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h64
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c433
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c55
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c81
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h13
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h60
-rw-r--r--fs/xfs/quota/xfs_dquot.c48
-rw-r--r--fs/xfs/quota/xfs_dquot.h6
-rw-r--r--fs/xfs/quota/xfs_qm.c49
-rw-r--r--fs/xfs/quota/xfs_qm.h6
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c355
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c15
-rw-r--r--fs/xfs/xfs.h1
-rw-r--r--fs/xfs/xfs_acl.h2
-rw-r--r--fs/xfs/xfs_alloc.c14
-rw-r--r--fs/xfs/xfs_alloc_btree.c84
-rw-r--r--fs/xfs/xfs_arch.h136
-rw-r--r--fs/xfs/xfs_attr.c41
-rw-r--r--fs/xfs/xfs_attr_leaf.c60
-rw-r--r--fs/xfs/xfs_bmap.c41
-rw-r--r--fs/xfs/xfs_bmap_btree.c106
-rw-r--r--fs/xfs/xfs_btree.c29
-rw-r--r--fs/xfs/xfs_btree.h38
-rw-r--r--fs/xfs/xfs_btree_trace.c249
-rw-r--r--fs/xfs/xfs_btree_trace.h99
-rw-r--r--fs/xfs/xfs_buf_item.c75
-rw-r--r--fs/xfs/xfs_da_btree.c272
-rw-r--r--fs/xfs/xfs_da_btree.h13
-rw-r--r--fs/xfs/xfs_dir2.c140
-rw-r--r--fs/xfs/xfs_dir2.h54
-rw-r--r--fs/xfs/xfs_dir2_block.c253
-rw-r--r--fs/xfs/xfs_dir2_block.h92
-rw-r--r--fs/xfs/xfs_dir2_data.c327
-rw-r--r--fs/xfs/xfs_dir2_data.h184
-rw-r--r--fs/xfs/xfs_dir2_format.h597
-rw-r--r--fs/xfs/xfs_dir2_leaf.c417
-rw-r--r--fs/xfs/xfs_dir2_leaf.h253
-rw-r--r--fs/xfs/xfs_dir2_node.c201
-rw-r--r--fs/xfs/xfs_dir2_node.h100
-rw-r--r--fs/xfs/xfs_dir2_priv.h135
-rw-r--r--fs/xfs/xfs_dir2_sf.c338
-rw-r--r--fs/xfs/xfs_dir2_sf.h171
-rw-r--r--fs/xfs/xfs_fs.h5
-rw-r--r--fs/xfs/xfs_ialloc.c14
-rw-r--r--fs/xfs/xfs_ialloc_btree.c75
-rw-r--r--fs/xfs/xfs_iget.c1
-rw-r--r--fs/xfs/xfs_inode.c537
-rw-r--r--fs/xfs/xfs_inode.h25
-rw-r--r--fs/xfs/xfs_inode_item.c17
-rw-r--r--fs/xfs/xfs_inum.h11
-rw-r--r--fs/xfs/xfs_log.c64
-rw-r--r--fs/xfs/xfs_log_recover.c38
-rw-r--r--fs/xfs/xfs_mount.c71
-rw-r--r--fs/xfs/xfs_mount.h2
-rw-r--r--fs/xfs/xfs_trans.c27
-rw-r--r--fs/xfs/xfs_trans_ail.c214
-rw-r--r--fs/xfs/xfs_trans_buf.c118
-rw-r--r--fs/xfs/xfs_trans_inode.c9
-rw-r--r--fs/xfs/xfs_trans_priv.h14
-rw-r--r--fs/xfs/xfs_vnodeops.c479
-rw-r--r--fs/xfs/xfs_vnodeops.h3
288 files changed, 6651 insertions, 7571 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 535ab6eccb1a..e98f56d3787d 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -96,12 +96,12 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
96 return acl; 96 return acl;
97} 97}
98 98
99int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags) 99int v9fs_check_acl(struct inode *inode, int mask)
100{ 100{
101 struct posix_acl *acl; 101 struct posix_acl *acl;
102 struct v9fs_session_info *v9ses; 102 struct v9fs_session_info *v9ses;
103 103
104 if (flags & IPERM_FLAG_RCU) 104 if (mask & MAY_NOT_BLOCK)
105 return -ECHILD; 105 return -ECHILD;
106 106
107 v9ses = v9fs_inode2v9ses(inode); 107 v9ses = v9fs_inode2v9ses(inode);
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index 7ef3ac9f6d95..59e18c2e8c7e 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -16,7 +16,7 @@
16 16
17#ifdef CONFIG_9P_FS_POSIX_ACL 17#ifdef CONFIG_9P_FS_POSIX_ACL
18extern int v9fs_get_acl(struct inode *, struct p9_fid *); 18extern int v9fs_get_acl(struct inode *, struct p9_fid *);
19extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags); 19extern int v9fs_check_acl(struct inode *inode, int mask);
20extern int v9fs_acl_chmod(struct dentry *); 20extern int v9fs_acl_chmod(struct dentry *);
21extern int v9fs_set_create_acl(struct dentry *, 21extern int v9fs_set_create_acl(struct dentry *,
22 struct posix_acl *, struct posix_acl *); 22 struct posix_acl *, struct posix_acl *);
diff --git a/fs/9p/cache.c b/fs/9p/cache.c
index 5b335c5086a1..945aa5f02f9b 100644
--- a/fs/9p/cache.c
+++ b/fs/9p/cache.c
@@ -108,11 +108,10 @@ static uint16_t v9fs_cache_inode_get_key(const void *cookie_netfs_data,
108 void *buffer, uint16_t bufmax) 108 void *buffer, uint16_t bufmax)
109{ 109{
110 const struct v9fs_inode *v9inode = cookie_netfs_data; 110 const struct v9fs_inode *v9inode = cookie_netfs_data;
111 memcpy(buffer, &v9inode->fscache_key->path, 111 memcpy(buffer, &v9inode->qid.path, sizeof(v9inode->qid.path));
112 sizeof(v9inode->fscache_key->path));
113 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode, 112 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get key %llu", &v9inode->vfs_inode,
114 v9inode->fscache_key->path); 113 v9inode->qid.path);
115 return sizeof(v9inode->fscache_key->path); 114 return sizeof(v9inode->qid.path);
116} 115}
117 116
118static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data, 117static void v9fs_cache_inode_get_attr(const void *cookie_netfs_data,
@@ -129,11 +128,10 @@ static uint16_t v9fs_cache_inode_get_aux(const void *cookie_netfs_data,
129 void *buffer, uint16_t buflen) 128 void *buffer, uint16_t buflen)
130{ 129{
131 const struct v9fs_inode *v9inode = cookie_netfs_data; 130 const struct v9fs_inode *v9inode = cookie_netfs_data;
132 memcpy(buffer, &v9inode->fscache_key->version, 131 memcpy(buffer, &v9inode->qid.version, sizeof(v9inode->qid.version));
133 sizeof(v9inode->fscache_key->version));
134 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode, 132 P9_DPRINTK(P9_DEBUG_FSC, "inode %p get aux %u", &v9inode->vfs_inode,
135 v9inode->fscache_key->version); 133 v9inode->qid.version);
136 return sizeof(v9inode->fscache_key->version); 134 return sizeof(v9inode->qid.version);
137} 135}
138 136
139static enum 137static enum
@@ -143,11 +141,11 @@ fscache_checkaux v9fs_cache_inode_check_aux(void *cookie_netfs_data,
143{ 141{
144 const struct v9fs_inode *v9inode = cookie_netfs_data; 142 const struct v9fs_inode *v9inode = cookie_netfs_data;
145 143
146 if (buflen != sizeof(v9inode->fscache_key->version)) 144 if (buflen != sizeof(v9inode->qid.version))
147 return FSCACHE_CHECKAUX_OBSOLETE; 145 return FSCACHE_CHECKAUX_OBSOLETE;
148 146
149 if (memcmp(buffer, &v9inode->fscache_key->version, 147 if (memcmp(buffer, &v9inode->qid.version,
150 sizeof(v9inode->fscache_key->version))) 148 sizeof(v9inode->qid.version)))
151 return FSCACHE_CHECKAUX_OBSOLETE; 149 return FSCACHE_CHECKAUX_OBSOLETE;
152 150
153 return FSCACHE_CHECKAUX_OKAY; 151 return FSCACHE_CHECKAUX_OKAY;
diff --git a/fs/9p/cache.h b/fs/9p/cache.h
index 049507a5b01c..40cc54ced5d9 100644
--- a/fs/9p/cache.h
+++ b/fs/9p/cache.h
@@ -93,15 +93,6 @@ static inline void v9fs_uncache_page(struct inode *inode, struct page *page)
93 BUG_ON(PageFsCache(page)); 93 BUG_ON(PageFsCache(page));
94} 94}
95 95
96static inline void v9fs_fscache_set_key(struct inode *inode,
97 struct p9_qid *qid)
98{
99 struct v9fs_inode *v9inode = V9FS_I(inode);
100 spin_lock(&v9inode->fscache_lock);
101 v9inode->fscache_key = qid;
102 spin_unlock(&v9inode->fscache_lock);
103}
104
105static inline void v9fs_fscache_wait_on_page_write(struct inode *inode, 96static inline void v9fs_fscache_wait_on_page_write(struct inode *inode,
106 struct page *page) 97 struct page *page)
107{ 98{
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c82b017f51f3..ef9661886112 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -78,6 +78,25 @@ static const match_table_t tokens = {
78 {Opt_err, NULL} 78 {Opt_err, NULL}
79}; 79};
80 80
81/* Interpret mount options for cache mode */
82static int get_cache_mode(char *s)
83{
84 int version = -EINVAL;
85
86 if (!strcmp(s, "loose")) {
87 version = CACHE_LOOSE;
88 P9_DPRINTK(P9_DEBUG_9P, "Cache mode: loose\n");
89 } else if (!strcmp(s, "fscache")) {
90 version = CACHE_FSCACHE;
91 P9_DPRINTK(P9_DEBUG_9P, "Cache mode: fscache\n");
92 } else if (!strcmp(s, "none")) {
93 version = CACHE_NONE;
94 P9_DPRINTK(P9_DEBUG_9P, "Cache mode: none\n");
95 } else
96 printk(KERN_INFO "9p: Unknown Cache mode %s.\n", s);
97 return version;
98}
99
81/** 100/**
82 * v9fs_parse_options - parse mount options into session structure 101 * v9fs_parse_options - parse mount options into session structure
83 * @v9ses: existing v9fs session information 102 * @v9ses: existing v9fs session information
@@ -97,7 +116,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
97 /* setup defaults */ 116 /* setup defaults */
98 v9ses->afid = ~0; 117 v9ses->afid = ~0;
99 v9ses->debug = 0; 118 v9ses->debug = 0;
100 v9ses->cache = 0; 119 v9ses->cache = CACHE_NONE;
101#ifdef CONFIG_9P_FSCACHE 120#ifdef CONFIG_9P_FSCACHE
102 v9ses->cachetag = NULL; 121 v9ses->cachetag = NULL;
103#endif 122#endif
@@ -171,13 +190,13 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
171 "problem allocating copy of cache arg\n"); 190 "problem allocating copy of cache arg\n");
172 goto free_and_return; 191 goto free_and_return;
173 } 192 }
193 ret = get_cache_mode(s);
194 if (ret == -EINVAL) {
195 kfree(s);
196 goto free_and_return;
197 }
174 198
175 if (strcmp(s, "loose") == 0) 199 v9ses->cache = ret;
176 v9ses->cache = CACHE_LOOSE;
177 else if (strcmp(s, "fscache") == 0)
178 v9ses->cache = CACHE_FSCACHE;
179 else
180 v9ses->cache = CACHE_NONE;
181 kfree(s); 200 kfree(s);
182 break; 201 break;
183 202
@@ -200,9 +219,15 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
200 } else { 219 } else {
201 v9ses->flags |= V9FS_ACCESS_SINGLE; 220 v9ses->flags |= V9FS_ACCESS_SINGLE;
202 v9ses->uid = simple_strtoul(s, &e, 10); 221 v9ses->uid = simple_strtoul(s, &e, 10);
203 if (*e != '\0') 222 if (*e != '\0') {
204 v9ses->uid = ~0; 223 ret = -EINVAL;
224 printk(KERN_INFO "9p: Unknown access "
225 "argument %s.\n", s);
226 kfree(s);
227 goto free_and_return;
228 }
205 } 229 }
230
206 kfree(s); 231 kfree(s);
207 break; 232 break;
208 233
@@ -487,8 +512,8 @@ static void v9fs_inode_init_once(void *foo)
487 struct v9fs_inode *v9inode = (struct v9fs_inode *)foo; 512 struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
488#ifdef CONFIG_9P_FSCACHE 513#ifdef CONFIG_9P_FSCACHE
489 v9inode->fscache = NULL; 514 v9inode->fscache = NULL;
490 v9inode->fscache_key = NULL;
491#endif 515#endif
516 memset(&v9inode->qid, 0, sizeof(v9inode->qid));
492 inode_init_once(&v9inode->vfs_inode); 517 inode_init_once(&v9inode->vfs_inode);
493} 518}
494 519
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index e5ebedfc5ed8..e78956cbd702 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -125,8 +125,8 @@ struct v9fs_inode {
125#ifdef CONFIG_9P_FSCACHE 125#ifdef CONFIG_9P_FSCACHE
126 spinlock_t fscache_lock; 126 spinlock_t fscache_lock;
127 struct fscache_cookie *fscache; 127 struct fscache_cookie *fscache;
128 struct p9_qid *fscache_key;
129#endif 128#endif
129 struct p9_qid qid;
130 unsigned int cache_validity; 130 unsigned int cache_validity;
131 struct p9_fid *writeback_fid; 131 struct p9_fid *writeback_fid;
132 struct mutex v_mutex; 132 struct mutex v_mutex;
@@ -153,13 +153,13 @@ extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd,
153 void *p); 153 void *p);
154extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses, 154extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
155 struct p9_fid *fid, 155 struct p9_fid *fid,
156 struct super_block *sb); 156 struct super_block *sb, int new);
157extern const struct inode_operations v9fs_dir_inode_operations_dotl; 157extern const struct inode_operations v9fs_dir_inode_operations_dotl;
158extern const struct inode_operations v9fs_file_inode_operations_dotl; 158extern const struct inode_operations v9fs_file_inode_operations_dotl;
159extern const struct inode_operations v9fs_symlink_inode_operations_dotl; 159extern const struct inode_operations v9fs_symlink_inode_operations_dotl;
160extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, 160extern struct inode *v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses,
161 struct p9_fid *fid, 161 struct p9_fid *fid,
162 struct super_block *sb); 162 struct super_block *sb, int new);
163 163
164/* other default globals */ 164/* other default globals */
165#define V9FS_PORT 564 165#define V9FS_PORT 564
@@ -201,8 +201,27 @@ v9fs_get_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
201 struct super_block *sb) 201 struct super_block *sb)
202{ 202{
203 if (v9fs_proto_dotl(v9ses)) 203 if (v9fs_proto_dotl(v9ses))
204 return v9fs_inode_from_fid_dotl(v9ses, fid, sb); 204 return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 0);
205 else 205 else
206 return v9fs_inode_from_fid(v9ses, fid, sb); 206 return v9fs_inode_from_fid(v9ses, fid, sb, 0);
207} 207}
208
209/**
210 * v9fs_get_new_inode_from_fid - Helper routine to populate an inode by
211 * issuing a attribute request
212 * @v9ses: session information
213 * @fid: fid to issue attribute request for
214 * @sb: superblock on which to create inode
215 *
216 */
217static inline struct inode *
218v9fs_get_new_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
219 struct super_block *sb)
220{
221 if (v9fs_proto_dotl(v9ses))
222 return v9fs_inode_from_fid_dotl(v9ses, fid, sb, 1);
223 else
224 return v9fs_inode_from_fid(v9ses, fid, sb, 1);
225}
226
208#endif 227#endif
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 4014160903a9..46ce357ca1ab 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -70,7 +70,8 @@ ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
70ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64); 70ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
71void v9fs_blank_wstat(struct p9_wstat *wstat); 71void v9fs_blank_wstat(struct p9_wstat *wstat);
72int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); 72int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
73int v9fs_file_fsync_dotl(struct file *filp, int datasync); 73int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
74 int datasync);
74ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *, 75ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
75 const char __user *, size_t, loff_t *, int); 76 const char __user *, size_t, loff_t *, int);
76int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode); 77int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index ffed55817f0c..3c173fcc2c5a 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -519,32 +519,50 @@ out:
519} 519}
520 520
521 521
522static int v9fs_file_fsync(struct file *filp, int datasync) 522static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
523 int datasync)
523{ 524{
524 struct p9_fid *fid; 525 struct p9_fid *fid;
526 struct inode *inode = filp->f_mapping->host;
525 struct p9_wstat wstat; 527 struct p9_wstat wstat;
526 int retval; 528 int retval;
527 529
530 retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
531 if (retval)
532 return retval;
533
534 mutex_lock(&inode->i_mutex);
528 P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); 535 P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
529 536
530 fid = filp->private_data; 537 fid = filp->private_data;
531 v9fs_blank_wstat(&wstat); 538 v9fs_blank_wstat(&wstat);
532 539
533 retval = p9_client_wstat(fid, &wstat); 540 retval = p9_client_wstat(fid, &wstat);
541 mutex_unlock(&inode->i_mutex);
542
534 return retval; 543 return retval;
535} 544}
536 545
537int v9fs_file_fsync_dotl(struct file *filp, int datasync) 546int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
547 int datasync)
538{ 548{
539 struct p9_fid *fid; 549 struct p9_fid *fid;
550 struct inode *inode = filp->f_mapping->host;
540 int retval; 551 int retval;
541 552
553 retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
554 if (retval)
555 return retval;
556
557 mutex_lock(&inode->i_mutex);
542 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n", 558 P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
543 filp, datasync); 559 filp, datasync);
544 560
545 fid = filp->private_data; 561 fid = filp->private_data;
546 562
547 retval = p9_client_fsync(fid, datasync); 563 retval = p9_client_fsync(fid, datasync);
564 mutex_unlock(&inode->i_mutex);
565
548 return retval; 566 return retval;
549} 567}
550 568
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 7f6c67703195..8bb5507e822f 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -216,7 +216,6 @@ struct inode *v9fs_alloc_inode(struct super_block *sb)
216 return NULL; 216 return NULL;
217#ifdef CONFIG_9P_FSCACHE 217#ifdef CONFIG_9P_FSCACHE
218 v9inode->fscache = NULL; 218 v9inode->fscache = NULL;
219 v9inode->fscache_key = NULL;
220 spin_lock_init(&v9inode->fscache_lock); 219 spin_lock_init(&v9inode->fscache_lock);
221#endif 220#endif
222 v9inode->writeback_fid = NULL; 221 v9inode->writeback_fid = NULL;
@@ -433,17 +432,60 @@ void v9fs_evict_inode(struct inode *inode)
433 } 432 }
434} 433}
435 434
435static int v9fs_test_inode(struct inode *inode, void *data)
436{
437 int umode;
438 struct v9fs_inode *v9inode = V9FS_I(inode);
439 struct p9_wstat *st = (struct p9_wstat *)data;
440 struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
441
442 umode = p9mode2unixmode(v9ses, st->mode);
443 /* don't match inode of different type */
444 if ((inode->i_mode & S_IFMT) != (umode & S_IFMT))
445 return 0;
446
447 /* compare qid details */
448 if (memcmp(&v9inode->qid.version,
449 &st->qid.version, sizeof(v9inode->qid.version)))
450 return 0;
451
452 if (v9inode->qid.type != st->qid.type)
453 return 0;
454 return 1;
455}
456
457static int v9fs_test_new_inode(struct inode *inode, void *data)
458{
459 return 0;
460}
461
462static int v9fs_set_inode(struct inode *inode, void *data)
463{
464 struct v9fs_inode *v9inode = V9FS_I(inode);
465 struct p9_wstat *st = (struct p9_wstat *)data;
466
467 memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
468 return 0;
469}
470
436static struct inode *v9fs_qid_iget(struct super_block *sb, 471static struct inode *v9fs_qid_iget(struct super_block *sb,
437 struct p9_qid *qid, 472 struct p9_qid *qid,
438 struct p9_wstat *st) 473 struct p9_wstat *st,
474 int new)
439{ 475{
440 int retval, umode; 476 int retval, umode;
441 unsigned long i_ino; 477 unsigned long i_ino;
442 struct inode *inode; 478 struct inode *inode;
443 struct v9fs_session_info *v9ses = sb->s_fs_info; 479 struct v9fs_session_info *v9ses = sb->s_fs_info;
480 int (*test)(struct inode *, void *);
481
482 if (new)
483 test = v9fs_test_new_inode;
484 else
485 test = v9fs_test_inode;
444 486
445 i_ino = v9fs_qid2ino(qid); 487 i_ino = v9fs_qid2ino(qid);
446 inode = iget_locked(sb, i_ino); 488 inode = iget5_locked(sb, i_ino, test, v9fs_set_inode, st);
447 if (!inode) 489 if (!inode)
448 return ERR_PTR(-ENOMEM); 490 return ERR_PTR(-ENOMEM);
449 if (!(inode->i_state & I_NEW)) 491 if (!(inode->i_state & I_NEW))
@@ -453,6 +495,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
453 * FIXME!! we may need support for stale inodes 495 * FIXME!! we may need support for stale inodes
454 * later. 496 * later.
455 */ 497 */
498 inode->i_ino = i_ino;
456 umode = p9mode2unixmode(v9ses, st->mode); 499 umode = p9mode2unixmode(v9ses, st->mode);
457 retval = v9fs_init_inode(v9ses, inode, umode); 500 retval = v9fs_init_inode(v9ses, inode, umode);
458 if (retval) 501 if (retval)
@@ -460,7 +503,6 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
460 503
461 v9fs_stat2inode(st, inode, sb); 504 v9fs_stat2inode(st, inode, sb);
462#ifdef CONFIG_9P_FSCACHE 505#ifdef CONFIG_9P_FSCACHE
463 v9fs_fscache_set_key(inode, &st->qid);
464 v9fs_cache_inode_get_cookie(inode); 506 v9fs_cache_inode_get_cookie(inode);
465#endif 507#endif
466 unlock_new_inode(inode); 508 unlock_new_inode(inode);
@@ -474,7 +516,7 @@ error:
474 516
475struct inode * 517struct inode *
476v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, 518v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
477 struct super_block *sb) 519 struct super_block *sb, int new)
478{ 520{
479 struct p9_wstat *st; 521 struct p9_wstat *st;
480 struct inode *inode = NULL; 522 struct inode *inode = NULL;
@@ -483,7 +525,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
483 if (IS_ERR(st)) 525 if (IS_ERR(st))
484 return ERR_CAST(st); 526 return ERR_CAST(st);
485 527
486 inode = v9fs_qid_iget(sb, &st->qid, st); 528 inode = v9fs_qid_iget(sb, &st->qid, st, new);
487 p9stat_free(st); 529 p9stat_free(st);
488 kfree(st); 530 kfree(st);
489 return inode; 531 return inode;
@@ -492,38 +534,50 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
492/** 534/**
493 * v9fs_remove - helper function to remove files and directories 535 * v9fs_remove - helper function to remove files and directories
494 * @dir: directory inode that is being deleted 536 * @dir: directory inode that is being deleted
495 * @file: dentry that is being deleted 537 * @dentry: dentry that is being deleted
496 * @rmdir: removing a directory 538 * @rmdir: removing a directory
497 * 539 *
498 */ 540 */
499 541
500static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir) 542static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
501{ 543{
502 int retval; 544 struct inode *inode;
503 struct p9_fid *v9fid; 545 int retval = -EOPNOTSUPP;
504 struct inode *file_inode; 546 struct p9_fid *v9fid, *dfid;
505 547 struct v9fs_session_info *v9ses;
506 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
507 rmdir);
508 548
509 file_inode = file->d_inode; 549 P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n",
510 v9fid = v9fs_fid_clone(file); 550 dir, dentry, flags);
511 if (IS_ERR(v9fid))
512 return PTR_ERR(v9fid);
513 551
514 retval = p9_client_remove(v9fid); 552 v9ses = v9fs_inode2v9ses(dir);
553 inode = dentry->d_inode;
554 dfid = v9fs_fid_lookup(dentry->d_parent);
555 if (IS_ERR(dfid)) {
556 retval = PTR_ERR(dfid);
557 P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", retval);
558 return retval;
559 }
560 if (v9fs_proto_dotl(v9ses))
561 retval = p9_client_unlinkat(dfid, dentry->d_name.name, flags);
562 if (retval == -EOPNOTSUPP) {
563 /* Try the one based on path */
564 v9fid = v9fs_fid_clone(dentry);
565 if (IS_ERR(v9fid))
566 return PTR_ERR(v9fid);
567 retval = p9_client_remove(v9fid);
568 }
515 if (!retval) { 569 if (!retval) {
516 /* 570 /*
517 * directories on unlink should have zero 571 * directories on unlink should have zero
518 * link count 572 * link count
519 */ 573 */
520 if (rmdir) { 574 if (flags & AT_REMOVEDIR) {
521 clear_nlink(file_inode); 575 clear_nlink(inode);
522 drop_nlink(dir); 576 drop_nlink(dir);
523 } else 577 } else
524 drop_nlink(file_inode); 578 drop_nlink(inode);
525 579
526 v9fs_invalidate_inode_attr(file_inode); 580 v9fs_invalidate_inode_attr(inode);
527 v9fs_invalidate_inode_attr(dir); 581 v9fs_invalidate_inode_attr(dir);
528 } 582 }
529 return retval; 583 return retval;
@@ -585,7 +639,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
585 } 639 }
586 640
587 /* instantiate inode and assign the unopened fid to the dentry */ 641 /* instantiate inode and assign the unopened fid to the dentry */
588 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); 642 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
589 if (IS_ERR(inode)) { 643 if (IS_ERR(inode)) {
590 err = PTR_ERR(inode); 644 err = PTR_ERR(inode);
591 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 645 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -633,8 +687,8 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
633 fid = NULL; 687 fid = NULL;
634 v9ses = v9fs_inode2v9ses(dir); 688 v9ses = v9fs_inode2v9ses(dir);
635 perm = unixmode2p9mode(v9ses, mode); 689 perm = unixmode2p9mode(v9ses, mode);
636 if (nd && nd->flags & LOOKUP_OPEN) 690 if (nd)
637 flags = nd->intent.open.flags - 1; 691 flags = nd->intent.open.flags;
638 else 692 else
639 flags = O_RDWR; 693 flags = O_RDWR;
640 694
@@ -649,7 +703,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
649 703
650 v9fs_invalidate_inode_attr(dir); 704 v9fs_invalidate_inode_attr(dir);
651 /* if we are opening a file, assign the open fid to the file */ 705 /* if we are opening a file, assign the open fid to the file */
652 if (nd && nd->flags & LOOKUP_OPEN) { 706 if (nd) {
653 v9inode = V9FS_I(dentry->d_inode); 707 v9inode = V9FS_I(dentry->d_inode);
654 mutex_lock(&v9inode->v_mutex); 708 mutex_lock(&v9inode->v_mutex);
655 if (v9ses->cache && !v9inode->writeback_fid && 709 if (v9ses->cache && !v9inode->writeback_fid &&
@@ -814,7 +868,7 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
814 868
815int v9fs_vfs_rmdir(struct inode *i, struct dentry *d) 869int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
816{ 870{
817 return v9fs_remove(i, d, 1); 871 return v9fs_remove(i, d, AT_REMOVEDIR);
818} 872}
819 873
820/** 874/**
@@ -862,9 +916,12 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
862 916
863 down_write(&v9ses->rename_sem); 917 down_write(&v9ses->rename_sem);
864 if (v9fs_proto_dotl(v9ses)) { 918 if (v9fs_proto_dotl(v9ses)) {
865 retval = p9_client_rename(oldfid, newdirfid, 919 retval = p9_client_renameat(olddirfid, old_dentry->d_name.name,
866 (char *) new_dentry->d_name.name); 920 newdirfid, new_dentry->d_name.name);
867 if (retval != -ENOSYS) 921 if (retval == -EOPNOTSUPP)
922 retval = p9_client_rename(oldfid, newdirfid,
923 new_dentry->d_name.name);
924 if (retval != -EOPNOTSUPP)
868 goto clunk_newdir; 925 goto clunk_newdir;
869 } 926 }
870 if (old_dentry->d_parent != new_dentry->d_parent) { 927 if (old_dentry->d_parent != new_dentry->d_parent) {
@@ -889,11 +946,6 @@ clunk_newdir:
889 clear_nlink(new_inode); 946 clear_nlink(new_inode);
890 else 947 else
891 drop_nlink(new_inode); 948 drop_nlink(new_inode);
892 /*
893 * Work around vfs rename rehash bug with
894 * FS_RENAME_DOES_D_MOVE
895 */
896 v9fs_invalidate_inode_attr(new_inode);
897 } 949 }
898 if (S_ISDIR(old_inode->i_mode)) { 950 if (S_ISDIR(old_inode->i_mode)) {
899 if (!new_inode) 951 if (!new_inode)
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 691c78f58bef..276f4a69ecd4 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -86,18 +86,63 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
86 return dentry; 86 return dentry;
87} 87}
88 88
89static int v9fs_test_inode_dotl(struct inode *inode, void *data)
90{
91 struct v9fs_inode *v9inode = V9FS_I(inode);
92 struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
93
94 /* don't match inode of different type */
95 if ((inode->i_mode & S_IFMT) != (st->st_mode & S_IFMT))
96 return 0;
97
98 if (inode->i_generation != st->st_gen)
99 return 0;
100
101 /* compare qid details */
102 if (memcmp(&v9inode->qid.version,
103 &st->qid.version, sizeof(v9inode->qid.version)))
104 return 0;
105
106 if (v9inode->qid.type != st->qid.type)
107 return 0;
108 return 1;
109}
110
111/* Always get a new inode */
112static int v9fs_test_new_inode_dotl(struct inode *inode, void *data)
113{
114 return 0;
115}
116
117static int v9fs_set_inode_dotl(struct inode *inode, void *data)
118{
119 struct v9fs_inode *v9inode = V9FS_I(inode);
120 struct p9_stat_dotl *st = (struct p9_stat_dotl *)data;
121
122 memcpy(&v9inode->qid, &st->qid, sizeof(st->qid));
123 inode->i_generation = st->st_gen;
124 return 0;
125}
126
89static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, 127static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
90 struct p9_qid *qid, 128 struct p9_qid *qid,
91 struct p9_fid *fid, 129 struct p9_fid *fid,
92 struct p9_stat_dotl *st) 130 struct p9_stat_dotl *st,
131 int new)
93{ 132{
94 int retval; 133 int retval;
95 unsigned long i_ino; 134 unsigned long i_ino;
96 struct inode *inode; 135 struct inode *inode;
97 struct v9fs_session_info *v9ses = sb->s_fs_info; 136 struct v9fs_session_info *v9ses = sb->s_fs_info;
137 int (*test)(struct inode *, void *);
138
139 if (new)
140 test = v9fs_test_new_inode_dotl;
141 else
142 test = v9fs_test_inode_dotl;
98 143
99 i_ino = v9fs_qid2ino(qid); 144 i_ino = v9fs_qid2ino(qid);
100 inode = iget_locked(sb, i_ino); 145 inode = iget5_locked(sb, i_ino, test, v9fs_set_inode_dotl, st);
101 if (!inode) 146 if (!inode)
102 return ERR_PTR(-ENOMEM); 147 return ERR_PTR(-ENOMEM);
103 if (!(inode->i_state & I_NEW)) 148 if (!(inode->i_state & I_NEW))
@@ -107,13 +152,13 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
107 * FIXME!! we may need support for stale inodes 152 * FIXME!! we may need support for stale inodes
108 * later. 153 * later.
109 */ 154 */
155 inode->i_ino = i_ino;
110 retval = v9fs_init_inode(v9ses, inode, st->st_mode); 156 retval = v9fs_init_inode(v9ses, inode, st->st_mode);
111 if (retval) 157 if (retval)
112 goto error; 158 goto error;
113 159
114 v9fs_stat2inode_dotl(st, inode); 160 v9fs_stat2inode_dotl(st, inode);
115#ifdef CONFIG_9P_FSCACHE 161#ifdef CONFIG_9P_FSCACHE
116 v9fs_fscache_set_key(inode, &st->qid);
117 v9fs_cache_inode_get_cookie(inode); 162 v9fs_cache_inode_get_cookie(inode);
118#endif 163#endif
119 retval = v9fs_get_acl(inode, fid); 164 retval = v9fs_get_acl(inode, fid);
@@ -131,16 +176,16 @@ error:
131 176
132struct inode * 177struct inode *
133v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid, 178v9fs_inode_from_fid_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
134 struct super_block *sb) 179 struct super_block *sb, int new)
135{ 180{
136 struct p9_stat_dotl *st; 181 struct p9_stat_dotl *st;
137 struct inode *inode = NULL; 182 struct inode *inode = NULL;
138 183
139 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC); 184 st = p9_client_getattr_dotl(fid, P9_STATS_BASIC | P9_STATS_GEN);
140 if (IS_ERR(st)) 185 if (IS_ERR(st))
141 return ERR_CAST(st); 186 return ERR_CAST(st);
142 187
143 inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st); 188 inode = v9fs_qid_iget_dotl(sb, &st->qid, fid, st, new);
144 kfree(st); 189 kfree(st);
145 return inode; 190 return inode;
146} 191}
@@ -173,8 +218,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
173 struct posix_acl *pacl = NULL, *dacl = NULL; 218 struct posix_acl *pacl = NULL, *dacl = NULL;
174 219
175 v9ses = v9fs_inode2v9ses(dir); 220 v9ses = v9fs_inode2v9ses(dir);
176 if (nd && nd->flags & LOOKUP_OPEN) 221 if (nd)
177 flags = nd->intent.open.flags - 1; 222 flags = nd->intent.open.flags;
178 else { 223 else {
179 /* 224 /*
180 * create call without LOOKUP_OPEN is due 225 * create call without LOOKUP_OPEN is due
@@ -230,7 +275,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
230 fid = NULL; 275 fid = NULL;
231 goto error; 276 goto error;
232 } 277 }
233 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); 278 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
234 if (IS_ERR(inode)) { 279 if (IS_ERR(inode)) {
235 err = PTR_ERR(inode); 280 err = PTR_ERR(inode);
236 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err); 281 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
@@ -350,7 +395,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
350 goto error; 395 goto error;
351 } 396 }
352 397
353 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); 398 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
354 if (IS_ERR(inode)) { 399 if (IS_ERR(inode)) {
355 err = PTR_ERR(inode); 400 err = PTR_ERR(inode);
356 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 401 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -547,7 +592,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
547 inode->i_blocks = stat->st_blocks; 592 inode->i_blocks = stat->st_blocks;
548 } 593 }
549 if (stat->st_result_mask & P9_STATS_GEN) 594 if (stat->st_result_mask & P9_STATS_GEN)
550 inode->i_generation = stat->st_gen; 595 inode->i_generation = stat->st_gen;
551 596
552 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION 597 /* Currently we don't support P9_STATS_BTIME and P9_STATS_DATA_VERSION
553 * because the inode structure does not have fields for them. 598 * because the inode structure does not have fields for them.
@@ -603,7 +648,7 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
603 } 648 }
604 649
605 /* instantiate inode and assign the unopened fid to dentry */ 650 /* instantiate inode and assign the unopened fid to dentry */
606 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); 651 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
607 if (IS_ERR(inode)) { 652 if (IS_ERR(inode)) {
608 err = PTR_ERR(inode); 653 err = PTR_ERR(inode);
609 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 654 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
@@ -756,7 +801,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
756 goto error; 801 goto error;
757 } 802 }
758 803
759 inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); 804 inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb);
760 if (IS_ERR(inode)) { 805 if (IS_ERR(inode)) {
761 err = PTR_ERR(inode); 806 err = PTR_ERR(inode);
762 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", 807 P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0e95f73a7023..c2b9c79eb64e 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -182,7 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent
182 182
183void affs_free_prealloc(struct inode *inode); 183void affs_free_prealloc(struct inode *inode);
184extern void affs_truncate(struct inode *); 184extern void affs_truncate(struct inode *);
185int affs_file_fsync(struct file *, int); 185int affs_file_fsync(struct file *, loff_t, loff_t, int);
186 186
187/* dir.c */ 187/* dir.c */
188 188
diff --git a/fs/affs/file.c b/fs/affs/file.c
index acf321b70fcd..2f4c935cb327 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -923,14 +923,20 @@ affs_truncate(struct inode *inode)
923 affs_free_prealloc(inode); 923 affs_free_prealloc(inode);
924} 924}
925 925
926int affs_file_fsync(struct file *filp, int datasync) 926int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
927{ 927{
928 struct inode *inode = filp->f_mapping->host; 928 struct inode *inode = filp->f_mapping->host;
929 int ret, err; 929 int ret, err;
930 930
931 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
932 if (err)
933 return err;
934
935 mutex_lock(&inode->i_mutex);
931 ret = write_inode_now(inode, 0); 936 ret = write_inode_now(inode, 0);
932 err = sync_blockdev(inode->i_sb->s_bdev); 937 err = sync_blockdev(inode->i_sb->s_bdev);
933 if (!ret) 938 if (!ret)
934 ret = err; 939 ret = err;
940 mutex_unlock(&inode->i_mutex);
935 return ret; 941 return ret;
936} 942}
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 8bbefe009ed4..800f607ffaf5 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -49,7 +49,7 @@ enum AFSVL_Errors {
49 AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */ 49 AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */
50 AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */ 50 AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */
51 AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */ 51 AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */
52 AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server °ag */ 52 AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server flag */
53 AFSVL_PERM = 363546, /* No permission access */ 53 AFSVL_PERM = 363546, /* No permission access */
54 AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */ 54 AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
55}; 55};
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5a9b6843bac1..d2b0888126d4 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -627,7 +627,7 @@ extern void afs_clear_permits(struct afs_vnode *);
627extern void afs_cache_permit(struct afs_vnode *, struct key *, long); 627extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
628extern void afs_zap_permits(struct rcu_head *); 628extern void afs_zap_permits(struct rcu_head *);
629extern struct key *afs_request_key(struct afs_cell *); 629extern struct key *afs_request_key(struct afs_cell *);
630extern int afs_permission(struct inode *, int, unsigned int); 630extern int afs_permission(struct inode *, int);
631 631
632/* 632/*
633 * server.c 633 * server.c
@@ -750,7 +750,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
750extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, 750extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
751 unsigned long, loff_t); 751 unsigned long, loff_t);
752extern int afs_writeback_all(struct afs_vnode *); 752extern int afs_writeback_all(struct afs_vnode *);
753extern int afs_fsync(struct file *, int); 753extern int afs_fsync(struct file *, loff_t, loff_t, int);
754 754
755 755
756/*****************************************************************************/ 756/*****************************************************************************/
diff --git a/fs/afs/security.c b/fs/afs/security.c
index f44b9d355377..8d010422dc89 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -285,14 +285,14 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
285 * - AFS ACLs are attached to directories only, and a file is controlled by its 285 * - AFS ACLs are attached to directories only, and a file is controlled by its
286 * parent directory's ACL 286 * parent directory's ACL
287 */ 287 */
288int afs_permission(struct inode *inode, int mask, unsigned int flags) 288int afs_permission(struct inode *inode, int mask)
289{ 289{
290 struct afs_vnode *vnode = AFS_FS_I(inode); 290 struct afs_vnode *vnode = AFS_FS_I(inode);
291 afs_access_t uninitialized_var(access); 291 afs_access_t uninitialized_var(access);
292 struct key *key; 292 struct key *key;
293 int ret; 293 int ret;
294 294
295 if (flags & IPERM_FLAG_RCU) 295 if (mask & MAY_NOT_BLOCK)
296 return -ECHILD; 296 return -ECHILD;
297 297
298 _enter("{{%x:%u},%lx},%x,", 298 _enter("{{%x:%u},%lx},%x,",
@@ -350,7 +350,7 @@ int afs_permission(struct inode *inode, int mask, unsigned int flags)
350 } 350 }
351 351
352 key_put(key); 352 key_put(key);
353 ret = generic_permission(inode, mask, flags, NULL); 353 ret = generic_permission(inode, mask);
354 _leave(" = %d", ret); 354 _leave(" = %d", ret);
355 return ret; 355 return ret;
356 356
diff --git a/fs/afs/write.c b/fs/afs/write.c
index b806285ff853..9aa52d93c73c 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -681,9 +681,10 @@ int afs_writeback_all(struct afs_vnode *vnode)
681 * - the return status from this call provides a reliable indication of 681 * - the return status from this call provides a reliable indication of
682 * whether any write errors occurred for this process. 682 * whether any write errors occurred for this process.
683 */ 683 */
684int afs_fsync(struct file *file, int datasync) 684int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
685{ 685{
686 struct dentry *dentry = file->f_path.dentry; 686 struct dentry *dentry = file->f_path.dentry;
687 struct inode *inode = file->f_mapping->host;
687 struct afs_writeback *wb, *xwb; 688 struct afs_writeback *wb, *xwb;
688 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); 689 struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
689 int ret; 690 int ret;
@@ -692,12 +693,19 @@ int afs_fsync(struct file *file, int datasync)
692 vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, 693 vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
693 datasync); 694 datasync);
694 695
696 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
697 if (ret)
698 return ret;
699 mutex_lock(&inode->i_mutex);
700
695 /* use a writeback record as a marker in the queue - when this reaches 701 /* use a writeback record as a marker in the queue - when this reaches
696 * the front of the queue, all the outstanding writes are either 702 * the front of the queue, all the outstanding writes are either
697 * completed or rejected */ 703 * completed or rejected */
698 wb = kzalloc(sizeof(*wb), GFP_KERNEL); 704 wb = kzalloc(sizeof(*wb), GFP_KERNEL);
699 if (!wb) 705 if (!wb) {
700 return -ENOMEM; 706 ret = -ENOMEM;
707 goto out;
708 }
701 wb->vnode = vnode; 709 wb->vnode = vnode;
702 wb->first = 0; 710 wb->first = 0;
703 wb->last = -1; 711 wb->last = -1;
@@ -720,7 +728,7 @@ int afs_fsync(struct file *file, int datasync)
720 if (ret < 0) { 728 if (ret < 0) {
721 afs_put_writeback(wb); 729 afs_put_writeback(wb);
722 _leave(" = %d [wb]", ret); 730 _leave(" = %d [wb]", ret);
723 return ret; 731 goto out;
724 } 732 }
725 733
726 /* wait for the preceding writes to actually complete */ 734 /* wait for the preceding writes to actually complete */
@@ -729,6 +737,8 @@ int afs_fsync(struct file *file, int datasync)
729 vnode->writebacks.next == &wb->link); 737 vnode->writebacks.next == &wb->link);
730 afs_put_writeback(wb); 738 afs_put_writeback(wb);
731 _leave(" = %d", ret); 739 _leave(" = %d", ret);
740out:
741 mutex_unlock(&inode->i_mutex);
732 return ret; 742 return ret;
733} 743}
734 744
diff --git a/fs/attr.c b/fs/attr.c
index caf2aa521e2b..538e27959d3f 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -232,17 +232,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
232 if (error) 232 if (error)
233 return error; 233 return error;
234 234
235 if (ia_valid & ATTR_SIZE)
236 down_write(&dentry->d_inode->i_alloc_sem);
237
238 if (inode->i_op->setattr) 235 if (inode->i_op->setattr)
239 error = inode->i_op->setattr(dentry, attr); 236 error = inode->i_op->setattr(dentry, attr);
240 else 237 else
241 error = simple_setattr(dentry, attr); 238 error = simple_setattr(dentry, attr);
242 239
243 if (ia_valid & ATTR_SIZE)
244 up_write(&dentry->d_inode->i_alloc_sem);
245
246 if (!error) 240 if (!error)
247 fsnotify_change(dentry, ia_valid); 241 fsnotify_change(dentry, ia_valid);
248 242
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index bfcb18feb1df..9205cf25f1c6 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -87,7 +87,8 @@ static int bad_file_release(struct inode *inode, struct file *filp)
87 return -EIO; 87 return -EIO;
88} 88}
89 89
90static int bad_file_fsync(struct file *file, int datasync) 90static int bad_file_fsync(struct file *file, loff_t start, loff_t end,
91 int datasync)
91{ 92{
92 return -EIO; 93 return -EIO;
93} 94}
@@ -229,7 +230,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
229 return -EIO; 230 return -EIO;
230} 231}
231 232
232static int bad_inode_permission(struct inode *inode, int mask, unsigned int flags) 233static int bad_inode_permission(struct inode *inode, int mask)
233{ 234{
234 return -EIO; 235 return -EIO;
235} 236}
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 303983fabfd6..dd0fdfc56d38 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -668,8 +668,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
668 * mm->dumpable = 0 regardless of the interpreter's 668 * mm->dumpable = 0 regardless of the interpreter's
669 * permissions. 669 * permissions.
670 */ 670 */
671 if (file_permission(interpreter, MAY_READ) < 0) 671 would_dump(bprm, interpreter);
672 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
673 672
674 retval = kernel_read(interpreter, 0, bprm->buf, 673 retval = kernel_read(interpreter, 0, bprm->buf,
675 BINPRM_BUF_SIZE); 674 BINPRM_BUF_SIZE);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 2bc5dc644b4c..30745f459faf 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -245,8 +245,7 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
245 * mm->dumpable = 0 regardless of the interpreter's 245 * mm->dumpable = 0 regardless of the interpreter's
246 * permissions. 246 * permissions.
247 */ 247 */
248 if (file_permission(interpreter, MAY_READ) < 0) 248 would_dump(bprm, interpreter);
249 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
250 249
251 retval = kernel_read(interpreter, 0, bprm->buf, 250 retval = kernel_read(interpreter, 0, bprm->buf,
252 BINPRM_BUF_SIZE); 251 BINPRM_BUF_SIZE);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 1befe2ec8186..ba1a1ae4a18a 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -149,8 +149,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
149 149
150 /* if the binary is not readable than enforce mm->dumpable=0 150 /* if the binary is not readable than enforce mm->dumpable=0
151 regardless of the interpreter's permissions */ 151 regardless of the interpreter's permissions */
152 if (file_permission(bprm->file, MAY_READ)) 152 would_dump(bprm, bprm->file);
153 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
154 153
155 allow_write_access(bprm->file); 154 allow_write_access(bprm->file);
156 bprm->file = NULL; 155 bprm->file = NULL;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 610e8e0b04b8..9fb0b15331d3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -355,25 +355,30 @@ static loff_t block_llseek(struct file *file, loff_t offset, int origin)
355 mutex_lock(&bd_inode->i_mutex); 355 mutex_lock(&bd_inode->i_mutex);
356 size = i_size_read(bd_inode); 356 size = i_size_read(bd_inode);
357 357
358 retval = -EINVAL;
358 switch (origin) { 359 switch (origin) {
359 case 2: 360 case SEEK_END:
360 offset += size; 361 offset += size;
361 break; 362 break;
362 case 1: 363 case SEEK_CUR:
363 offset += file->f_pos; 364 offset += file->f_pos;
365 case SEEK_SET:
366 break;
367 default:
368 goto out;
364 } 369 }
365 retval = -EINVAL;
366 if (offset >= 0 && offset <= size) { 370 if (offset >= 0 && offset <= size) {
367 if (offset != file->f_pos) { 371 if (offset != file->f_pos) {
368 file->f_pos = offset; 372 file->f_pos = offset;
369 } 373 }
370 retval = offset; 374 retval = offset;
371 } 375 }
376out:
372 mutex_unlock(&bd_inode->i_mutex); 377 mutex_unlock(&bd_inode->i_mutex);
373 return retval; 378 return retval;
374} 379}
375 380
376int blkdev_fsync(struct file *filp, int datasync) 381int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
377{ 382{
378 struct inode *bd_inode = filp->f_mapping->host; 383 struct inode *bd_inode = filp->f_mapping->host;
379 struct block_device *bdev = I_BDEV(bd_inode); 384 struct block_device *bdev = I_BDEV(bd_inode);
@@ -384,14 +389,10 @@ int blkdev_fsync(struct file *filp, int datasync)
384 * i_mutex and doing so causes performance issues with concurrent 389 * i_mutex and doing so causes performance issues with concurrent
385 * O_SYNC writers to a block device. 390 * O_SYNC writers to a block device.
386 */ 391 */
387 mutex_unlock(&bd_inode->i_mutex);
388
389 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); 392 error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL);
390 if (error == -EOPNOTSUPP) 393 if (error == -EOPNOTSUPP)
391 error = 0; 394 error = 0;
392 395
393 mutex_lock(&bd_inode->i_mutex);
394
395 return error; 396 return error;
396} 397}
397EXPORT_SYMBOL(blkdev_fsync); 398EXPORT_SYMBOL(blkdev_fsync);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index f66fc9959733..9f62ab2a7282 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -195,14 +195,13 @@ out:
195 return ret; 195 return ret;
196} 196}
197 197
198int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags) 198int btrfs_check_acl(struct inode *inode, int mask)
199{ 199{
200 int error = -EAGAIN; 200 int error = -EAGAIN;
201 201
202 if (flags & IPERM_FLAG_RCU) { 202 if (mask & MAY_NOT_BLOCK) {
203 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 203 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
204 error = -ECHILD; 204 error = -ECHILD;
205
206 } else { 205 } else {
207 struct posix_acl *acl; 206 struct posix_acl *acl;
208 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); 207 acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3b859a3e6a0e..82be74efbb26 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1219,7 +1219,7 @@ struct btrfs_root {
1219 * right now this just gets used so that a root has its own devid 1219 * right now this just gets used so that a root has its own devid
1220 * for stat. It may be used for more later 1220 * for stat. It may be used for more later
1221 */ 1221 */
1222 struct super_block anon_super; 1222 dev_t anon_dev;
1223}; 1223};
1224 1224
1225struct btrfs_ioctl_defrag_range_args { 1225struct btrfs_ioctl_defrag_range_args {
@@ -2510,6 +2510,9 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
2510int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, 2510int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
2511 struct list_head *list, int search_commit); 2511 struct list_head *list, int search_commit);
2512/* inode.c */ 2512/* inode.c */
2513struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
2514 size_t pg_offset, u64 start, u64 len,
2515 int create);
2513 2516
2514/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */ 2517/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
2515#if defined(ClearPageFsMisc) && !defined(ClearPageChecked) 2518#if defined(ClearPageFsMisc) && !defined(ClearPageChecked)
@@ -2602,7 +2605,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
2602int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, 2605int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
2603 struct inode *inode); 2606 struct inode *inode);
2604int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); 2607int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
2605int btrfs_sync_file(struct file *file, int datasync); 2608int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
2606int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, 2609int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
2607 int skip_pinned); 2610 int skip_pinned);
2608extern const struct file_operations btrfs_file_operations; 2611extern const struct file_operations btrfs_file_operations;
@@ -2642,7 +2645,7 @@ do { \
2642 2645
2643/* acl.c */ 2646/* acl.c */
2644#ifdef CONFIG_BTRFS_FS_POSIX_ACL 2647#ifdef CONFIG_BTRFS_FS_POSIX_ACL
2645int btrfs_check_acl(struct inode *inode, int mask, unsigned int flags); 2648int btrfs_check_acl(struct inode *inode, int mask);
2646#else 2649#else
2647#define btrfs_check_acl NULL 2650#define btrfs_check_acl NULL
2648#endif 2651#endif
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1ac8db5dc0a3..b231ae13b269 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1077,12 +1077,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
1077 init_completion(&root->kobj_unregister); 1077 init_completion(&root->kobj_unregister);
1078 root->defrag_running = 0; 1078 root->defrag_running = 0;
1079 root->root_key.objectid = objectid; 1079 root->root_key.objectid = objectid;
1080 root->anon_super.s_root = NULL; 1080 root->anon_dev = 0;
1081 root->anon_super.s_dev = 0;
1082 INIT_LIST_HEAD(&root->anon_super.s_list);
1083 INIT_LIST_HEAD(&root->anon_super.s_instances);
1084 init_rwsem(&root->anon_super.s_umount);
1085
1086 return 0; 1081 return 0;
1087} 1082}
1088 1083
@@ -1311,7 +1306,7 @@ again:
1311 spin_lock_init(&root->cache_lock); 1306 spin_lock_init(&root->cache_lock);
1312 init_waitqueue_head(&root->cache_wait); 1307 init_waitqueue_head(&root->cache_wait);
1313 1308
1314 ret = set_anon_super(&root->anon_super, NULL); 1309 ret = get_anon_bdev(&root->anon_dev);
1315 if (ret) 1310 if (ret)
1316 goto fail; 1311 goto fail;
1317 1312
@@ -2393,10 +2388,8 @@ static void free_fs_root(struct btrfs_root *root)
2393{ 2388{
2394 iput(root->cache_inode); 2389 iput(root->cache_inode);
2395 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree)); 2390 WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
2396 if (root->anon_super.s_dev) { 2391 if (root->anon_dev)
2397 down_write(&root->anon_super.s_umount); 2392 free_anon_bdev(root->anon_dev);
2398 kill_anon_super(&root->anon_super);
2399 }
2400 free_extent_buffer(root->node); 2393 free_extent_buffer(root->node);
2401 free_extent_buffer(root->commit_root); 2394 free_extent_buffer(root->commit_root);
2402 kfree(root->free_ino_ctl); 2395 kfree(root->free_ino_ctl);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fa4ef18b66b1..59cbdb120ad0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1452,7 +1452,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
1452 * important optimization for directories because holding the mutex prevents 1452 * important optimization for directories because holding the mutex prevents
1453 * new operations on the dir while we write to disk. 1453 * new operations on the dir while we write to disk.
1454 */ 1454 */
1455int btrfs_sync_file(struct file *file, int datasync) 1455int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1456{ 1456{
1457 struct dentry *dentry = file->f_path.dentry; 1457 struct dentry *dentry = file->f_path.dentry;
1458 struct inode *inode = dentry->d_inode; 1458 struct inode *inode = dentry->d_inode;
@@ -1462,9 +1462,13 @@ int btrfs_sync_file(struct file *file, int datasync)
1462 1462
1463 trace_btrfs_sync_file(file, datasync); 1463 trace_btrfs_sync_file(file, datasync);
1464 1464
1465 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
1466 if (ret)
1467 return ret;
1468 mutex_lock(&inode->i_mutex);
1469
1465 /* we wait first, since the writeback may change the inode */ 1470 /* we wait first, since the writeback may change the inode */
1466 root->log_batch++; 1471 root->log_batch++;
1467 /* the VFS called filemap_fdatawrite for us */
1468 btrfs_wait_ordered_range(inode, 0, (u64)-1); 1472 btrfs_wait_ordered_range(inode, 0, (u64)-1);
1469 root->log_batch++; 1473 root->log_batch++;
1470 1474
@@ -1472,8 +1476,10 @@ int btrfs_sync_file(struct file *file, int datasync)
1472 * check the transaction that last modified this inode 1476 * check the transaction that last modified this inode
1473 * and see if its already been committed 1477 * and see if its already been committed
1474 */ 1478 */
1475 if (!BTRFS_I(inode)->last_trans) 1479 if (!BTRFS_I(inode)->last_trans) {
1480 mutex_unlock(&inode->i_mutex);
1476 goto out; 1481 goto out;
1482 }
1477 1483
1478 /* 1484 /*
1479 * if the last transaction that changed this file was before 1485 * if the last transaction that changed this file was before
@@ -1484,6 +1490,7 @@ int btrfs_sync_file(struct file *file, int datasync)
1484 if (BTRFS_I(inode)->last_trans <= 1490 if (BTRFS_I(inode)->last_trans <=
1485 root->fs_info->last_trans_committed) { 1491 root->fs_info->last_trans_committed) {
1486 BTRFS_I(inode)->last_trans = 0; 1492 BTRFS_I(inode)->last_trans = 0;
1493 mutex_unlock(&inode->i_mutex);
1487 goto out; 1494 goto out;
1488 } 1495 }
1489 1496
@@ -1496,12 +1503,15 @@ int btrfs_sync_file(struct file *file, int datasync)
1496 trans = btrfs_start_transaction(root, 0); 1503 trans = btrfs_start_transaction(root, 0);
1497 if (IS_ERR(trans)) { 1504 if (IS_ERR(trans)) {
1498 ret = PTR_ERR(trans); 1505 ret = PTR_ERR(trans);
1506 mutex_unlock(&inode->i_mutex);
1499 goto out; 1507 goto out;
1500 } 1508 }
1501 1509
1502 ret = btrfs_log_dentry_safe(trans, root, dentry); 1510 ret = btrfs_log_dentry_safe(trans, root, dentry);
1503 if (ret < 0) 1511 if (ret < 0) {
1512 mutex_unlock(&inode->i_mutex);
1504 goto out; 1513 goto out;
1514 }
1505 1515
1506 /* we've logged all the items and now have a consistent 1516 /* we've logged all the items and now have a consistent
1507 * version of the file in the log. It is possible that 1517 * version of the file in the log. It is possible that
@@ -1513,7 +1523,7 @@ int btrfs_sync_file(struct file *file, int datasync)
1513 * file again, but that will end up using the synchronization 1523 * file again, but that will end up using the synchronization
1514 * inside btrfs_sync_log to keep things safe. 1524 * inside btrfs_sync_log to keep things safe.
1515 */ 1525 */
1516 mutex_unlock(&dentry->d_inode->i_mutex); 1526 mutex_unlock(&inode->i_mutex);
1517 1527
1518 if (ret != BTRFS_NO_LOG_SYNC) { 1528 if (ret != BTRFS_NO_LOG_SYNC) {
1519 if (ret > 0) { 1529 if (ret > 0) {
@@ -1528,7 +1538,6 @@ int btrfs_sync_file(struct file *file, int datasync)
1528 } else { 1538 } else {
1529 ret = btrfs_end_transaction(trans, root); 1539 ret = btrfs_end_transaction(trans, root);
1530 } 1540 }
1531 mutex_lock(&dentry->d_inode->i_mutex);
1532out: 1541out:
1533 return ret > 0 ? -EIO : ret; 1542 return ret > 0 ? -EIO : ret;
1534} 1543}
@@ -1664,8 +1673,154 @@ out:
1664 return ret; 1673 return ret;
1665} 1674}
1666 1675
1676static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
1677{
1678 struct btrfs_root *root = BTRFS_I(inode)->root;
1679 struct extent_map *em;
1680 struct extent_state *cached_state = NULL;
1681 u64 lockstart = *offset;
1682 u64 lockend = i_size_read(inode);
1683 u64 start = *offset;
1684 u64 orig_start = *offset;
1685 u64 len = i_size_read(inode);
1686 u64 last_end = 0;
1687 int ret = 0;
1688
1689 lockend = max_t(u64, root->sectorsize, lockend);
1690 if (lockend <= lockstart)
1691 lockend = lockstart + root->sectorsize;
1692
1693 len = lockend - lockstart + 1;
1694
1695 len = max_t(u64, len, root->sectorsize);
1696 if (inode->i_size == 0)
1697 return -ENXIO;
1698
1699 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
1700 &cached_state, GFP_NOFS);
1701
1702 /*
1703 * Delalloc is such a pain. If we have a hole and we have pending
1704 * delalloc for a portion of the hole we will get back a hole that
1705 * exists for the entire range since it hasn't been actually written
1706 * yet. So to take care of this case we need to look for an extent just
1707 * before the position we want in case there is outstanding delalloc
1708 * going on here.
1709 */
1710 if (origin == SEEK_HOLE && start != 0) {
1711 if (start <= root->sectorsize)
1712 em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
1713 root->sectorsize, 0);
1714 else
1715 em = btrfs_get_extent_fiemap(inode, NULL, 0,
1716 start - root->sectorsize,
1717 root->sectorsize, 0);
1718 if (IS_ERR(em)) {
1719 ret = -ENXIO;
1720 goto out;
1721 }
1722 last_end = em->start + em->len;
1723 if (em->block_start == EXTENT_MAP_DELALLOC)
1724 last_end = min_t(u64, last_end, inode->i_size);
1725 free_extent_map(em);
1726 }
1727
1728 while (1) {
1729 em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
1730 if (IS_ERR(em)) {
1731 ret = -ENXIO;
1732 break;
1733 }
1734
1735 if (em->block_start == EXTENT_MAP_HOLE) {
1736 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
1737 if (last_end <= orig_start) {
1738 free_extent_map(em);
1739 ret = -ENXIO;
1740 break;
1741 }
1742 }
1743
1744 if (origin == SEEK_HOLE) {
1745 *offset = start;
1746 free_extent_map(em);
1747 break;
1748 }
1749 } else {
1750 if (origin == SEEK_DATA) {
1751 if (em->block_start == EXTENT_MAP_DELALLOC) {
1752 if (start >= inode->i_size) {
1753 free_extent_map(em);
1754 ret = -ENXIO;
1755 break;
1756 }
1757 }
1758
1759 *offset = start;
1760 free_extent_map(em);
1761 break;
1762 }
1763 }
1764
1765 start = em->start + em->len;
1766 last_end = em->start + em->len;
1767
1768 if (em->block_start == EXTENT_MAP_DELALLOC)
1769 last_end = min_t(u64, last_end, inode->i_size);
1770
1771 if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
1772 free_extent_map(em);
1773 ret = -ENXIO;
1774 break;
1775 }
1776 free_extent_map(em);
1777 cond_resched();
1778 }
1779 if (!ret)
1780 *offset = min(*offset, inode->i_size);
1781out:
1782 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
1783 &cached_state, GFP_NOFS);
1784 return ret;
1785}
1786
1787static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
1788{
1789 struct inode *inode = file->f_mapping->host;
1790 int ret;
1791
1792 mutex_lock(&inode->i_mutex);
1793 switch (origin) {
1794 case SEEK_END:
1795 case SEEK_CUR:
1796 offset = generic_file_llseek_unlocked(file, offset, origin);
1797 goto out;
1798 case SEEK_DATA:
1799 case SEEK_HOLE:
1800 ret = find_desired_extent(inode, &offset, origin);
1801 if (ret) {
1802 mutex_unlock(&inode->i_mutex);
1803 return ret;
1804 }
1805 }
1806
1807 if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET))
1808 return -EINVAL;
1809 if (offset > inode->i_sb->s_maxbytes)
1810 return -EINVAL;
1811
1812 /* Special lock needed here? */
1813 if (offset != file->f_pos) {
1814 file->f_pos = offset;
1815 file->f_version = 0;
1816 }
1817out:
1818 mutex_unlock(&inode->i_mutex);
1819 return offset;
1820}
1821
1667const struct file_operations btrfs_file_operations = { 1822const struct file_operations btrfs_file_operations = {
1668 .llseek = generic_file_llseek, 1823 .llseek = btrfs_file_llseek,
1669 .read = do_sync_read, 1824 .read = do_sync_read,
1670 .write = do_sync_write, 1825 .write = do_sync_write,
1671 .aio_read = generic_file_aio_read, 1826 .aio_read = generic_file_aio_read,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3601f0aebddf..2548a04a0230 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4079,13 +4079,7 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
4079static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, 4079static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
4080 struct nameidata *nd) 4080 struct nameidata *nd)
4081{ 4081{
4082 struct inode *inode; 4082 return d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
4083
4084 inode = btrfs_lookup_dentry(dir, dentry);
4085 if (IS_ERR(inode))
4086 return ERR_CAST(inode);
4087
4088 return d_splice_alias(inode, dentry);
4089} 4083}
4090 4084
4091unsigned char btrfs_filetype_table[] = { 4085unsigned char btrfs_filetype_table[] = {
@@ -4772,11 +4766,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
4772 if (err) { 4766 if (err) {
4773 drop_inode = 1; 4767 drop_inode = 1;
4774 } else { 4768 } else {
4775 struct dentry *parent = dget_parent(dentry); 4769 struct dentry *parent = dentry->d_parent;
4776 err = btrfs_update_inode(trans, root, inode); 4770 err = btrfs_update_inode(trans, root, inode);
4777 BUG_ON(err); 4771 BUG_ON(err);
4778 btrfs_log_new_name(trans, inode, NULL, parent); 4772 btrfs_log_new_name(trans, inode, NULL, parent);
4779 dput(parent);
4780 } 4773 }
4781 4774
4782 nr = trans->blocks_used; 4775 nr = trans->blocks_used;
@@ -6900,7 +6893,7 @@ static int btrfs_getattr(struct vfsmount *mnt,
6900{ 6893{
6901 struct inode *inode = dentry->d_inode; 6894 struct inode *inode = dentry->d_inode;
6902 generic_fillattr(inode, stat); 6895 generic_fillattr(inode, stat);
6903 stat->dev = BTRFS_I(inode)->root->anon_super.s_dev; 6896 stat->dev = BTRFS_I(inode)->root->anon_dev;
6904 stat->blksize = PAGE_CACHE_SIZE; 6897 stat->blksize = PAGE_CACHE_SIZE;
6905 stat->blocks = (inode_get_bytes(inode) + 6898 stat->blocks = (inode_get_bytes(inode) +
6906 BTRFS_I(inode)->delalloc_bytes) >> 9; 6899 BTRFS_I(inode)->delalloc_bytes) >> 9;
@@ -7068,9 +7061,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
7068 BUG_ON(ret); 7061 BUG_ON(ret);
7069 7062
7070 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { 7063 if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
7071 struct dentry *parent = dget_parent(new_dentry); 7064 struct dentry *parent = new_dentry->d_parent;
7072 btrfs_log_new_name(trans, old_inode, old_dir, parent); 7065 btrfs_log_new_name(trans, old_inode, old_dir, parent);
7073 dput(parent);
7074 btrfs_end_log_trans(root); 7066 btrfs_end_log_trans(root);
7075 } 7067 }
7076out_fail: 7068out_fail:
@@ -7331,7 +7323,7 @@ static int btrfs_set_page_dirty(struct page *page)
7331 return __set_page_dirty_nobuffers(page); 7323 return __set_page_dirty_nobuffers(page);
7332} 7324}
7333 7325
7334static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) 7326static int btrfs_permission(struct inode *inode, int mask)
7335{ 7327{
7336 struct btrfs_root *root = BTRFS_I(inode)->root; 7328 struct btrfs_root *root = BTRFS_I(inode)->root;
7337 7329
@@ -7339,7 +7331,7 @@ static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
7339 return -EROFS; 7331 return -EROFS;
7340 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) 7332 if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
7341 return -EACCES; 7333 return -EACCES;
7342 return generic_permission(inode, mask, flags, btrfs_check_acl); 7334 return generic_permission(inode, mask);
7343} 7335}
7344 7336
7345static const struct inode_operations btrfs_dir_inode_operations = { 7337static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7359,10 +7351,12 @@ static const struct inode_operations btrfs_dir_inode_operations = {
7359 .listxattr = btrfs_listxattr, 7351 .listxattr = btrfs_listxattr,
7360 .removexattr = btrfs_removexattr, 7352 .removexattr = btrfs_removexattr,
7361 .permission = btrfs_permission, 7353 .permission = btrfs_permission,
7354 .check_acl = btrfs_check_acl,
7362}; 7355};
7363static const struct inode_operations btrfs_dir_ro_inode_operations = { 7356static const struct inode_operations btrfs_dir_ro_inode_operations = {
7364 .lookup = btrfs_lookup, 7357 .lookup = btrfs_lookup,
7365 .permission = btrfs_permission, 7358 .permission = btrfs_permission,
7359 .check_acl = btrfs_check_acl,
7366}; 7360};
7367 7361
7368static const struct file_operations btrfs_dir_file_operations = { 7362static const struct file_operations btrfs_dir_file_operations = {
@@ -7431,6 +7425,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
7431 .removexattr = btrfs_removexattr, 7425 .removexattr = btrfs_removexattr,
7432 .permission = btrfs_permission, 7426 .permission = btrfs_permission,
7433 .fiemap = btrfs_fiemap, 7427 .fiemap = btrfs_fiemap,
7428 .check_acl = btrfs_check_acl,
7434}; 7429};
7435static const struct inode_operations btrfs_special_inode_operations = { 7430static const struct inode_operations btrfs_special_inode_operations = {
7436 .getattr = btrfs_getattr, 7431 .getattr = btrfs_getattr,
@@ -7440,6 +7435,7 @@ static const struct inode_operations btrfs_special_inode_operations = {
7440 .getxattr = btrfs_getxattr, 7435 .getxattr = btrfs_getxattr,
7441 .listxattr = btrfs_listxattr, 7436 .listxattr = btrfs_listxattr,
7442 .removexattr = btrfs_removexattr, 7437 .removexattr = btrfs_removexattr,
7438 .check_acl = btrfs_check_acl,
7443}; 7439};
7444static const struct inode_operations btrfs_symlink_inode_operations = { 7440static const struct inode_operations btrfs_symlink_inode_operations = {
7445 .readlink = generic_readlink, 7441 .readlink = generic_readlink,
@@ -7451,6 +7447,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
7451 .getxattr = btrfs_getxattr, 7447 .getxattr = btrfs_getxattr,
7452 .listxattr = btrfs_listxattr, 7448 .listxattr = btrfs_listxattr,
7453 .removexattr = btrfs_removexattr, 7449 .removexattr = btrfs_removexattr,
7450 .check_acl = btrfs_check_acl,
7454}; 7451};
7455 7452
7456const struct dentry_operations btrfs_dentry_operations = { 7453const struct dentry_operations btrfs_dentry_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a3c4751e07db..622543309eb2 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -323,7 +323,7 @@ static noinline int create_subvol(struct btrfs_root *root,
323 struct btrfs_inode_item *inode_item; 323 struct btrfs_inode_item *inode_item;
324 struct extent_buffer *leaf; 324 struct extent_buffer *leaf;
325 struct btrfs_root *new_root; 325 struct btrfs_root *new_root;
326 struct dentry *parent = dget_parent(dentry); 326 struct dentry *parent = dentry->d_parent;
327 struct inode *dir; 327 struct inode *dir;
328 int ret; 328 int ret;
329 int err; 329 int err;
@@ -332,10 +332,8 @@ static noinline int create_subvol(struct btrfs_root *root,
332 u64 index = 0; 332 u64 index = 0;
333 333
334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); 334 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
335 if (ret) { 335 if (ret)
336 dput(parent);
337 return ret; 336 return ret;
338 }
339 337
340 dir = parent->d_inode; 338 dir = parent->d_inode;
341 339
@@ -346,10 +344,8 @@ static noinline int create_subvol(struct btrfs_root *root,
346 * 2 - dir items 344 * 2 - dir items
347 */ 345 */
348 trans = btrfs_start_transaction(root, 6); 346 trans = btrfs_start_transaction(root, 6);
349 if (IS_ERR(trans)) { 347 if (IS_ERR(trans))
350 dput(parent);
351 return PTR_ERR(trans); 348 return PTR_ERR(trans);
352 }
353 349
354 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 350 leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
355 0, objectid, NULL, 0, 0, 0); 351 0, objectid, NULL, 0, 0, 0);
@@ -439,7 +435,6 @@ static noinline int create_subvol(struct btrfs_root *root,
439 435
440 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 436 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
441fail: 437fail:
442 dput(parent);
443 if (async_transid) { 438 if (async_transid) {
444 *async_transid = trans->transid; 439 *async_transid = trans->transid;
445 err = btrfs_commit_transaction_async(trans, root, 1); 440 err = btrfs_commit_transaction_async(trans, root, 1);
@@ -456,7 +451,6 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
456 bool readonly) 451 bool readonly)
457{ 452{
458 struct inode *inode; 453 struct inode *inode;
459 struct dentry *parent;
460 struct btrfs_pending_snapshot *pending_snapshot; 454 struct btrfs_pending_snapshot *pending_snapshot;
461 struct btrfs_trans_handle *trans; 455 struct btrfs_trans_handle *trans;
462 int ret; 456 int ret;
@@ -504,9 +498,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
504 if (ret) 498 if (ret)
505 goto fail; 499 goto fail;
506 500
507 parent = dget_parent(dentry); 501 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
508 inode = btrfs_lookup_dentry(parent->d_inode, dentry);
509 dput(parent);
510 if (IS_ERR(inode)) { 502 if (IS_ERR(inode)) {
511 ret = PTR_ERR(inode); 503 ret = PTR_ERR(inode);
512 goto fail; 504 goto fail;
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index a2603e7c0bb5..622f4696e484 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -129,8 +129,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
129 !root->d_inode->i_op->mkdir || 129 !root->d_inode->i_op->mkdir ||
130 !root->d_inode->i_op->setxattr || 130 !root->d_inode->i_op->setxattr ||
131 !root->d_inode->i_op->getxattr || 131 !root->d_inode->i_op->getxattr ||
132 !root->d_sb ||
133 !root->d_sb->s_op ||
134 !root->d_sb->s_op->statfs || 132 !root->d_sb->s_op->statfs ||
135 !root->d_sb->s_op->sync_fs) 133 !root->d_sb->s_op->sync_fs)
136 goto error_unsupported; 134 goto error_unsupported;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f605753c8fe9..8d74ad7ba556 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1811,7 +1811,7 @@ out:
1811 spin_unlock(&ci->i_unsafe_lock); 1811 spin_unlock(&ci->i_unsafe_lock);
1812} 1812}
1813 1813
1814int ceph_fsync(struct file *file, int datasync) 1814int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1815{ 1815{
1816 struct inode *inode = file->f_mapping->host; 1816 struct inode *inode = file->f_mapping->host;
1817 struct ceph_inode_info *ci = ceph_inode(inode); 1817 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1822,9 +1822,10 @@ int ceph_fsync(struct file *file, int datasync)
1822 dout("fsync %p%s\n", inode, datasync ? " datasync" : ""); 1822 dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
1823 sync_write_wait(inode); 1823 sync_write_wait(inode);
1824 1824
1825 ret = filemap_write_and_wait(inode->i_mapping); 1825 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
1826 if (ret < 0) 1826 if (ret < 0)
1827 return ret; 1827 return ret;
1828 mutex_lock(&inode->i_mutex);
1828 1829
1829 dirty = try_flush_caps(inode, NULL, &flush_tid); 1830 dirty = try_flush_caps(inode, NULL, &flush_tid);
1830 dout("fsync dirty caps are %s\n", ceph_cap_string(dirty)); 1831 dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -1841,6 +1842,7 @@ int ceph_fsync(struct file *file, int datasync)
1841 } 1842 }
1842 1843
1843 dout("fsync %p%s done\n", inode, datasync ? " datasync" : ""); 1844 dout("fsync %p%s done\n", inode, datasync ? " datasync" : "");
1845 mutex_unlock(&inode->i_mutex);
1844 return ret; 1846 return ret;
1845} 1847}
1846 1848
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ef8f08c343e8..1065ac779840 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -252,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
252 off = 1; 252 off = 1;
253 } 253 }
254 if (filp->f_pos == 1) { 254 if (filp->f_pos == 1) {
255 ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino; 255 ino_t ino = parent_ino(filp->f_dentry);
256 dout("readdir off 1 -> '..'\n"); 256 dout("readdir off 1 -> '..'\n");
257 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), 257 if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
258 ceph_translate_ino(inode->i_sb, ino), 258 ceph_translate_ino(inode->i_sb, ino),
@@ -446,14 +446,19 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
446 loff_t retval; 446 loff_t retval;
447 447
448 mutex_lock(&inode->i_mutex); 448 mutex_lock(&inode->i_mutex);
449 retval = -EINVAL;
449 switch (origin) { 450 switch (origin) {
450 case SEEK_END: 451 case SEEK_END:
451 offset += inode->i_size + 2; /* FIXME */ 452 offset += inode->i_size + 2; /* FIXME */
452 break; 453 break;
453 case SEEK_CUR: 454 case SEEK_CUR:
454 offset += file->f_pos; 455 offset += file->f_pos;
456 case SEEK_SET:
457 break;
458 default:
459 goto out;
455 } 460 }
456 retval = -EINVAL; 461
457 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { 462 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
458 if (offset != file->f_pos) { 463 if (offset != file->f_pos) {
459 file->f_pos = offset; 464 file->f_pos = offset;
@@ -477,6 +482,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
477 if (offset > old_offset) 482 if (offset > old_offset)
478 fi->dir_release_count--; 483 fi->dir_release_count--;
479 } 484 }
485out:
480 mutex_unlock(&inode->i_mutex); 486 mutex_unlock(&inode->i_mutex);
481 return retval; 487 return retval;
482} 488}
@@ -566,7 +572,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
566 /* open (but not create!) intent? */ 572 /* open (but not create!) intent? */
567 if (nd && 573 if (nd &&
568 (nd->flags & LOOKUP_OPEN) && 574 (nd->flags & LOOKUP_OPEN) &&
569 (nd->flags & LOOKUP_CONTINUE) == 0 && /* only open last component */
570 !(nd->intent.open.flags & O_CREAT)) { 575 !(nd->intent.open.flags & O_CREAT)) {
571 int mode = nd->intent.open.create_mode & ~current->fs->umask; 576 int mode = nd->intent.open.create_mode & ~current->fs->umask;
572 return ceph_lookup_open(dir, dentry, nd, mode, 1); 577 return ceph_lookup_open(dir, dentry, nd, mode, 1);
@@ -1113,7 +1118,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
1113 * an fsync() on a dir will wait for any uncommitted directory 1118 * an fsync() on a dir will wait for any uncommitted directory
1114 * operations to commit. 1119 * operations to commit.
1115 */ 1120 */
1116static int ceph_dir_fsync(struct file *file, int datasync) 1121static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
1122 int datasync)
1117{ 1123{
1118 struct inode *inode = file->f_path.dentry->d_inode; 1124 struct inode *inode = file->f_path.dentry->d_inode;
1119 struct ceph_inode_info *ci = ceph_inode(inode); 1125 struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1123,6 +1129,11 @@ static int ceph_dir_fsync(struct file *file, int datasync)
1123 int ret = 0; 1129 int ret = 0;
1124 1130
1125 dout("dir_fsync %p\n", inode); 1131 dout("dir_fsync %p\n", inode);
1132 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
1133 if (ret)
1134 return ret;
1135 mutex_lock(&inode->i_mutex);
1136
1126 spin_lock(&ci->i_unsafe_lock); 1137 spin_lock(&ci->i_unsafe_lock);
1127 if (list_empty(head)) 1138 if (list_empty(head))
1128 goto out; 1139 goto out;
@@ -1156,6 +1167,8 @@ static int ceph_dir_fsync(struct file *file, int datasync)
1156 } while (req->r_tid < last_tid); 1167 } while (req->r_tid < last_tid);
1157out: 1168out:
1158 spin_unlock(&ci->i_unsafe_lock); 1169 spin_unlock(&ci->i_unsafe_lock);
1170 mutex_unlock(&inode->i_mutex);
1171
1159 return ret; 1172 return ret;
1160} 1173}
1161 1174
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4698a5c553dc..0d0eae05598f 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -226,7 +226,7 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
226 struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); 226 struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
227 struct ceph_mds_request *req; 227 struct ceph_mds_request *req;
228 int err; 228 int err;
229 int flags = nd->intent.open.flags - 1; /* silly vfs! */ 229 int flags = nd->intent.open.flags;
230 230
231 dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n", 231 dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
232 dentry, dentry->d_name.len, dentry->d_name.name, flags, mode); 232 dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
@@ -768,13 +768,16 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
768 768
769 mutex_lock(&inode->i_mutex); 769 mutex_lock(&inode->i_mutex);
770 __ceph_do_pending_vmtruncate(inode); 770 __ceph_do_pending_vmtruncate(inode);
771 switch (origin) { 771 if (origin != SEEK_CUR || origin != SEEK_SET) {
772 case SEEK_END:
773 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE); 772 ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
774 if (ret < 0) { 773 if (ret < 0) {
775 offset = ret; 774 offset = ret;
776 goto out; 775 goto out;
777 } 776 }
777 }
778
779 switch (origin) {
780 case SEEK_END:
778 offset += inode->i_size; 781 offset += inode->i_size;
779 break; 782 break;
780 case SEEK_CUR: 783 case SEEK_CUR:
@@ -790,6 +793,19 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
790 } 793 }
791 offset += file->f_pos; 794 offset += file->f_pos;
792 break; 795 break;
796 case SEEK_DATA:
797 if (offset >= inode->i_size) {
798 ret = -ENXIO;
799 goto out;
800 }
801 break;
802 case SEEK_HOLE:
803 if (offset >= inode->i_size) {
804 ret = -ENXIO;
805 goto out;
806 }
807 offset = inode->i_size;
808 break;
793 } 809 }
794 810
795 if (offset < 0 || offset > inode->i_sb->s_maxbytes) { 811 if (offset < 0 || offset > inode->i_sb->s_maxbytes) {
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index d8858e96ab18..dfb2831d8d85 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1795,17 +1795,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
1795 * Check inode permissions. We verify we have a valid value for 1795 * Check inode permissions. We verify we have a valid value for
1796 * the AUTH cap, then call the generic handler. 1796 * the AUTH cap, then call the generic handler.
1797 */ 1797 */
1798int ceph_permission(struct inode *inode, int mask, unsigned int flags) 1798int ceph_permission(struct inode *inode, int mask)
1799{ 1799{
1800 int err; 1800 int err;
1801 1801
1802 if (flags & IPERM_FLAG_RCU) 1802 if (mask & MAY_NOT_BLOCK)
1803 return -ECHILD; 1803 return -ECHILD;
1804 1804
1805 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED); 1805 err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
1806 1806
1807 if (!err) 1807 if (!err)
1808 err = generic_permission(inode, mask, flags, NULL); 1808 err = generic_permission(inode, mask);
1809 return err; 1809 return err;
1810} 1810}
1811 1811
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f5cabefa98dc..30446b144e3d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -692,7 +692,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
692extern void ceph_queue_writeback(struct inode *inode); 692extern void ceph_queue_writeback(struct inode *inode);
693 693
694extern int ceph_do_getattr(struct inode *inode, int mask); 694extern int ceph_do_getattr(struct inode *inode, int mask);
695extern int ceph_permission(struct inode *inode, int mask, unsigned int flags); 695extern int ceph_permission(struct inode *inode, int mask);
696extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); 696extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
697extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, 697extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
698 struct kstat *stat); 698 struct kstat *stat);
@@ -728,7 +728,8 @@ extern void ceph_put_cap(struct ceph_mds_client *mdsc,
728 728
729extern void ceph_queue_caps_release(struct inode *inode); 729extern void ceph_queue_caps_release(struct inode *inode);
730extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc); 730extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
731extern int ceph_fsync(struct file *file, int datasync); 731extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
732 int datasync);
732extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc, 733extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
733 struct ceph_mds_session *session); 734 struct ceph_mds_session *session);
734extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, 735extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index bc4b12ca537b..865517470967 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -224,7 +224,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
224 return 0; 224 return 0;
225} 225}
226 226
227static int cifs_permission(struct inode *inode, int mask, unsigned int flags) 227static int cifs_permission(struct inode *inode, int mask)
228{ 228{
229 struct cifs_sb_info *cifs_sb; 229 struct cifs_sb_info *cifs_sb;
230 230
@@ -239,7 +239,7 @@ static int cifs_permission(struct inode *inode, int mask, unsigned int flags)
239 on the client (above and beyond ACL on servers) for 239 on the client (above and beyond ACL on servers) for
240 servers which do not support setting and viewing mode bits, 240 servers which do not support setting and viewing mode bits,
241 so allowing client to check permissions is useful */ 241 so allowing client to check permissions is useful */
242 return generic_permission(inode, mask, flags, NULL); 242 return generic_permission(inode, mask);
243} 243}
244 244
245static struct kmem_cache *cifs_inode_cachep; 245static struct kmem_cache *cifs_inode_cachep;
@@ -704,8 +704,11 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
704 704
705static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) 705static loff_t cifs_llseek(struct file *file, loff_t offset, int origin)
706{ 706{
707 /* origin == SEEK_END => we must revalidate the cached file length */ 707 /*
708 if (origin == SEEK_END) { 708 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
709 * the cached file length
710 */
711 if (origin != SEEK_SET || origin != SEEK_CUR) {
709 int rc; 712 int rc;
710 struct inode *inode = file->f_path.dentry->d_inode; 713 struct inode *inode = file->f_path.dentry->d_inode;
711 714
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 036ca83e5f46..fbd050c8d52a 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -91,8 +91,8 @@ extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
91extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, 91extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
92 unsigned long nr_segs, loff_t pos); 92 unsigned long nr_segs, loff_t pos);
93extern int cifs_lock(struct file *, int, struct file_lock *); 93extern int cifs_lock(struct file *, int, struct file_lock *);
94extern int cifs_fsync(struct file *, int); 94extern int cifs_fsync(struct file *, loff_t, loff_t, int);
95extern int cifs_strict_fsync(struct file *, int); 95extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int);
96extern int cifs_flush(struct file *, fl_owner_t id); 96extern int cifs_flush(struct file *, fl_owner_t id);
97extern int cifs_file_mmap(struct file * , struct vm_area_struct *); 97extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
98extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); 98extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index ccc1afa0bf3b..e66297bad412 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -320,9 +320,10 @@ requeue_echo:
320} 320}
321 321
322static int 322static int
323cifs_demultiplex_thread(struct TCP_Server_Info *server) 323cifs_demultiplex_thread(void *p)
324{ 324{
325 int length; 325 int length;
326 struct TCP_Server_Info *server = p;
326 unsigned int pdu_length, total_read; 327 unsigned int pdu_length, total_read;
327 struct smb_hdr *smb_buffer = NULL; 328 struct smb_hdr *smb_buffer = NULL;
328 struct smb_hdr *bigbuf = NULL; 329 struct smb_hdr *bigbuf = NULL;
@@ -1791,7 +1792,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
1791 * this will succeed. No need for try_module_get(). 1792 * this will succeed. No need for try_module_get().
1792 */ 1793 */
1793 __module_get(THIS_MODULE); 1794 __module_get(THIS_MODULE);
1794 tcp_ses->tsk = kthread_run((void *)(void *)cifs_demultiplex_thread, 1795 tcp_ses->tsk = kthread_run(cifs_demultiplex_thread,
1795 tcp_ses, "cifsd"); 1796 tcp_ses, "cifsd");
1796 if (IS_ERR(tcp_ses->tsk)) { 1797 if (IS_ERR(tcp_ses->tsk)) {
1797 rc = PTR_ERR(tcp_ses->tsk); 1798 rc = PTR_ERR(tcp_ses->tsk);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index fa8c21d913bc..14d602f178c2 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -179,7 +179,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
179 if (oplockEnabled) 179 if (oplockEnabled)
180 oplock = REQ_OPLOCK; 180 oplock = REQ_OPLOCK;
181 181
182 if (nd && (nd->flags & LOOKUP_OPEN)) 182 if (nd)
183 oflags = nd->intent.open.file->f_flags; 183 oflags = nd->intent.open.file->f_flags;
184 else 184 else
185 oflags = O_RDONLY | O_CREAT; 185 oflags = O_RDONLY | O_CREAT;
@@ -214,7 +214,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
214 which should be rare for path not covered on files) */ 214 which should be rare for path not covered on files) */
215 } 215 }
216 216
217 if (nd && (nd->flags & LOOKUP_OPEN)) { 217 if (nd) {
218 /* if the file is going to stay open, then we 218 /* if the file is going to stay open, then we
219 need to set the desired access properly */ 219 need to set the desired access properly */
220 desiredAccess = 0; 220 desiredAccess = 0;
@@ -328,7 +328,7 @@ cifs_create_set_dentry:
328 else 328 else
329 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); 329 cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
330 330
331 if (newinode && nd && (nd->flags & LOOKUP_OPEN)) { 331 if (newinode && nd) {
332 struct cifsFileInfo *pfile_info; 332 struct cifsFileInfo *pfile_info;
333 struct file *filp; 333 struct file *filp;
334 334
@@ -568,7 +568,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
568 * reduction in network traffic in the other paths. 568 * reduction in network traffic in the other paths.
569 */ 569 */
570 if (pTcon->unix_ext) { 570 if (pTcon->unix_ext) {
571 if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && 571 if (nd && !(nd->flags & LOOKUP_DIRECTORY) &&
572 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && 572 (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
573 (nd->intent.open.file->f_flags & O_CREAT)) { 573 (nd->intent.open.file->f_flags & O_CREAT)) {
574 rc = cifs_posix_open(full_path, &newInode, 574 rc = cifs_posix_open(full_path, &newInode,
@@ -663,10 +663,8 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd)
663 * case sensitive name which is specified by user if this is 663 * case sensitive name which is specified by user if this is
664 * for creation. 664 * for creation.
665 */ 665 */
666 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { 666 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
667 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 667 return 0;
668 return 0;
669 }
670 668
671 if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled) 669 if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled)
672 return 0; 670 return 0;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index bb71471a4d9d..378acdafa356 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1401,7 +1401,8 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
1401 return rc; 1401 return rc;
1402} 1402}
1403 1403
1404int cifs_strict_fsync(struct file *file, int datasync) 1404int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
1405 int datasync)
1405{ 1406{
1406 int xid; 1407 int xid;
1407 int rc = 0; 1408 int rc = 0;
@@ -1410,6 +1411,11 @@ int cifs_strict_fsync(struct file *file, int datasync)
1410 struct inode *inode = file->f_path.dentry->d_inode; 1411 struct inode *inode = file->f_path.dentry->d_inode;
1411 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); 1412 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1412 1413
1414 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1415 if (rc)
1416 return rc;
1417 mutex_lock(&inode->i_mutex);
1418
1413 xid = GetXid(); 1419 xid = GetXid();
1414 1420
1415 cFYI(1, "Sync file - name: %s datasync: 0x%x", 1421 cFYI(1, "Sync file - name: %s datasync: 0x%x",
@@ -1428,16 +1434,23 @@ int cifs_strict_fsync(struct file *file, int datasync)
1428 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); 1434 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1429 1435
1430 FreeXid(xid); 1436 FreeXid(xid);
1437 mutex_unlock(&inode->i_mutex);
1431 return rc; 1438 return rc;
1432} 1439}
1433 1440
1434int cifs_fsync(struct file *file, int datasync) 1441int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1435{ 1442{
1436 int xid; 1443 int xid;
1437 int rc = 0; 1444 int rc = 0;
1438 struct cifs_tcon *tcon; 1445 struct cifs_tcon *tcon;
1439 struct cifsFileInfo *smbfile = file->private_data; 1446 struct cifsFileInfo *smbfile = file->private_data;
1440 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); 1447 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1448 struct inode *inode = file->f_mapping->host;
1449
1450 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1451 if (rc)
1452 return rc;
1453 mutex_lock(&inode->i_mutex);
1441 1454
1442 xid = GetXid(); 1455 xid = GetXid();
1443 1456
@@ -1449,6 +1462,7 @@ int cifs_fsync(struct file *file, int datasync)
1449 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); 1462 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1450 1463
1451 FreeXid(xid); 1464 FreeXid(xid);
1465 mutex_unlock(&inode->i_mutex);
1452 return rc; 1466 return rc;
1453} 1467}
1454 1468
@@ -1737,7 +1751,7 @@ cifs_iovec_read(struct file *file, const struct iovec *iov,
1737 io_parms.pid = pid; 1751 io_parms.pid = pid;
1738 io_parms.tcon = pTcon; 1752 io_parms.tcon = pTcon;
1739 io_parms.offset = *poffset; 1753 io_parms.offset = *poffset;
1740 io_parms.length = len; 1754 io_parms.length = cur_len;
1741 rc = CIFSSMBRead(xid, &io_parms, &bytes_read, 1755 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
1742 &read_data, &buf_type); 1756 &read_data, &buf_type);
1743 pSMBr = (struct smb_com_read_rsp *)read_data; 1757 pSMBr = (struct smb_com_read_rsp *)read_data;
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 6751e745bbc6..965a3af186a1 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -796,7 +796,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir)
796 file->f_pos++; 796 file->f_pos++;
797 case 1: 797 case 1:
798 if (filldir(direntry, "..", 2, file->f_pos, 798 if (filldir(direntry, "..", 2, file->f_pos,
799 file->f_path.dentry->d_parent->d_inode->i_ino, DT_DIR) < 0) { 799 parent_ino(file->f_path.dentry), DT_DIR) < 0) {
800 cERROR(1, "Filldir for parent dir failed"); 800 cERROR(1, "Filldir for parent dir failed");
801 rc = -ENOMEM; 801 rc = -ENOMEM;
802 break; 802 break;
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h
index 6b443ff43a19..b7143cf783ac 100644
--- a/fs/coda/coda_int.h
+++ b/fs/coda/coda_int.h
@@ -11,7 +11,7 @@ extern int coda_fake_statfs;
11 11
12void coda_destroy_inodecache(void); 12void coda_destroy_inodecache(void);
13int coda_init_inodecache(void); 13int coda_init_inodecache(void);
14int coda_fsync(struct file *coda_file, int datasync); 14int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync);
15void coda_sysctl_init(void); 15void coda_sysctl_init(void);
16void coda_sysctl_clean(void); 16void coda_sysctl_clean(void);
17 17
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index 9b0c5323890b..44e17e9c21ae 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -39,7 +39,7 @@ extern const struct file_operations coda_ioctl_operations;
39/* operations shared over more than one file */ 39/* operations shared over more than one file */
40int coda_open(struct inode *i, struct file *f); 40int coda_open(struct inode *i, struct file *f);
41int coda_release(struct inode *i, struct file *f); 41int coda_release(struct inode *i, struct file *f);
42int coda_permission(struct inode *inode, int mask, unsigned int flags); 42int coda_permission(struct inode *inode, int mask);
43int coda_revalidate_inode(struct dentry *); 43int coda_revalidate_inode(struct dentry *);
44int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); 44int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
45int coda_setattr(struct dentry *, struct iattr *); 45int coda_setattr(struct dentry *, struct iattr *);
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 2b8dae4d121e..0239433f50cb 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -132,11 +132,11 @@ exit:
132} 132}
133 133
134 134
135int coda_permission(struct inode *inode, int mask, unsigned int flags) 135int coda_permission(struct inode *inode, int mask)
136{ 136{
137 int error; 137 int error;
138 138
139 if (flags & IPERM_FLAG_RCU) 139 if (mask & MAY_NOT_BLOCK)
140 return -ECHILD; 140 return -ECHILD;
141 141
142 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 142 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
@@ -449,8 +449,7 @@ static int coda_venus_readdir(struct file *coda_file, void *buf,
449 struct file *host_file; 449 struct file *host_file;
450 struct dentry *de; 450 struct dentry *de;
451 struct venus_dirent *vdir; 451 struct venus_dirent *vdir;
452 unsigned long vdir_size = 452 unsigned long vdir_size = offsetof(struct venus_dirent, d_name);
453 (unsigned long)(&((struct venus_dirent *)0)->d_name);
454 unsigned int type; 453 unsigned int type;
455 struct qstr name; 454 struct qstr name;
456 ino_t ino; 455 ino_t ino;
@@ -474,7 +473,7 @@ static int coda_venus_readdir(struct file *coda_file, void *buf,
474 coda_file->f_pos++; 473 coda_file->f_pos++;
475 } 474 }
476 if (coda_file->f_pos == 1) { 475 if (coda_file->f_pos == 1) {
477 ret = filldir(buf, "..", 2, 1, de->d_parent->d_inode->i_ino, DT_DIR); 476 ret = filldir(buf, "..", 2, 1, parent_ino(de), DT_DIR);
478 if (ret < 0) 477 if (ret < 0)
479 goto out; 478 goto out;
480 result++; 479 result++;
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 0433057be330..8edd404e6419 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -199,7 +199,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
199 return 0; 199 return 0;
200} 200}
201 201
202int coda_fsync(struct file *coda_file, int datasync) 202int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
203{ 203{
204 struct file *host_file; 204 struct file *host_file;
205 struct inode *coda_inode = coda_file->f_path.dentry->d_inode; 205 struct inode *coda_inode = coda_file->f_path.dentry->d_inode;
@@ -210,6 +210,11 @@ int coda_fsync(struct file *coda_file, int datasync)
210 S_ISLNK(coda_inode->i_mode))) 210 S_ISLNK(coda_inode->i_mode)))
211 return -EINVAL; 211 return -EINVAL;
212 212
213 err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
214 if (err)
215 return err;
216 mutex_lock(&coda_inode->i_mutex);
217
213 cfi = CODA_FTOC(coda_file); 218 cfi = CODA_FTOC(coda_file);
214 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); 219 BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
215 host_file = cfi->cfi_container; 220 host_file = cfi->cfi_container;
@@ -217,6 +222,7 @@ int coda_fsync(struct file *coda_file, int datasync)
217 err = vfs_fsync(host_file, datasync); 222 err = vfs_fsync(host_file, datasync);
218 if (!err && !datasync) 223 if (!err && !datasync)
219 err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); 224 err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
225 mutex_unlock(&coda_inode->i_mutex);
220 226
221 return err; 227 return err;
222} 228}
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index cb140ef293e4..ee0981f1375b 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
24#include "coda_linux.h" 24#include "coda_linux.h"
25 25
26/* pioctl ops */ 26/* pioctl ops */
27static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags); 27static int coda_ioctl_permission(struct inode *inode, int mask);
28static long coda_pioctl(struct file *filp, unsigned int cmd, 28static long coda_pioctl(struct file *filp, unsigned int cmd,
29 unsigned long user_data); 29 unsigned long user_data);
30 30
@@ -41,7 +41,7 @@ const struct file_operations coda_ioctl_operations = {
41}; 41};
42 42
43/* the coda pioctl inode ops */ 43/* the coda pioctl inode ops */
44static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags) 44static int coda_ioctl_permission(struct inode *inode, int mask)
45{ 45{
46 return (mask & MAY_EXEC) ? -EACCES : 0; 46 return (mask & MAY_EXEC) ? -EACCES : 0;
47} 47}
diff --git a/fs/dcache.c b/fs/dcache.c
index 6e4ea6d87774..be18598c7fd7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -344,6 +344,24 @@ void d_drop(struct dentry *dentry)
344EXPORT_SYMBOL(d_drop); 344EXPORT_SYMBOL(d_drop);
345 345
346/* 346/*
347 * d_clear_need_lookup - drop a dentry from cache and clear the need lookup flag
348 * @dentry: dentry to drop
349 *
350 * This is called when we do a lookup on a placeholder dentry that needed to be
351 * looked up. The dentry should have been hashed in order for it to be found by
352 * the lookup code, but now needs to be unhashed while we do the actual lookup
353 * and clear the DCACHE_NEED_LOOKUP flag.
354 */
355void d_clear_need_lookup(struct dentry *dentry)
356{
357 spin_lock(&dentry->d_lock);
358 __d_drop(dentry);
359 dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
360 spin_unlock(&dentry->d_lock);
361}
362EXPORT_SYMBOL(d_clear_need_lookup);
363
364/*
347 * Finish off a dentry we've decided to kill. 365 * Finish off a dentry we've decided to kill.
348 * dentry->d_lock must be held, returns with it unlocked. 366 * dentry->d_lock must be held, returns with it unlocked.
349 * If ref is non-zero, then decrement the refcount too. 367 * If ref is non-zero, then decrement the refcount too.
@@ -432,8 +450,13 @@ repeat:
432 if (d_unhashed(dentry)) 450 if (d_unhashed(dentry))
433 goto kill_it; 451 goto kill_it;
434 452
435 /* Otherwise leave it cached and ensure it's on the LRU */ 453 /*
436 dentry->d_flags |= DCACHE_REFERENCED; 454 * If this dentry needs lookup, don't set the referenced flag so that it
455 * is more likely to be cleaned up by the dcache shrinker in case of
456 * memory pressure.
457 */
458 if (!d_need_lookup(dentry))
459 dentry->d_flags |= DCACHE_REFERENCED;
437 dentry_lru_add(dentry); 460 dentry_lru_add(dentry);
438 461
439 dentry->d_count--; 462 dentry->d_count--;
@@ -526,10 +549,6 @@ repeat:
526 */ 549 */
527 rcu_read_lock(); 550 rcu_read_lock();
528 ret = dentry->d_parent; 551 ret = dentry->d_parent;
529 if (!ret) {
530 rcu_read_unlock();
531 goto out;
532 }
533 spin_lock(&ret->d_lock); 552 spin_lock(&ret->d_lock);
534 if (unlikely(ret != dentry->d_parent)) { 553 if (unlikely(ret != dentry->d_parent)) {
535 spin_unlock(&ret->d_lock); 554 spin_unlock(&ret->d_lock);
@@ -540,7 +559,6 @@ repeat:
540 BUG_ON(!ret->d_count); 559 BUG_ON(!ret->d_count);
541 ret->d_count++; 560 ret->d_count++;
542 spin_unlock(&ret->d_lock); 561 spin_unlock(&ret->d_lock);
543out:
544 return ret; 562 return ret;
545} 563}
546EXPORT_SYMBOL(dget_parent); 564EXPORT_SYMBOL(dget_parent);
@@ -720,13 +738,11 @@ static void shrink_dentry_list(struct list_head *list)
720 * 738 *
721 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned. 739 * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
722 */ 740 */
723static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) 741static void __shrink_dcache_sb(struct super_block *sb, int count, int flags)
724{ 742{
725 /* called from prune_dcache() and shrink_dcache_parent() */
726 struct dentry *dentry; 743 struct dentry *dentry;
727 LIST_HEAD(referenced); 744 LIST_HEAD(referenced);
728 LIST_HEAD(tmp); 745 LIST_HEAD(tmp);
729 int cnt = *count;
730 746
731relock: 747relock:
732 spin_lock(&dcache_lru_lock); 748 spin_lock(&dcache_lru_lock);
@@ -754,7 +770,7 @@ relock:
754 } else { 770 } else {
755 list_move_tail(&dentry->d_lru, &tmp); 771 list_move_tail(&dentry->d_lru, &tmp);
756 spin_unlock(&dentry->d_lock); 772 spin_unlock(&dentry->d_lock);
757 if (!--cnt) 773 if (!--count)
758 break; 774 break;
759 } 775 }
760 cond_resched_lock(&dcache_lru_lock); 776 cond_resched_lock(&dcache_lru_lock);
@@ -764,83 +780,22 @@ relock:
764 spin_unlock(&dcache_lru_lock); 780 spin_unlock(&dcache_lru_lock);
765 781
766 shrink_dentry_list(&tmp); 782 shrink_dentry_list(&tmp);
767
768 *count = cnt;
769} 783}
770 784
771/** 785/**
772 * prune_dcache - shrink the dcache 786 * prune_dcache_sb - shrink the dcache
773 * @count: number of entries to try to free 787 * @nr_to_scan: number of entries to try to free
774 * 788 *
775 * Shrink the dcache. This is done when we need more memory, or simply when we 789 * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
776 * need to unmount something (at which point we need to unuse all dentries). 790 * done when we need more memory an called from the superblock shrinker
791 * function.
777 * 792 *
778 * This function may fail to free any resources if all the dentries are in use. 793 * This function may fail to free any resources if all the dentries are in
794 * use.
779 */ 795 */
780static void prune_dcache(int count) 796void prune_dcache_sb(struct super_block *sb, int nr_to_scan)
781{ 797{
782 struct super_block *sb, *p = NULL; 798 __shrink_dcache_sb(sb, nr_to_scan, DCACHE_REFERENCED);
783 int w_count;
784 int unused = dentry_stat.nr_unused;
785 int prune_ratio;
786 int pruned;
787
788 if (unused == 0 || count == 0)
789 return;
790 if (count >= unused)
791 prune_ratio = 1;
792 else
793 prune_ratio = unused / count;
794 spin_lock(&sb_lock);
795 list_for_each_entry(sb, &super_blocks, s_list) {
796 if (list_empty(&sb->s_instances))
797 continue;
798 if (sb->s_nr_dentry_unused == 0)
799 continue;
800 sb->s_count++;
801 /* Now, we reclaim unused dentrins with fairness.
802 * We reclaim them same percentage from each superblock.
803 * We calculate number of dentries to scan on this sb
804 * as follows, but the implementation is arranged to avoid
805 * overflows:
806 * number of dentries to scan on this sb =
807 * count * (number of dentries on this sb /
808 * number of dentries in the machine)
809 */
810 spin_unlock(&sb_lock);
811 if (prune_ratio != 1)
812 w_count = (sb->s_nr_dentry_unused / prune_ratio) + 1;
813 else
814 w_count = sb->s_nr_dentry_unused;
815 pruned = w_count;
816 /*
817 * We need to be sure this filesystem isn't being unmounted,
818 * otherwise we could race with generic_shutdown_super(), and
819 * end up holding a reference to an inode while the filesystem
820 * is unmounted. So we try to get s_umount, and make sure
821 * s_root isn't NULL.
822 */
823 if (down_read_trylock(&sb->s_umount)) {
824 if ((sb->s_root != NULL) &&
825 (!list_empty(&sb->s_dentry_lru))) {
826 __shrink_dcache_sb(sb, &w_count,
827 DCACHE_REFERENCED);
828 pruned -= w_count;
829 }
830 up_read(&sb->s_umount);
831 }
832 spin_lock(&sb_lock);
833 if (p)
834 __put_super(p);
835 count -= pruned;
836 p = sb;
837 /* more work left to do? */
838 if (count <= 0)
839 break;
840 }
841 if (p)
842 __put_super(p);
843 spin_unlock(&sb_lock);
844} 799}
845 800
846/** 801/**
@@ -1215,45 +1170,13 @@ void shrink_dcache_parent(struct dentry * parent)
1215 int found; 1170 int found;
1216 1171
1217 while ((found = select_parent(parent)) != 0) 1172 while ((found = select_parent(parent)) != 0)
1218 __shrink_dcache_sb(sb, &found, 0); 1173 __shrink_dcache_sb(sb, found, 0);
1219} 1174}
1220EXPORT_SYMBOL(shrink_dcache_parent); 1175EXPORT_SYMBOL(shrink_dcache_parent);
1221 1176
1222/*
1223 * Scan `sc->nr_slab_to_reclaim' dentries and return the number which remain.
1224 *
1225 * We need to avoid reentering the filesystem if the caller is performing a
1226 * GFP_NOFS allocation attempt. One example deadlock is:
1227 *
1228 * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
1229 * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
1230 * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
1231 *
1232 * In this case we return -1 to tell the caller that we baled.
1233 */
1234static int shrink_dcache_memory(struct shrinker *shrink,
1235 struct shrink_control *sc)
1236{
1237 int nr = sc->nr_to_scan;
1238 gfp_t gfp_mask = sc->gfp_mask;
1239
1240 if (nr) {
1241 if (!(gfp_mask & __GFP_FS))
1242 return -1;
1243 prune_dcache(nr);
1244 }
1245
1246 return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
1247}
1248
1249static struct shrinker dcache_shrinker = {
1250 .shrink = shrink_dcache_memory,
1251 .seeks = DEFAULT_SEEKS,
1252};
1253
1254/** 1177/**
1255 * d_alloc - allocate a dcache entry 1178 * __d_alloc - allocate a dcache entry
1256 * @parent: parent of entry to allocate 1179 * @sb: filesystem it will belong to
1257 * @name: qstr of the name 1180 * @name: qstr of the name
1258 * 1181 *
1259 * Allocates a dentry. It returns %NULL if there is insufficient memory 1182 * Allocates a dentry. It returns %NULL if there is insufficient memory
@@ -1261,7 +1184,7 @@ static struct shrinker dcache_shrinker = {
1261 * copied and the copy passed in may be reused after this call. 1184 * copied and the copy passed in may be reused after this call.
1262 */ 1185 */
1263 1186
1264struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) 1187struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
1265{ 1188{
1266 struct dentry *dentry; 1189 struct dentry *dentry;
1267 char *dname; 1190 char *dname;
@@ -1291,8 +1214,8 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1291 spin_lock_init(&dentry->d_lock); 1214 spin_lock_init(&dentry->d_lock);
1292 seqcount_init(&dentry->d_seq); 1215 seqcount_init(&dentry->d_seq);
1293 dentry->d_inode = NULL; 1216 dentry->d_inode = NULL;
1294 dentry->d_parent = NULL; 1217 dentry->d_parent = dentry;
1295 dentry->d_sb = NULL; 1218 dentry->d_sb = sb;
1296 dentry->d_op = NULL; 1219 dentry->d_op = NULL;
1297 dentry->d_fsdata = NULL; 1220 dentry->d_fsdata = NULL;
1298 INIT_HLIST_BL_NODE(&dentry->d_hash); 1221 INIT_HLIST_BL_NODE(&dentry->d_hash);
@@ -1300,36 +1223,47 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1300 INIT_LIST_HEAD(&dentry->d_subdirs); 1223 INIT_LIST_HEAD(&dentry->d_subdirs);
1301 INIT_LIST_HEAD(&dentry->d_alias); 1224 INIT_LIST_HEAD(&dentry->d_alias);
1302 INIT_LIST_HEAD(&dentry->d_u.d_child); 1225 INIT_LIST_HEAD(&dentry->d_u.d_child);
1303 1226 d_set_d_op(dentry, dentry->d_sb->s_d_op);
1304 if (parent) {
1305 spin_lock(&parent->d_lock);
1306 /*
1307 * don't need child lock because it is not subject
1308 * to concurrency here
1309 */
1310 __dget_dlock(parent);
1311 dentry->d_parent = parent;
1312 dentry->d_sb = parent->d_sb;
1313 d_set_d_op(dentry, dentry->d_sb->s_d_op);
1314 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1315 spin_unlock(&parent->d_lock);
1316 }
1317 1227
1318 this_cpu_inc(nr_dentry); 1228 this_cpu_inc(nr_dentry);
1319 1229
1320 return dentry; 1230 return dentry;
1321} 1231}
1232
1233/**
1234 * d_alloc - allocate a dcache entry
1235 * @parent: parent of entry to allocate
1236 * @name: qstr of the name
1237 *
1238 * Allocates a dentry. It returns %NULL if there is insufficient memory
1239 * available. On a success the dentry is returned. The name passed in is
1240 * copied and the copy passed in may be reused after this call.
1241 */
1242struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
1243{
1244 struct dentry *dentry = __d_alloc(parent->d_sb, name);
1245 if (!dentry)
1246 return NULL;
1247
1248 spin_lock(&parent->d_lock);
1249 /*
1250 * don't need child lock because it is not subject
1251 * to concurrency here
1252 */
1253 __dget_dlock(parent);
1254 dentry->d_parent = parent;
1255 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
1256 spin_unlock(&parent->d_lock);
1257
1258 return dentry;
1259}
1322EXPORT_SYMBOL(d_alloc); 1260EXPORT_SYMBOL(d_alloc);
1323 1261
1324struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name) 1262struct dentry *d_alloc_pseudo(struct super_block *sb, const struct qstr *name)
1325{ 1263{
1326 struct dentry *dentry = d_alloc(NULL, name); 1264 struct dentry *dentry = __d_alloc(sb, name);
1327 if (dentry) { 1265 if (dentry)
1328 dentry->d_sb = sb;
1329 d_set_d_op(dentry, dentry->d_sb->s_d_op);
1330 dentry->d_parent = dentry;
1331 dentry->d_flags |= DCACHE_DISCONNECTED; 1266 dentry->d_flags |= DCACHE_DISCONNECTED;
1332 }
1333 return dentry; 1267 return dentry;
1334} 1268}
1335EXPORT_SYMBOL(d_alloc_pseudo); 1269EXPORT_SYMBOL(d_alloc_pseudo);
@@ -1499,13 +1433,9 @@ struct dentry * d_alloc_root(struct inode * root_inode)
1499 if (root_inode) { 1433 if (root_inode) {
1500 static const struct qstr name = { .name = "/", .len = 1 }; 1434 static const struct qstr name = { .name = "/", .len = 1 };
1501 1435
1502 res = d_alloc(NULL, &name); 1436 res = __d_alloc(root_inode->i_sb, &name);
1503 if (res) { 1437 if (res)
1504 res->d_sb = root_inode->i_sb;
1505 d_set_d_op(res, res->d_sb->s_d_op);
1506 res->d_parent = res;
1507 d_instantiate(res, root_inode); 1438 d_instantiate(res, root_inode);
1508 }
1509 } 1439 }
1510 return res; 1440 return res;
1511} 1441}
@@ -1566,13 +1496,11 @@ struct dentry *d_obtain_alias(struct inode *inode)
1566 if (res) 1496 if (res)
1567 goto out_iput; 1497 goto out_iput;
1568 1498
1569 tmp = d_alloc(NULL, &anonstring); 1499 tmp = __d_alloc(inode->i_sb, &anonstring);
1570 if (!tmp) { 1500 if (!tmp) {
1571 res = ERR_PTR(-ENOMEM); 1501 res = ERR_PTR(-ENOMEM);
1572 goto out_iput; 1502 goto out_iput;
1573 } 1503 }
1574 tmp->d_parent = tmp; /* make sure dput doesn't croak */
1575
1576 1504
1577 spin_lock(&inode->i_lock); 1505 spin_lock(&inode->i_lock);
1578 res = __d_find_any_alias(inode); 1506 res = __d_find_any_alias(inode);
@@ -1584,8 +1512,6 @@ struct dentry *d_obtain_alias(struct inode *inode)
1584 1512
1585 /* attach a disconnected dentry */ 1513 /* attach a disconnected dentry */
1586 spin_lock(&tmp->d_lock); 1514 spin_lock(&tmp->d_lock);
1587 tmp->d_sb = inode->i_sb;
1588 d_set_d_op(tmp, tmp->d_sb->s_d_op);
1589 tmp->d_inode = inode; 1515 tmp->d_inode = inode;
1590 tmp->d_flags |= DCACHE_DISCONNECTED; 1516 tmp->d_flags |= DCACHE_DISCONNECTED;
1591 list_add(&tmp->d_alias, &inode->i_dentry); 1517 list_add(&tmp->d_alias, &inode->i_dentry);
@@ -1626,6 +1552,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
1626{ 1552{
1627 struct dentry *new = NULL; 1553 struct dentry *new = NULL;
1628 1554
1555 if (IS_ERR(inode))
1556 return ERR_CAST(inode);
1557
1629 if (inode && S_ISDIR(inode->i_mode)) { 1558 if (inode && S_ISDIR(inode->i_mode)) {
1630 spin_lock(&inode->i_lock); 1559 spin_lock(&inode->i_lock);
1631 new = __d_find_alias(inode, 1); 1560 new = __d_find_alias(inode, 1);
@@ -1708,29 +1637,22 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
1708 } 1637 }
1709 1638
1710 /* 1639 /*
1711 * Negative dentry: instantiate it unless the inode is a directory and 1640 * We are going to instantiate this dentry, unhash it and clear the
1712 * already has a dentry. 1641 * lookup flag so we can do that.
1713 */ 1642 */
1714 spin_lock(&inode->i_lock); 1643 if (unlikely(d_need_lookup(found)))
1715 if (!S_ISDIR(inode->i_mode) || list_empty(&inode->i_dentry)) { 1644 d_clear_need_lookup(found);
1716 __d_instantiate(found, inode);
1717 spin_unlock(&inode->i_lock);
1718 security_d_instantiate(found, inode);
1719 return found;
1720 }
1721 1645
1722 /* 1646 /*
1723 * In case a directory already has a (disconnected) entry grab a 1647 * Negative dentry: instantiate it unless the inode is a directory and
1724 * reference to it, move it in place and use it. 1648 * already has a dentry.
1725 */ 1649 */
1726 new = list_entry(inode->i_dentry.next, struct dentry, d_alias); 1650 new = d_splice_alias(inode, found);
1727 __dget(new); 1651 if (new) {
1728 spin_unlock(&inode->i_lock); 1652 dput(found);
1729 security_d_instantiate(found, inode); 1653 found = new;
1730 d_move(new, found); 1654 }
1731 iput(inode); 1655 return found;
1732 dput(found);
1733 return new;
1734 1656
1735err_out: 1657err_out:
1736 iput(inode); 1658 iput(inode);
@@ -1813,8 +1735,6 @@ seqretry:
1813 tname = dentry->d_name.name; 1735 tname = dentry->d_name.name;
1814 i = dentry->d_inode; 1736 i = dentry->d_inode;
1815 prefetch(tname); 1737 prefetch(tname);
1816 if (i)
1817 prefetch(i);
1818 /* 1738 /*
1819 * This seqcount check is required to ensure name and 1739 * This seqcount check is required to ensure name and
1820 * len are loaded atomically, so as not to walk off the 1740 * len are loaded atomically, so as not to walk off the
@@ -3047,8 +2967,6 @@ static void __init dcache_init(void)
3047 */ 2967 */
3048 dentry_cache = KMEM_CACHE(dentry, 2968 dentry_cache = KMEM_CACHE(dentry,
3049 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD); 2969 SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
3050
3051 register_shrinker(&dcache_shrinker);
3052 2970
3053 /* Hash may have been set up in dcache_init_early */ 2971 /* Hash may have been set up in dcache_init_early */
3054 if (!hashdist) 2972 if (!hashdist)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index ac5f164170e3..01d2d9ef609c 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -135,6 +135,50 @@ struct dio {
135 struct page *pages[DIO_PAGES]; /* page buffer */ 135 struct page *pages[DIO_PAGES]; /* page buffer */
136}; 136};
137 137
138static void __inode_dio_wait(struct inode *inode)
139{
140 wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
141 DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
142
143 do {
144 prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
145 if (atomic_read(&inode->i_dio_count))
146 schedule();
147 } while (atomic_read(&inode->i_dio_count));
148 finish_wait(wq, &q.wait);
149}
150
151/**
152 * inode_dio_wait - wait for outstanding DIO requests to finish
153 * @inode: inode to wait for
154 *
155 * Waits for all pending direct I/O requests to finish so that we can
156 * proceed with a truncate or equivalent operation.
157 *
158 * Must be called under a lock that serializes taking new references
159 * to i_dio_count, usually by inode->i_mutex.
160 */
161void inode_dio_wait(struct inode *inode)
162{
163 if (atomic_read(&inode->i_dio_count))
164 __inode_dio_wait(inode);
165}
166EXPORT_SYMBOL_GPL(inode_dio_wait);
167
168/*
169 * inode_dio_done - signal finish of a direct I/O requests
170 * @inode: inode the direct I/O happens on
171 *
172 * This is called once we've finished processing a direct I/O request,
173 * and is used to wake up callers waiting for direct I/O to be quiesced.
174 */
175void inode_dio_done(struct inode *inode)
176{
177 if (atomic_dec_and_test(&inode->i_dio_count))
178 wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
179}
180EXPORT_SYMBOL_GPL(inode_dio_done);
181
138/* 182/*
139 * How many pages are in the queue? 183 * How many pages are in the queue?
140 */ 184 */
@@ -249,14 +293,12 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
249 if (dio->end_io && dio->result) { 293 if (dio->end_io && dio->result) {
250 dio->end_io(dio->iocb, offset, transferred, 294 dio->end_io(dio->iocb, offset, transferred,
251 dio->map_bh.b_private, ret, is_async); 295 dio->map_bh.b_private, ret, is_async);
252 } else if (is_async) { 296 } else {
253 aio_complete(dio->iocb, ret, 0); 297 if (is_async)
298 aio_complete(dio->iocb, ret, 0);
299 inode_dio_done(dio->inode);
254 } 300 }
255 301
256 if (dio->flags & DIO_LOCKING)
257 /* lockdep: non-owner release */
258 up_read_non_owner(&dio->inode->i_alloc_sem);
259
260 return ret; 302 return ret;
261} 303}
262 304
@@ -980,9 +1022,6 @@ out:
980 return ret; 1022 return ret;
981} 1023}
982 1024
983/*
984 * Releases both i_mutex and i_alloc_sem
985 */
986static ssize_t 1025static ssize_t
987direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 1026direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
988 const struct iovec *iov, loff_t offset, unsigned long nr_segs, 1027 const struct iovec *iov, loff_t offset, unsigned long nr_segs,
@@ -1146,15 +1185,16 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1146 * For writes this function is called under i_mutex and returns with 1185 * For writes this function is called under i_mutex and returns with
1147 * i_mutex held, for reads, i_mutex is not held on entry, but it is 1186 * i_mutex held, for reads, i_mutex is not held on entry, but it is
1148 * taken and dropped again before returning. 1187 * taken and dropped again before returning.
1149 * For reads and writes i_alloc_sem is taken in shared mode and released
1150 * on I/O completion (which may happen asynchronously after returning to
1151 * the caller).
1152 *
1153 * - if the flags value does NOT contain DIO_LOCKING we don't use any 1188 * - if the flags value does NOT contain DIO_LOCKING we don't use any
1154 * internal locking but rather rely on the filesystem to synchronize 1189 * internal locking but rather rely on the filesystem to synchronize
1155 * direct I/O reads/writes versus each other and truncate. 1190 * direct I/O reads/writes versus each other and truncate.
1156 * For reads and writes both i_mutex and i_alloc_sem are not held on 1191 *
1157 * entry and are never taken. 1192 * To help with locking against truncate we incremented the i_dio_count
1193 * counter before starting direct I/O, and decrement it once we are done.
1194 * Truncate can wait for it to reach zero to provide exclusion. It is
1195 * expected that filesystem provide exclusion between new direct I/O
1196 * and truncates. For DIO_LOCKING filesystems this is done by i_mutex,
1197 * but other filesystems need to take care of this on their own.
1158 */ 1198 */
1159ssize_t 1199ssize_t
1160__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1200__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
@@ -1200,6 +1240,10 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1200 } 1240 }
1201 } 1241 }
1202 1242
1243 /* watch out for a 0 len io from a tricksy fs */
1244 if (rw == READ && end == offset)
1245 return 0;
1246
1203 dio = kmalloc(sizeof(*dio), GFP_KERNEL); 1247 dio = kmalloc(sizeof(*dio), GFP_KERNEL);
1204 retval = -ENOMEM; 1248 retval = -ENOMEM;
1205 if (!dio) 1249 if (!dio)
@@ -1213,8 +1257,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1213 1257
1214 dio->flags = flags; 1258 dio->flags = flags;
1215 if (dio->flags & DIO_LOCKING) { 1259 if (dio->flags & DIO_LOCKING) {
1216 /* watch out for a 0 len io from a tricksy fs */ 1260 if (rw == READ) {
1217 if (rw == READ && end > offset) {
1218 struct address_space *mapping = 1261 struct address_space *mapping =
1219 iocb->ki_filp->f_mapping; 1262 iocb->ki_filp->f_mapping;
1220 1263
@@ -1229,15 +1272,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1229 goto out; 1272 goto out;
1230 } 1273 }
1231 } 1274 }
1232
1233 /*
1234 * Will be released at I/O completion, possibly in a
1235 * different thread.
1236 */
1237 down_read_non_owner(&inode->i_alloc_sem);
1238 } 1275 }
1239 1276
1240 /* 1277 /*
1278 * Will be decremented at I/O completion time.
1279 */
1280 atomic_inc(&inode->i_dio_count);
1281
1282 /*
1241 * For file extending writes updating i_size before data 1283 * For file extending writes updating i_size before data
1242 * writeouts complete can expose uninitialized blocks. So 1284 * writeouts complete can expose uninitialized blocks. So
1243 * even for AIO, we need to wait for i/o to complete before 1285 * even for AIO, we need to wait for i/o to complete before
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index abc49f292454..90e5997262ea 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -14,17 +14,9 @@
14#include "dlm_internal.h" 14#include "dlm_internal.h"
15#include "lock.h" 15#include "lock.h"
16#include "user.h" 16#include "user.h"
17#include "ast.h"
18
19#define WAKE_ASTS 0
20
21static uint64_t ast_seq_count;
22static struct list_head ast_queue;
23static spinlock_t ast_queue_lock;
24static struct task_struct * astd_task;
25static unsigned long astd_wakeflags;
26static struct mutex astd_running;
27 17
18static uint64_t dlm_cb_seq;
19static spinlock_t dlm_cb_seq_spin;
28 20
29static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb) 21static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
30{ 22{
@@ -57,21 +49,13 @@ static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
57 } 49 }
58} 50}
59 51
60void dlm_del_ast(struct dlm_lkb *lkb)
61{
62 spin_lock(&ast_queue_lock);
63 if (!list_empty(&lkb->lkb_astqueue))
64 list_del_init(&lkb->lkb_astqueue);
65 spin_unlock(&ast_queue_lock);
66}
67
68int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, 52int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
69 int status, uint32_t sbflags, uint64_t seq) 53 int status, uint32_t sbflags, uint64_t seq)
70{ 54{
71 struct dlm_ls *ls = lkb->lkb_resource->res_ls; 55 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
72 uint64_t prev_seq; 56 uint64_t prev_seq;
73 int prev_mode; 57 int prev_mode;
74 int i; 58 int i, rv;
75 59
76 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { 60 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
77 if (lkb->lkb_callbacks[i].seq) 61 if (lkb->lkb_callbacks[i].seq)
@@ -100,7 +84,8 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
100 mode, 84 mode,
101 (unsigned long long)prev_seq, 85 (unsigned long long)prev_seq,
102 prev_mode); 86 prev_mode);
103 return 0; 87 rv = 0;
88 goto out;
104 } 89 }
105 } 90 }
106 91
@@ -109,6 +94,7 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
109 lkb->lkb_callbacks[i].mode = mode; 94 lkb->lkb_callbacks[i].mode = mode;
110 lkb->lkb_callbacks[i].sb_status = status; 95 lkb->lkb_callbacks[i].sb_status = status;
111 lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF); 96 lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF);
97 rv = 0;
112 break; 98 break;
113 } 99 }
114 100
@@ -117,21 +103,24 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
117 lkb->lkb_id, (unsigned long long)seq, 103 lkb->lkb_id, (unsigned long long)seq,
118 flags, mode, status, sbflags); 104 flags, mode, status, sbflags);
119 dlm_dump_lkb_callbacks(lkb); 105 dlm_dump_lkb_callbacks(lkb);
120 return -1; 106 rv = -1;
107 goto out;
121 } 108 }
122 109 out:
123 return 0; 110 return rv;
124} 111}
125 112
126int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, 113int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
127 struct dlm_callback *cb, int *resid) 114 struct dlm_callback *cb, int *resid)
128{ 115{
129 int i; 116 int i, rv;
130 117
131 *resid = 0; 118 *resid = 0;
132 119
133 if (!lkb->lkb_callbacks[0].seq) 120 if (!lkb->lkb_callbacks[0].seq) {
134 return -ENOENT; 121 rv = -ENOENT;
122 goto out;
123 }
135 124
136 /* oldest undelivered cb is callbacks[0] */ 125 /* oldest undelivered cb is callbacks[0] */
137 126
@@ -163,7 +152,8 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
163 cb->mode, 152 cb->mode,
164 (unsigned long long)lkb->lkb_last_cast.seq, 153 (unsigned long long)lkb->lkb_last_cast.seq,
165 lkb->lkb_last_cast.mode); 154 lkb->lkb_last_cast.mode);
166 return 0; 155 rv = 0;
156 goto out;
167 } 157 }
168 } 158 }
169 159
@@ -176,171 +166,150 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
176 memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback)); 166 memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback));
177 lkb->lkb_last_bast_time = ktime_get(); 167 lkb->lkb_last_bast_time = ktime_get();
178 } 168 }
179 169 rv = 0;
180 return 0; 170 out:
171 return rv;
181} 172}
182 173
183void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, 174void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
184 uint32_t sbflags) 175 uint32_t sbflags)
185{ 176{
186 uint64_t seq; 177 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
178 uint64_t new_seq, prev_seq;
187 int rv; 179 int rv;
188 180
189 spin_lock(&ast_queue_lock); 181 spin_lock(&dlm_cb_seq_spin);
190 182 new_seq = ++dlm_cb_seq;
191 seq = ++ast_seq_count; 183 spin_unlock(&dlm_cb_seq_spin);
192 184
193 if (lkb->lkb_flags & DLM_IFL_USER) { 185 if (lkb->lkb_flags & DLM_IFL_USER) {
194 spin_unlock(&ast_queue_lock); 186 dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq);
195 dlm_user_add_ast(lkb, flags, mode, status, sbflags, seq);
196 return; 187 return;
197 } 188 }
198 189
199 rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq); 190 mutex_lock(&lkb->lkb_cb_mutex);
200 if (rv < 0) { 191 prev_seq = lkb->lkb_callbacks[0].seq;
201 spin_unlock(&ast_queue_lock);
202 return;
203 }
204 192
205 if (list_empty(&lkb->lkb_astqueue)) { 193 rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq);
194 if (rv < 0)
195 goto out;
196
197 if (!prev_seq) {
206 kref_get(&lkb->lkb_ref); 198 kref_get(&lkb->lkb_ref);
207 list_add_tail(&lkb->lkb_astqueue, &ast_queue);
208 }
209 spin_unlock(&ast_queue_lock);
210 199
211 set_bit(WAKE_ASTS, &astd_wakeflags); 200 if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
212 wake_up_process(astd_task); 201 mutex_lock(&ls->ls_cb_mutex);
202 list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
203 mutex_unlock(&ls->ls_cb_mutex);
204 } else {
205 queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
206 }
207 }
208 out:
209 mutex_unlock(&lkb->lkb_cb_mutex);
213} 210}
214 211
215static void process_asts(void) 212void dlm_callback_work(struct work_struct *work)
216{ 213{
217 struct dlm_ls *ls = NULL; 214 struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work);
218 struct dlm_rsb *r = NULL; 215 struct dlm_ls *ls = lkb->lkb_resource->res_ls;
219 struct dlm_lkb *lkb;
220 void (*castfn) (void *astparam); 216 void (*castfn) (void *astparam);
221 void (*bastfn) (void *astparam, int mode); 217 void (*bastfn) (void *astparam, int mode);
222 struct dlm_callback callbacks[DLM_CALLBACKS_SIZE]; 218 struct dlm_callback callbacks[DLM_CALLBACKS_SIZE];
223 int i, rv, resid; 219 int i, rv, resid;
224 220
225repeat: 221 memset(&callbacks, 0, sizeof(callbacks));
226 spin_lock(&ast_queue_lock);
227 list_for_each_entry(lkb, &ast_queue, lkb_astqueue) {
228 r = lkb->lkb_resource;
229 ls = r->res_ls;
230 222
231 if (dlm_locking_stopped(ls)) 223 mutex_lock(&lkb->lkb_cb_mutex);
232 continue; 224 if (!lkb->lkb_callbacks[0].seq) {
233 225 /* no callback work exists, shouldn't happen */
234 /* we remove from astqueue list and remove everything in 226 log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id);
235 lkb_callbacks before releasing the spinlock so empty 227 dlm_print_lkb(lkb);
236 lkb_astqueue is always consistent with empty lkb_callbacks */ 228 dlm_dump_lkb_callbacks(lkb);
237 229 }
238 list_del_init(&lkb->lkb_astqueue);
239
240 castfn = lkb->lkb_astfn;
241 bastfn = lkb->lkb_bastfn;
242 230
243 memset(&callbacks, 0, sizeof(callbacks)); 231 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
232 rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
233 if (rv < 0)
234 break;
235 }
244 236
245 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { 237 if (resid) {
246 rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid); 238 /* cbs remain, loop should have removed all, shouldn't happen */
247 if (rv < 0) 239 log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id,
248 break; 240 resid);
249 } 241 dlm_print_lkb(lkb);
250 spin_unlock(&ast_queue_lock); 242 dlm_dump_lkb_callbacks(lkb);
243 }
244 mutex_unlock(&lkb->lkb_cb_mutex);
251 245
252 if (resid) { 246 castfn = lkb->lkb_astfn;
253 /* shouldn't happen, for loop should have removed all */ 247 bastfn = lkb->lkb_bastfn;
254 log_error(ls, "callback resid %d lkb %x",
255 resid, lkb->lkb_id);
256 }
257 248
258 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) { 249 for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
259 if (!callbacks[i].seq) 250 if (!callbacks[i].seq)
260 break; 251 break;
261 if (callbacks[i].flags & DLM_CB_SKIP) { 252 if (callbacks[i].flags & DLM_CB_SKIP) {
262 continue; 253 continue;
263 } else if (callbacks[i].flags & DLM_CB_BAST) { 254 } else if (callbacks[i].flags & DLM_CB_BAST) {
264 bastfn(lkb->lkb_astparam, callbacks[i].mode); 255 bastfn(lkb->lkb_astparam, callbacks[i].mode);
265 } else if (callbacks[i].flags & DLM_CB_CAST) { 256 } else if (callbacks[i].flags & DLM_CB_CAST) {
266 lkb->lkb_lksb->sb_status = callbacks[i].sb_status; 257 lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
267 lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; 258 lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
268 castfn(lkb->lkb_astparam); 259 castfn(lkb->lkb_astparam);
269 }
270 } 260 }
271
272 /* removes ref for ast_queue, may cause lkb to be freed */
273 dlm_put_lkb(lkb);
274
275 cond_resched();
276 goto repeat;
277 } 261 }
278 spin_unlock(&ast_queue_lock);
279}
280
281static inline int no_asts(void)
282{
283 int ret;
284 262
285 spin_lock(&ast_queue_lock); 263 /* undo kref_get from dlm_add_callback, may cause lkb to be freed */
286 ret = list_empty(&ast_queue); 264 dlm_put_lkb(lkb);
287 spin_unlock(&ast_queue_lock);
288 return ret;
289} 265}
290 266
291static int dlm_astd(void *data) 267int dlm_callback_start(struct dlm_ls *ls)
292{ 268{
293 while (!kthread_should_stop()) { 269 ls->ls_callback_wq = alloc_workqueue("dlm_callback",
294 set_current_state(TASK_INTERRUPTIBLE); 270 WQ_UNBOUND |
295 if (!test_bit(WAKE_ASTS, &astd_wakeflags)) 271 WQ_MEM_RECLAIM |
296 schedule(); 272 WQ_NON_REENTRANT,
297 set_current_state(TASK_RUNNING); 273 0);
298 274 if (!ls->ls_callback_wq) {
299 mutex_lock(&astd_running); 275 log_print("can't start dlm_callback workqueue");
300 if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags)) 276 return -ENOMEM;
301 process_asts();
302 mutex_unlock(&astd_running);
303 } 277 }
304 return 0; 278 return 0;
305} 279}
306 280
307void dlm_astd_wake(void) 281void dlm_callback_stop(struct dlm_ls *ls)
308{ 282{
309 if (!no_asts()) { 283 if (ls->ls_callback_wq)
310 set_bit(WAKE_ASTS, &astd_wakeflags); 284 destroy_workqueue(ls->ls_callback_wq);
311 wake_up_process(astd_task);
312 }
313} 285}
314 286
315int dlm_astd_start(void) 287void dlm_callback_suspend(struct dlm_ls *ls)
316{ 288{
317 struct task_struct *p; 289 set_bit(LSFL_CB_DELAY, &ls->ls_flags);
318 int error = 0;
319
320 INIT_LIST_HEAD(&ast_queue);
321 spin_lock_init(&ast_queue_lock);
322 mutex_init(&astd_running);
323
324 p = kthread_run(dlm_astd, NULL, "dlm_astd");
325 if (IS_ERR(p))
326 error = PTR_ERR(p);
327 else
328 astd_task = p;
329 return error;
330}
331 290
332void dlm_astd_stop(void) 291 if (ls->ls_callback_wq)
333{ 292 flush_workqueue(ls->ls_callback_wq);
334 kthread_stop(astd_task);
335} 293}
336 294
337void dlm_astd_suspend(void) 295void dlm_callback_resume(struct dlm_ls *ls)
338{ 296{
339 mutex_lock(&astd_running); 297 struct dlm_lkb *lkb, *safe;
340} 298 int count = 0;
341 299
342void dlm_astd_resume(void) 300 clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
343{ 301
344 mutex_unlock(&astd_running); 302 if (!ls->ls_callback_wq)
303 return;
304
305 mutex_lock(&ls->ls_cb_mutex);
306 list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
307 list_del_init(&lkb->lkb_cb_list);
308 queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
309 count++;
310 }
311 mutex_unlock(&ls->ls_cb_mutex);
312
313 log_debug(ls, "dlm_callback_resume %d", count);
345} 314}
346 315
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index 8aa89c9b5611..757b551c6820 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -18,14 +18,15 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
18 int status, uint32_t sbflags, uint64_t seq); 18 int status, uint32_t sbflags, uint64_t seq);
19int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb, 19int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
20 struct dlm_callback *cb, int *resid); 20 struct dlm_callback *cb, int *resid);
21void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status, 21void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
22 uint32_t sbflags); 22 uint32_t sbflags);
23 23
24void dlm_astd_wake(void); 24void dlm_callback_work(struct work_struct *work);
25int dlm_astd_start(void); 25int dlm_callback_start(struct dlm_ls *ls);
26void dlm_astd_stop(void); 26void dlm_callback_stop(struct dlm_ls *ls);
27void dlm_astd_suspend(void); 27void dlm_callback_suspend(struct dlm_ls *ls);
28void dlm_astd_resume(void); 28void dlm_callback_resume(struct dlm_ls *ls);
29 29
30#endif 30#endif
31 31
32
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 9b026ea8baa9..6cf72fcc0d0c 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -28,7 +28,8 @@
28 * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight 28 * /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight
29 * /config/dlm/<cluster>/comms/<comm>/nodeid 29 * /config/dlm/<cluster>/comms/<comm>/nodeid
30 * /config/dlm/<cluster>/comms/<comm>/local 30 * /config/dlm/<cluster>/comms/<comm>/local
31 * /config/dlm/<cluster>/comms/<comm>/addr 31 * /config/dlm/<cluster>/comms/<comm>/addr (write only)
32 * /config/dlm/<cluster>/comms/<comm>/addr_list (read only)
32 * The <cluster> level is useless, but I haven't figured out how to avoid it. 33 * The <cluster> level is useless, but I haven't figured out how to avoid it.
33 */ 34 */
34 35
@@ -80,6 +81,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
80 size_t len); 81 size_t len);
81static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, 82static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf,
82 size_t len); 83 size_t len);
84static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf);
83static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); 85static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf);
84static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, 86static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
85 size_t len); 87 size_t len);
@@ -92,7 +94,6 @@ struct dlm_cluster {
92 unsigned int cl_tcp_port; 94 unsigned int cl_tcp_port;
93 unsigned int cl_buffer_size; 95 unsigned int cl_buffer_size;
94 unsigned int cl_rsbtbl_size; 96 unsigned int cl_rsbtbl_size;
95 unsigned int cl_lkbtbl_size;
96 unsigned int cl_dirtbl_size; 97 unsigned int cl_dirtbl_size;
97 unsigned int cl_recover_timer; 98 unsigned int cl_recover_timer;
98 unsigned int cl_toss_secs; 99 unsigned int cl_toss_secs;
@@ -101,13 +102,13 @@ struct dlm_cluster {
101 unsigned int cl_protocol; 102 unsigned int cl_protocol;
102 unsigned int cl_timewarn_cs; 103 unsigned int cl_timewarn_cs;
103 unsigned int cl_waitwarn_us; 104 unsigned int cl_waitwarn_us;
105 unsigned int cl_new_rsb_count;
104}; 106};
105 107
106enum { 108enum {
107 CLUSTER_ATTR_TCP_PORT = 0, 109 CLUSTER_ATTR_TCP_PORT = 0,
108 CLUSTER_ATTR_BUFFER_SIZE, 110 CLUSTER_ATTR_BUFFER_SIZE,
109 CLUSTER_ATTR_RSBTBL_SIZE, 111 CLUSTER_ATTR_RSBTBL_SIZE,
110 CLUSTER_ATTR_LKBTBL_SIZE,
111 CLUSTER_ATTR_DIRTBL_SIZE, 112 CLUSTER_ATTR_DIRTBL_SIZE,
112 CLUSTER_ATTR_RECOVER_TIMER, 113 CLUSTER_ATTR_RECOVER_TIMER,
113 CLUSTER_ATTR_TOSS_SECS, 114 CLUSTER_ATTR_TOSS_SECS,
@@ -116,6 +117,7 @@ enum {
116 CLUSTER_ATTR_PROTOCOL, 117 CLUSTER_ATTR_PROTOCOL,
117 CLUSTER_ATTR_TIMEWARN_CS, 118 CLUSTER_ATTR_TIMEWARN_CS,
118 CLUSTER_ATTR_WAITWARN_US, 119 CLUSTER_ATTR_WAITWARN_US,
120 CLUSTER_ATTR_NEW_RSB_COUNT,
119}; 121};
120 122
121struct cluster_attribute { 123struct cluster_attribute {
@@ -160,7 +162,6 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
160CLUSTER_ATTR(tcp_port, 1); 162CLUSTER_ATTR(tcp_port, 1);
161CLUSTER_ATTR(buffer_size, 1); 163CLUSTER_ATTR(buffer_size, 1);
162CLUSTER_ATTR(rsbtbl_size, 1); 164CLUSTER_ATTR(rsbtbl_size, 1);
163CLUSTER_ATTR(lkbtbl_size, 1);
164CLUSTER_ATTR(dirtbl_size, 1); 165CLUSTER_ATTR(dirtbl_size, 1);
165CLUSTER_ATTR(recover_timer, 1); 166CLUSTER_ATTR(recover_timer, 1);
166CLUSTER_ATTR(toss_secs, 1); 167CLUSTER_ATTR(toss_secs, 1);
@@ -169,12 +170,12 @@ CLUSTER_ATTR(log_debug, 0);
169CLUSTER_ATTR(protocol, 0); 170CLUSTER_ATTR(protocol, 0);
170CLUSTER_ATTR(timewarn_cs, 1); 171CLUSTER_ATTR(timewarn_cs, 1);
171CLUSTER_ATTR(waitwarn_us, 0); 172CLUSTER_ATTR(waitwarn_us, 0);
173CLUSTER_ATTR(new_rsb_count, 0);
172 174
173static struct configfs_attribute *cluster_attrs[] = { 175static struct configfs_attribute *cluster_attrs[] = {
174 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr, 176 [CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
175 [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr, 177 [CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
176 [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr, 178 [CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
177 [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
178 [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr, 179 [CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
179 [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr, 180 [CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
180 [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr, 181 [CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
@@ -183,6 +184,7 @@ static struct configfs_attribute *cluster_attrs[] = {
183 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr, 184 [CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
184 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr, 185 [CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
185 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr, 186 [CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
187 [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr,
186 NULL, 188 NULL,
187}; 189};
188 190
@@ -190,6 +192,7 @@ enum {
190 COMM_ATTR_NODEID = 0, 192 COMM_ATTR_NODEID = 0,
191 COMM_ATTR_LOCAL, 193 COMM_ATTR_LOCAL,
192 COMM_ATTR_ADDR, 194 COMM_ATTR_ADDR,
195 COMM_ATTR_ADDR_LIST,
193}; 196};
194 197
195struct comm_attribute { 198struct comm_attribute {
@@ -217,14 +220,22 @@ static struct comm_attribute comm_attr_local = {
217static struct comm_attribute comm_attr_addr = { 220static struct comm_attribute comm_attr_addr = {
218 .attr = { .ca_owner = THIS_MODULE, 221 .attr = { .ca_owner = THIS_MODULE,
219 .ca_name = "addr", 222 .ca_name = "addr",
220 .ca_mode = S_IRUGO | S_IWUSR }, 223 .ca_mode = S_IWUSR },
221 .store = comm_addr_write, 224 .store = comm_addr_write,
222}; 225};
223 226
227static struct comm_attribute comm_attr_addr_list = {
228 .attr = { .ca_owner = THIS_MODULE,
229 .ca_name = "addr_list",
230 .ca_mode = S_IRUGO },
231 .show = comm_addr_list_read,
232};
233
224static struct configfs_attribute *comm_attrs[] = { 234static struct configfs_attribute *comm_attrs[] = {
225 [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr, 235 [COMM_ATTR_NODEID] = &comm_attr_nodeid.attr,
226 [COMM_ATTR_LOCAL] = &comm_attr_local.attr, 236 [COMM_ATTR_LOCAL] = &comm_attr_local.attr,
227 [COMM_ATTR_ADDR] = &comm_attr_addr.attr, 237 [COMM_ATTR_ADDR] = &comm_attr_addr.attr,
238 [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr,
228 NULL, 239 NULL,
229}; 240};
230 241
@@ -435,7 +446,6 @@ static struct config_group *make_cluster(struct config_group *g,
435 cl->cl_tcp_port = dlm_config.ci_tcp_port; 446 cl->cl_tcp_port = dlm_config.ci_tcp_port;
436 cl->cl_buffer_size = dlm_config.ci_buffer_size; 447 cl->cl_buffer_size = dlm_config.ci_buffer_size;
437 cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size; 448 cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
438 cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
439 cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size; 449 cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
440 cl->cl_recover_timer = dlm_config.ci_recover_timer; 450 cl->cl_recover_timer = dlm_config.ci_recover_timer;
441 cl->cl_toss_secs = dlm_config.ci_toss_secs; 451 cl->cl_toss_secs = dlm_config.ci_toss_secs;
@@ -444,6 +454,7 @@ static struct config_group *make_cluster(struct config_group *g,
444 cl->cl_protocol = dlm_config.ci_protocol; 454 cl->cl_protocol = dlm_config.ci_protocol;
445 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs; 455 cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
446 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us; 456 cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
457 cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
447 458
448 space_list = &sps->ss_group; 459 space_list = &sps->ss_group;
449 comm_list = &cms->cs_group; 460 comm_list = &cms->cs_group;
@@ -720,6 +731,50 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
720 return len; 731 return len;
721} 732}
722 733
734static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf)
735{
736 ssize_t s;
737 ssize_t allowance;
738 int i;
739 struct sockaddr_storage *addr;
740 struct sockaddr_in *addr_in;
741 struct sockaddr_in6 *addr_in6;
742
743 /* Taken from ip6_addr_string() defined in lib/vsprintf.c */
744 char buf0[sizeof("AF_INET6 xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255\n")];
745
746
747 /* Derived from SIMPLE_ATTR_SIZE of fs/configfs/file.c */
748 allowance = 4096;
749 buf[0] = '\0';
750
751 for (i = 0; i < cm->addr_count; i++) {
752 addr = cm->addr[i];
753
754 switch(addr->ss_family) {
755 case AF_INET:
756 addr_in = (struct sockaddr_in *)addr;
757 s = sprintf(buf0, "AF_INET %pI4\n", &addr_in->sin_addr.s_addr);
758 break;
759 case AF_INET6:
760 addr_in6 = (struct sockaddr_in6 *)addr;
761 s = sprintf(buf0, "AF_INET6 %pI6\n", &addr_in6->sin6_addr);
762 break;
763 default:
764 s = sprintf(buf0, "%s\n", "<UNKNOWN>");
765 break;
766 }
767 allowance -= s;
768 if (allowance >= 0)
769 strcat(buf, buf0);
770 else {
771 allowance += s;
772 break;
773 }
774 }
775 return 4096 - allowance;
776}
777
723static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, 778static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
724 char *buf) 779 char *buf)
725{ 780{
@@ -983,7 +1038,6 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
983#define DEFAULT_TCP_PORT 21064 1038#define DEFAULT_TCP_PORT 21064
984#define DEFAULT_BUFFER_SIZE 4096 1039#define DEFAULT_BUFFER_SIZE 4096
985#define DEFAULT_RSBTBL_SIZE 1024 1040#define DEFAULT_RSBTBL_SIZE 1024
986#define DEFAULT_LKBTBL_SIZE 1024
987#define DEFAULT_DIRTBL_SIZE 1024 1041#define DEFAULT_DIRTBL_SIZE 1024
988#define DEFAULT_RECOVER_TIMER 5 1042#define DEFAULT_RECOVER_TIMER 5
989#define DEFAULT_TOSS_SECS 10 1043#define DEFAULT_TOSS_SECS 10
@@ -992,12 +1046,12 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
992#define DEFAULT_PROTOCOL 0 1046#define DEFAULT_PROTOCOL 0
993#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */ 1047#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
994#define DEFAULT_WAITWARN_US 0 1048#define DEFAULT_WAITWARN_US 0
1049#define DEFAULT_NEW_RSB_COUNT 128
995 1050
996struct dlm_config_info dlm_config = { 1051struct dlm_config_info dlm_config = {
997 .ci_tcp_port = DEFAULT_TCP_PORT, 1052 .ci_tcp_port = DEFAULT_TCP_PORT,
998 .ci_buffer_size = DEFAULT_BUFFER_SIZE, 1053 .ci_buffer_size = DEFAULT_BUFFER_SIZE,
999 .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE, 1054 .ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
1000 .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
1001 .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE, 1055 .ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
1002 .ci_recover_timer = DEFAULT_RECOVER_TIMER, 1056 .ci_recover_timer = DEFAULT_RECOVER_TIMER,
1003 .ci_toss_secs = DEFAULT_TOSS_SECS, 1057 .ci_toss_secs = DEFAULT_TOSS_SECS,
@@ -1005,6 +1059,7 @@ struct dlm_config_info dlm_config = {
1005 .ci_log_debug = DEFAULT_LOG_DEBUG, 1059 .ci_log_debug = DEFAULT_LOG_DEBUG,
1006 .ci_protocol = DEFAULT_PROTOCOL, 1060 .ci_protocol = DEFAULT_PROTOCOL,
1007 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS, 1061 .ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
1008 .ci_waitwarn_us = DEFAULT_WAITWARN_US 1062 .ci_waitwarn_us = DEFAULT_WAITWARN_US,
1063 .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT
1009}; 1064};
1010 1065
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index dd0ce24d5a80..3099d0dd26c0 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -20,7 +20,6 @@ struct dlm_config_info {
20 int ci_tcp_port; 20 int ci_tcp_port;
21 int ci_buffer_size; 21 int ci_buffer_size;
22 int ci_rsbtbl_size; 22 int ci_rsbtbl_size;
23 int ci_lkbtbl_size;
24 int ci_dirtbl_size; 23 int ci_dirtbl_size;
25 int ci_recover_timer; 24 int ci_recover_timer;
26 int ci_toss_secs; 25 int ci_toss_secs;
@@ -29,6 +28,7 @@ struct dlm_config_info {
29 int ci_protocol; 28 int ci_protocol;
30 int ci_timewarn_cs; 29 int ci_timewarn_cs;
31 int ci_waitwarn_us; 30 int ci_waitwarn_us;
31 int ci_new_rsb_count;
32}; 32};
33 33
34extern struct dlm_config_info dlm_config; 34extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 0262451eb9c6..fe2860c02449 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -37,6 +37,7 @@
37#include <linux/jhash.h> 37#include <linux/jhash.h>
38#include <linux/miscdevice.h> 38#include <linux/miscdevice.h>
39#include <linux/mutex.h> 39#include <linux/mutex.h>
40#include <linux/idr.h>
40#include <asm/uaccess.h> 41#include <asm/uaccess.h>
41 42
42#include <linux/dlm.h> 43#include <linux/dlm.h>
@@ -52,7 +53,6 @@ struct dlm_ls;
52struct dlm_lkb; 53struct dlm_lkb;
53struct dlm_rsb; 54struct dlm_rsb;
54struct dlm_member; 55struct dlm_member;
55struct dlm_lkbtable;
56struct dlm_rsbtable; 56struct dlm_rsbtable;
57struct dlm_dirtable; 57struct dlm_dirtable;
58struct dlm_direntry; 58struct dlm_direntry;
@@ -108,11 +108,6 @@ struct dlm_rsbtable {
108 spinlock_t lock; 108 spinlock_t lock;
109}; 109};
110 110
111struct dlm_lkbtable {
112 struct list_head list;
113 rwlock_t lock;
114 uint16_t counter;
115};
116 111
117/* 112/*
118 * Lockspace member (per node in a ls) 113 * Lockspace member (per node in a ls)
@@ -248,17 +243,18 @@ struct dlm_lkb {
248 int8_t lkb_wait_count; 243 int8_t lkb_wait_count;
249 int lkb_wait_nodeid; /* for debugging */ 244 int lkb_wait_nodeid; /* for debugging */
250 245
251 struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
252 struct list_head lkb_statequeue; /* rsb g/c/w list */ 246 struct list_head lkb_statequeue; /* rsb g/c/w list */
253 struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */ 247 struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
254 struct list_head lkb_wait_reply; /* waiting for remote reply */ 248 struct list_head lkb_wait_reply; /* waiting for remote reply */
255 struct list_head lkb_astqueue; /* need ast to be sent */
256 struct list_head lkb_ownqueue; /* list of locks for a process */ 249 struct list_head lkb_ownqueue; /* list of locks for a process */
257 struct list_head lkb_time_list; 250 struct list_head lkb_time_list;
258 ktime_t lkb_timestamp; 251 ktime_t lkb_timestamp;
259 ktime_t lkb_wait_time; 252 ktime_t lkb_wait_time;
260 unsigned long lkb_timeout_cs; 253 unsigned long lkb_timeout_cs;
261 254
255 struct mutex lkb_cb_mutex;
256 struct work_struct lkb_cb_work;
257 struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */
262 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE]; 258 struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
263 struct dlm_callback lkb_last_cast; 259 struct dlm_callback lkb_last_cast;
264 struct dlm_callback lkb_last_bast; 260 struct dlm_callback lkb_last_bast;
@@ -299,7 +295,7 @@ struct dlm_rsb {
299 int res_recover_locks_count; 295 int res_recover_locks_count;
300 296
301 char *res_lvbptr; 297 char *res_lvbptr;
302 char res_name[1]; 298 char res_name[DLM_RESNAME_MAXLEN+1];
303}; 299};
304 300
305/* find_rsb() flags */ 301/* find_rsb() flags */
@@ -465,12 +461,12 @@ struct dlm_ls {
465 unsigned long ls_scan_time; 461 unsigned long ls_scan_time;
466 struct kobject ls_kobj; 462 struct kobject ls_kobj;
467 463
464 struct idr ls_lkbidr;
465 spinlock_t ls_lkbidr_spin;
466
468 struct dlm_rsbtable *ls_rsbtbl; 467 struct dlm_rsbtable *ls_rsbtbl;
469 uint32_t ls_rsbtbl_size; 468 uint32_t ls_rsbtbl_size;
470 469
471 struct dlm_lkbtable *ls_lkbtbl;
472 uint32_t ls_lkbtbl_size;
473
474 struct dlm_dirtable *ls_dirtbl; 470 struct dlm_dirtable *ls_dirtbl;
475 uint32_t ls_dirtbl_size; 471 uint32_t ls_dirtbl_size;
476 472
@@ -483,6 +479,10 @@ struct dlm_ls {
483 struct mutex ls_timeout_mutex; 479 struct mutex ls_timeout_mutex;
484 struct list_head ls_timeout; 480 struct list_head ls_timeout;
485 481
482 spinlock_t ls_new_rsb_spin;
483 int ls_new_rsb_count;
484 struct list_head ls_new_rsb; /* new rsb structs */
485
486 struct list_head ls_nodes; /* current nodes in ls */ 486 struct list_head ls_nodes; /* current nodes in ls */
487 struct list_head ls_nodes_gone; /* dead node list, recovery */ 487 struct list_head ls_nodes_gone; /* dead node list, recovery */
488 int ls_num_nodes; /* number of nodes in ls */ 488 int ls_num_nodes; /* number of nodes in ls */
@@ -506,8 +506,12 @@ struct dlm_ls {
506 506
507 struct miscdevice ls_device; 507 struct miscdevice ls_device;
508 508
509 struct workqueue_struct *ls_callback_wq;
510
509 /* recovery related */ 511 /* recovery related */
510 512
513 struct mutex ls_cb_mutex;
514 struct list_head ls_cb_delay; /* save for queue_work later */
511 struct timer_list ls_timer; 515 struct timer_list ls_timer;
512 struct task_struct *ls_recoverd_task; 516 struct task_struct *ls_recoverd_task;
513 struct mutex ls_recoverd_active; 517 struct mutex ls_recoverd_active;
@@ -544,6 +548,7 @@ struct dlm_ls {
544#define LSFL_RCOM_WAIT 4 548#define LSFL_RCOM_WAIT 4
545#define LSFL_UEVENT_WAIT 5 549#define LSFL_UEVENT_WAIT 5
546#define LSFL_TIMEWARN 6 550#define LSFL_TIMEWARN 6
551#define LSFL_CB_DELAY 7
547 552
548/* much of this is just saving user space pointers associated with the 553/* much of this is just saving user space pointers associated with the
549 lock that we pass back to the user lib with an ast */ 554 lock that we pass back to the user lib with an ast */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index f71d0b5abd95..83b5e32514e1 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -305,7 +305,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
305 rv = -EDEADLK; 305 rv = -EDEADLK;
306 } 306 }
307 307
308 dlm_add_ast(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags); 308 dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
309} 309}
310 310
311static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb) 311static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -319,7 +319,7 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
319 if (is_master_copy(lkb)) { 319 if (is_master_copy(lkb)) {
320 send_bast(r, lkb, rqmode); 320 send_bast(r, lkb, rqmode);
321 } else { 321 } else {
322 dlm_add_ast(lkb, DLM_CB_BAST, rqmode, 0, 0); 322 dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0);
323 } 323 }
324} 324}
325 325
@@ -327,19 +327,68 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
327 * Basic operations on rsb's and lkb's 327 * Basic operations on rsb's and lkb's
328 */ 328 */
329 329
330static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len) 330static int pre_rsb_struct(struct dlm_ls *ls)
331{
332 struct dlm_rsb *r1, *r2;
333 int count = 0;
334
335 spin_lock(&ls->ls_new_rsb_spin);
336 if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) {
337 spin_unlock(&ls->ls_new_rsb_spin);
338 return 0;
339 }
340 spin_unlock(&ls->ls_new_rsb_spin);
341
342 r1 = dlm_allocate_rsb(ls);
343 r2 = dlm_allocate_rsb(ls);
344
345 spin_lock(&ls->ls_new_rsb_spin);
346 if (r1) {
347 list_add(&r1->res_hashchain, &ls->ls_new_rsb);
348 ls->ls_new_rsb_count++;
349 }
350 if (r2) {
351 list_add(&r2->res_hashchain, &ls->ls_new_rsb);
352 ls->ls_new_rsb_count++;
353 }
354 count = ls->ls_new_rsb_count;
355 spin_unlock(&ls->ls_new_rsb_spin);
356
357 if (!count)
358 return -ENOMEM;
359 return 0;
360}
361
362/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can
363 unlock any spinlocks, go back and call pre_rsb_struct again.
364 Otherwise, take an rsb off the list and return it. */
365
366static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
367 struct dlm_rsb **r_ret)
331{ 368{
332 struct dlm_rsb *r; 369 struct dlm_rsb *r;
370 int count;
333 371
334 r = dlm_allocate_rsb(ls, len); 372 spin_lock(&ls->ls_new_rsb_spin);
335 if (!r) 373 if (list_empty(&ls->ls_new_rsb)) {
336 return NULL; 374 count = ls->ls_new_rsb_count;
375 spin_unlock(&ls->ls_new_rsb_spin);
376 log_debug(ls, "find_rsb retry %d %d %s",
377 count, dlm_config.ci_new_rsb_count, name);
378 return -EAGAIN;
379 }
380
381 r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain);
382 list_del(&r->res_hashchain);
383 ls->ls_new_rsb_count--;
384 spin_unlock(&ls->ls_new_rsb_spin);
337 385
338 r->res_ls = ls; 386 r->res_ls = ls;
339 r->res_length = len; 387 r->res_length = len;
340 memcpy(r->res_name, name, len); 388 memcpy(r->res_name, name, len);
341 mutex_init(&r->res_mutex); 389 mutex_init(&r->res_mutex);
342 390
391 INIT_LIST_HEAD(&r->res_hashchain);
343 INIT_LIST_HEAD(&r->res_lookup); 392 INIT_LIST_HEAD(&r->res_lookup);
344 INIT_LIST_HEAD(&r->res_grantqueue); 393 INIT_LIST_HEAD(&r->res_grantqueue);
345 INIT_LIST_HEAD(&r->res_convertqueue); 394 INIT_LIST_HEAD(&r->res_convertqueue);
@@ -347,7 +396,8 @@ static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
347 INIT_LIST_HEAD(&r->res_root_list); 396 INIT_LIST_HEAD(&r->res_root_list);
348 INIT_LIST_HEAD(&r->res_recover_list); 397 INIT_LIST_HEAD(&r->res_recover_list);
349 398
350 return r; 399 *r_ret = r;
400 return 0;
351} 401}
352 402
353static int search_rsb_list(struct list_head *head, char *name, int len, 403static int search_rsb_list(struct list_head *head, char *name, int len,
@@ -405,16 +455,6 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
405 return error; 455 return error;
406} 456}
407 457
408static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
409 unsigned int flags, struct dlm_rsb **r_ret)
410{
411 int error;
412 spin_lock(&ls->ls_rsbtbl[b].lock);
413 error = _search_rsb(ls, name, len, b, flags, r_ret);
414 spin_unlock(&ls->ls_rsbtbl[b].lock);
415 return error;
416}
417
418/* 458/*
419 * Find rsb in rsbtbl and potentially create/add one 459 * Find rsb in rsbtbl and potentially create/add one
420 * 460 *
@@ -432,35 +472,48 @@ static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
432static int find_rsb(struct dlm_ls *ls, char *name, int namelen, 472static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
433 unsigned int flags, struct dlm_rsb **r_ret) 473 unsigned int flags, struct dlm_rsb **r_ret)
434{ 474{
435 struct dlm_rsb *r = NULL, *tmp; 475 struct dlm_rsb *r = NULL;
436 uint32_t hash, bucket; 476 uint32_t hash, bucket;
437 int error = -EINVAL; 477 int error;
438 478
439 if (namelen > DLM_RESNAME_MAXLEN) 479 if (namelen > DLM_RESNAME_MAXLEN) {
480 error = -EINVAL;
440 goto out; 481 goto out;
482 }
441 483
442 if (dlm_no_directory(ls)) 484 if (dlm_no_directory(ls))
443 flags |= R_CREATE; 485 flags |= R_CREATE;
444 486
445 error = 0;
446 hash = jhash(name, namelen, 0); 487 hash = jhash(name, namelen, 0);
447 bucket = hash & (ls->ls_rsbtbl_size - 1); 488 bucket = hash & (ls->ls_rsbtbl_size - 1);
448 489
449 error = search_rsb(ls, name, namelen, bucket, flags, &r); 490 retry:
491 if (flags & R_CREATE) {
492 error = pre_rsb_struct(ls);
493 if (error < 0)
494 goto out;
495 }
496
497 spin_lock(&ls->ls_rsbtbl[bucket].lock);
498
499 error = _search_rsb(ls, name, namelen, bucket, flags, &r);
450 if (!error) 500 if (!error)
451 goto out; 501 goto out_unlock;
452 502
453 if (error == -EBADR && !(flags & R_CREATE)) 503 if (error == -EBADR && !(flags & R_CREATE))
454 goto out; 504 goto out_unlock;
455 505
456 /* the rsb was found but wasn't a master copy */ 506 /* the rsb was found but wasn't a master copy */
457 if (error == -ENOTBLK) 507 if (error == -ENOTBLK)
458 goto out; 508 goto out_unlock;
459 509
460 error = -ENOMEM; 510 error = get_rsb_struct(ls, name, namelen, &r);
461 r = create_rsb(ls, name, namelen); 511 if (error == -EAGAIN) {
462 if (!r) 512 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
463 goto out; 513 goto retry;
514 }
515 if (error)
516 goto out_unlock;
464 517
465 r->res_hash = hash; 518 r->res_hash = hash;
466 r->res_bucket = bucket; 519 r->res_bucket = bucket;
@@ -474,18 +527,10 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
474 nodeid = 0; 527 nodeid = 0;
475 r->res_nodeid = nodeid; 528 r->res_nodeid = nodeid;
476 } 529 }
477
478 spin_lock(&ls->ls_rsbtbl[bucket].lock);
479 error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
480 if (!error) {
481 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
482 dlm_free_rsb(r);
483 r = tmp;
484 goto out;
485 }
486 list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list); 530 list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
487 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
488 error = 0; 531 error = 0;
532 out_unlock:
533 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
489 out: 534 out:
490 *r_ret = r; 535 *r_ret = r;
491 return error; 536 return error;
@@ -580,9 +625,8 @@ static void detach_lkb(struct dlm_lkb *lkb)
580 625
581static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) 626static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
582{ 627{
583 struct dlm_lkb *lkb, *tmp; 628 struct dlm_lkb *lkb;
584 uint32_t lkid = 0; 629 int rv, id;
585 uint16_t bucket;
586 630
587 lkb = dlm_allocate_lkb(ls); 631 lkb = dlm_allocate_lkb(ls);
588 if (!lkb) 632 if (!lkb)
@@ -594,60 +638,42 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
594 INIT_LIST_HEAD(&lkb->lkb_ownqueue); 638 INIT_LIST_HEAD(&lkb->lkb_ownqueue);
595 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup); 639 INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
596 INIT_LIST_HEAD(&lkb->lkb_time_list); 640 INIT_LIST_HEAD(&lkb->lkb_time_list);
597 INIT_LIST_HEAD(&lkb->lkb_astqueue); 641 INIT_LIST_HEAD(&lkb->lkb_cb_list);
642 mutex_init(&lkb->lkb_cb_mutex);
643 INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
598 644
599 get_random_bytes(&bucket, sizeof(bucket)); 645 retry:
600 bucket &= (ls->ls_lkbtbl_size - 1); 646 rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS);
601 647 if (!rv)
602 write_lock(&ls->ls_lkbtbl[bucket].lock); 648 return -ENOMEM;
603 649
604 /* counter can roll over so we must verify lkid is not in use */ 650 spin_lock(&ls->ls_lkbidr_spin);
651 rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id);
652 if (!rv)
653 lkb->lkb_id = id;
654 spin_unlock(&ls->ls_lkbidr_spin);
605 655
606 while (lkid == 0) { 656 if (rv == -EAGAIN)
607 lkid = (bucket << 16) | ls->ls_lkbtbl[bucket].counter++; 657 goto retry;
608 658
609 list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list, 659 if (rv < 0) {
610 lkb_idtbl_list) { 660 log_error(ls, "create_lkb idr error %d", rv);
611 if (tmp->lkb_id != lkid) 661 return rv;
612 continue;
613 lkid = 0;
614 break;
615 }
616 } 662 }
617 663
618 lkb->lkb_id = lkid;
619 list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list);
620 write_unlock(&ls->ls_lkbtbl[bucket].lock);
621
622 *lkb_ret = lkb; 664 *lkb_ret = lkb;
623 return 0; 665 return 0;
624} 666}
625 667
626static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid)
627{
628 struct dlm_lkb *lkb;
629 uint16_t bucket = (lkid >> 16);
630
631 list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) {
632 if (lkb->lkb_id == lkid)
633 return lkb;
634 }
635 return NULL;
636}
637
638static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) 668static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
639{ 669{
640 struct dlm_lkb *lkb; 670 struct dlm_lkb *lkb;
641 uint16_t bucket = (lkid >> 16);
642
643 if (bucket >= ls->ls_lkbtbl_size)
644 return -EBADSLT;
645 671
646 read_lock(&ls->ls_lkbtbl[bucket].lock); 672 spin_lock(&ls->ls_lkbidr_spin);
647 lkb = __find_lkb(ls, lkid); 673 lkb = idr_find(&ls->ls_lkbidr, lkid);
648 if (lkb) 674 if (lkb)
649 kref_get(&lkb->lkb_ref); 675 kref_get(&lkb->lkb_ref);
650 read_unlock(&ls->ls_lkbtbl[bucket].lock); 676 spin_unlock(&ls->ls_lkbidr_spin);
651 677
652 *lkb_ret = lkb; 678 *lkb_ret = lkb;
653 return lkb ? 0 : -ENOENT; 679 return lkb ? 0 : -ENOENT;
@@ -668,12 +694,12 @@ static void kill_lkb(struct kref *kref)
668 694
669static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb) 695static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
670{ 696{
671 uint16_t bucket = (lkb->lkb_id >> 16); 697 uint32_t lkid = lkb->lkb_id;
672 698
673 write_lock(&ls->ls_lkbtbl[bucket].lock); 699 spin_lock(&ls->ls_lkbidr_spin);
674 if (kref_put(&lkb->lkb_ref, kill_lkb)) { 700 if (kref_put(&lkb->lkb_ref, kill_lkb)) {
675 list_del(&lkb->lkb_idtbl_list); 701 idr_remove(&ls->ls_lkbidr, lkid);
676 write_unlock(&ls->ls_lkbtbl[bucket].lock); 702 spin_unlock(&ls->ls_lkbidr_spin);
677 703
678 detach_lkb(lkb); 704 detach_lkb(lkb);
679 705
@@ -683,7 +709,7 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
683 dlm_free_lkb(lkb); 709 dlm_free_lkb(lkb);
684 return 1; 710 return 1;
685 } else { 711 } else {
686 write_unlock(&ls->ls_lkbtbl[bucket].lock); 712 spin_unlock(&ls->ls_lkbidr_spin);
687 return 0; 713 return 0;
688 } 714 }
689} 715}
@@ -849,9 +875,7 @@ void dlm_scan_waiters(struct dlm_ls *ls)
849 875
850 if (!num_nodes) { 876 if (!num_nodes) {
851 num_nodes = ls->ls_num_nodes; 877 num_nodes = ls->ls_num_nodes;
852 warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int)); 878 warned = kzalloc(num_nodes * sizeof(int), GFP_KERNEL);
853 if (warned)
854 memset(warned, 0, num_nodes * sizeof(int));
855 } 879 }
856 if (!warned) 880 if (!warned)
857 continue; 881 continue;
@@ -863,9 +887,7 @@ void dlm_scan_waiters(struct dlm_ls *ls)
863 dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid); 887 dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
864 } 888 }
865 mutex_unlock(&ls->ls_waiters_mutex); 889 mutex_unlock(&ls->ls_waiters_mutex);
866 890 kfree(warned);
867 if (warned)
868 kfree(warned);
869 891
870 if (debug_expired) 892 if (debug_expired)
871 log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us", 893 log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
@@ -2401,9 +2423,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
2401 2423
2402 if (deadlk) { 2424 if (deadlk) {
2403 /* it's left on the granted queue */ 2425 /* it's left on the granted queue */
2404 log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
2405 lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
2406 lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
2407 revert_lock(r, lkb); 2426 revert_lock(r, lkb);
2408 queue_cast(r, lkb, -EDEADLK); 2427 queue_cast(r, lkb, -EDEADLK);
2409 error = -EDEADLK; 2428 error = -EDEADLK;
@@ -3993,8 +4012,6 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
3993 default: 4012 default:
3994 log_error(ls, "unknown message type %d", ms->m_type); 4013 log_error(ls, "unknown message type %d", ms->m_type);
3995 } 4014 }
3996
3997 dlm_astd_wake();
3998} 4015}
3999 4016
4000/* If the lockspace is in recovery mode (locking stopped), then normal 4017/* If the lockspace is in recovery mode (locking stopped), then normal
@@ -4133,7 +4150,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
4133 struct dlm_message *ms_stub; 4150 struct dlm_message *ms_stub;
4134 int wait_type, stub_unlock_result, stub_cancel_result; 4151 int wait_type, stub_unlock_result, stub_cancel_result;
4135 4152
4136 ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message)); 4153 ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
4137 if (!ms_stub) { 4154 if (!ms_stub) {
4138 log_error(ls, "dlm_recover_waiters_pre no mem"); 4155 log_error(ls, "dlm_recover_waiters_pre no mem");
4139 return; 4156 return;
@@ -4809,7 +4826,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
4809 goto out_put; 4826 goto out_put;
4810 4827
4811 spin_lock(&ua->proc->locks_spin); 4828 spin_lock(&ua->proc->locks_spin);
4812 /* dlm_user_add_ast() may have already taken lkb off the proc list */ 4829 /* dlm_user_add_cb() may have already taken lkb off the proc list */
4813 if (!list_empty(&lkb->lkb_ownqueue)) 4830 if (!list_empty(&lkb->lkb_ownqueue))
4814 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking); 4831 list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
4815 spin_unlock(&ua->proc->locks_spin); 4832 spin_unlock(&ua->proc->locks_spin);
@@ -4946,7 +4963,7 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
4946 4963
4947/* We have to release clear_proc_locks mutex before calling unlock_proc_lock() 4964/* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
4948 (which does lock_rsb) due to deadlock with receiving a message that does 4965 (which does lock_rsb) due to deadlock with receiving a message that does
4949 lock_rsb followed by dlm_user_add_ast() */ 4966 lock_rsb followed by dlm_user_add_cb() */
4950 4967
4951static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls, 4968static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
4952 struct dlm_user_proc *proc) 4969 struct dlm_user_proc *proc)
@@ -4969,7 +4986,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
4969 return lkb; 4986 return lkb;
4970} 4987}
4971 4988
4972/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which 4989/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which
4973 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts, 4990 1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
4974 which we clear here. */ 4991 which we clear here. */
4975 4992
@@ -5011,10 +5028,10 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
5011 dlm_put_lkb(lkb); 5028 dlm_put_lkb(lkb);
5012 } 5029 }
5013 5030
5014 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { 5031 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
5015 memset(&lkb->lkb_callbacks, 0, 5032 memset(&lkb->lkb_callbacks, 0,
5016 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); 5033 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
5017 list_del_init(&lkb->lkb_astqueue); 5034 list_del_init(&lkb->lkb_cb_list);
5018 dlm_put_lkb(lkb); 5035 dlm_put_lkb(lkb);
5019 } 5036 }
5020 5037
@@ -5053,10 +5070,10 @@ static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
5053 spin_unlock(&proc->locks_spin); 5070 spin_unlock(&proc->locks_spin);
5054 5071
5055 spin_lock(&proc->asts_spin); 5072 spin_lock(&proc->asts_spin);
5056 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) { 5073 list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
5057 memset(&lkb->lkb_callbacks, 0, 5074 memset(&lkb->lkb_callbacks, 0,
5058 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE); 5075 sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
5059 list_del_init(&lkb->lkb_astqueue); 5076 list_del_init(&lkb->lkb_cb_list);
5060 dlm_put_lkb(lkb); 5077 dlm_put_lkb(lkb);
5061 } 5078 }
5062 spin_unlock(&proc->asts_spin); 5079 spin_unlock(&proc->asts_spin);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 14cbf4099753..a1d8f1af144b 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -15,7 +15,6 @@
15#include "lockspace.h" 15#include "lockspace.h"
16#include "member.h" 16#include "member.h"
17#include "recoverd.h" 17#include "recoverd.h"
18#include "ast.h"
19#include "dir.h" 18#include "dir.h"
20#include "lowcomms.h" 19#include "lowcomms.h"
21#include "config.h" 20#include "config.h"
@@ -24,6 +23,7 @@
24#include "recover.h" 23#include "recover.h"
25#include "requestqueue.h" 24#include "requestqueue.h"
26#include "user.h" 25#include "user.h"
26#include "ast.h"
27 27
28static int ls_count; 28static int ls_count;
29static struct mutex ls_lock; 29static struct mutex ls_lock;
@@ -359,17 +359,10 @@ static int threads_start(void)
359{ 359{
360 int error; 360 int error;
361 361
362 /* Thread which process lock requests for all lockspace's */
363 error = dlm_astd_start();
364 if (error) {
365 log_print("cannot start dlm_astd thread %d", error);
366 goto fail;
367 }
368
369 error = dlm_scand_start(); 362 error = dlm_scand_start();
370 if (error) { 363 if (error) {
371 log_print("cannot start dlm_scand thread %d", error); 364 log_print("cannot start dlm_scand thread %d", error);
372 goto astd_fail; 365 goto fail;
373 } 366 }
374 367
375 /* Thread for sending/receiving messages for all lockspace's */ 368 /* Thread for sending/receiving messages for all lockspace's */
@@ -383,8 +376,6 @@ static int threads_start(void)
383 376
384 scand_fail: 377 scand_fail:
385 dlm_scand_stop(); 378 dlm_scand_stop();
386 astd_fail:
387 dlm_astd_stop();
388 fail: 379 fail:
389 return error; 380 return error;
390} 381}
@@ -393,7 +384,6 @@ static void threads_stop(void)
393{ 384{
394 dlm_scand_stop(); 385 dlm_scand_stop();
395 dlm_lowcomms_stop(); 386 dlm_lowcomms_stop();
396 dlm_astd_stop();
397} 387}
398 388
399static int new_lockspace(const char *name, int namelen, void **lockspace, 389static int new_lockspace(const char *name, int namelen, void **lockspace,
@@ -463,7 +453,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
463 size = dlm_config.ci_rsbtbl_size; 453 size = dlm_config.ci_rsbtbl_size;
464 ls->ls_rsbtbl_size = size; 454 ls->ls_rsbtbl_size = size;
465 455
466 ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS); 456 ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
467 if (!ls->ls_rsbtbl) 457 if (!ls->ls_rsbtbl)
468 goto out_lsfree; 458 goto out_lsfree;
469 for (i = 0; i < size; i++) { 459 for (i = 0; i < size; i++) {
@@ -472,22 +462,13 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
472 spin_lock_init(&ls->ls_rsbtbl[i].lock); 462 spin_lock_init(&ls->ls_rsbtbl[i].lock);
473 } 463 }
474 464
475 size = dlm_config.ci_lkbtbl_size; 465 idr_init(&ls->ls_lkbidr);
476 ls->ls_lkbtbl_size = size; 466 spin_lock_init(&ls->ls_lkbidr_spin);
477
478 ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS);
479 if (!ls->ls_lkbtbl)
480 goto out_rsbfree;
481 for (i = 0; i < size; i++) {
482 INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
483 rwlock_init(&ls->ls_lkbtbl[i].lock);
484 ls->ls_lkbtbl[i].counter = 1;
485 }
486 467
487 size = dlm_config.ci_dirtbl_size; 468 size = dlm_config.ci_dirtbl_size;
488 ls->ls_dirtbl_size = size; 469 ls->ls_dirtbl_size = size;
489 470
490 ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS); 471 ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size);
491 if (!ls->ls_dirtbl) 472 if (!ls->ls_dirtbl)
492 goto out_lkbfree; 473 goto out_lkbfree;
493 for (i = 0; i < size; i++) { 474 for (i = 0; i < size; i++) {
@@ -502,6 +483,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
502 INIT_LIST_HEAD(&ls->ls_timeout); 483 INIT_LIST_HEAD(&ls->ls_timeout);
503 mutex_init(&ls->ls_timeout_mutex); 484 mutex_init(&ls->ls_timeout_mutex);
504 485
486 INIT_LIST_HEAD(&ls->ls_new_rsb);
487 spin_lock_init(&ls->ls_new_rsb_spin);
488
505 INIT_LIST_HEAD(&ls->ls_nodes); 489 INIT_LIST_HEAD(&ls->ls_nodes);
506 INIT_LIST_HEAD(&ls->ls_nodes_gone); 490 INIT_LIST_HEAD(&ls->ls_nodes_gone);
507 ls->ls_num_nodes = 0; 491 ls->ls_num_nodes = 0;
@@ -520,6 +504,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
520 init_completion(&ls->ls_members_done); 504 init_completion(&ls->ls_members_done);
521 ls->ls_members_result = -1; 505 ls->ls_members_result = -1;
522 506
507 mutex_init(&ls->ls_cb_mutex);
508 INIT_LIST_HEAD(&ls->ls_cb_delay);
509
523 ls->ls_recoverd_task = NULL; 510 ls->ls_recoverd_task = NULL;
524 mutex_init(&ls->ls_recoverd_active); 511 mutex_init(&ls->ls_recoverd_active);
525 spin_lock_init(&ls->ls_recover_lock); 512 spin_lock_init(&ls->ls_recover_lock);
@@ -553,18 +540,26 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
553 list_add(&ls->ls_list, &lslist); 540 list_add(&ls->ls_list, &lslist);
554 spin_unlock(&lslist_lock); 541 spin_unlock(&lslist_lock);
555 542
543 if (flags & DLM_LSFL_FS) {
544 error = dlm_callback_start(ls);
545 if (error) {
546 log_error(ls, "can't start dlm_callback %d", error);
547 goto out_delist;
548 }
549 }
550
556 /* needs to find ls in lslist */ 551 /* needs to find ls in lslist */
557 error = dlm_recoverd_start(ls); 552 error = dlm_recoverd_start(ls);
558 if (error) { 553 if (error) {
559 log_error(ls, "can't start dlm_recoverd %d", error); 554 log_error(ls, "can't start dlm_recoverd %d", error);
560 goto out_delist; 555 goto out_callback;
561 } 556 }
562 557
563 ls->ls_kobj.kset = dlm_kset; 558 ls->ls_kobj.kset = dlm_kset;
564 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL, 559 error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
565 "%s", ls->ls_name); 560 "%s", ls->ls_name);
566 if (error) 561 if (error)
567 goto out_stop; 562 goto out_recoverd;
568 kobject_uevent(&ls->ls_kobj, KOBJ_ADD); 563 kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
569 564
570 /* let kobject handle freeing of ls if there's an error */ 565 /* let kobject handle freeing of ls if there's an error */
@@ -578,7 +573,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
578 573
579 error = do_uevent(ls, 1); 574 error = do_uevent(ls, 1);
580 if (error) 575 if (error)
581 goto out_stop; 576 goto out_recoverd;
582 577
583 wait_for_completion(&ls->ls_members_done); 578 wait_for_completion(&ls->ls_members_done);
584 error = ls->ls_members_result; 579 error = ls->ls_members_result;
@@ -595,19 +590,20 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
595 do_uevent(ls, 0); 590 do_uevent(ls, 0);
596 dlm_clear_members(ls); 591 dlm_clear_members(ls);
597 kfree(ls->ls_node_array); 592 kfree(ls->ls_node_array);
598 out_stop: 593 out_recoverd:
599 dlm_recoverd_stop(ls); 594 dlm_recoverd_stop(ls);
595 out_callback:
596 dlm_callback_stop(ls);
600 out_delist: 597 out_delist:
601 spin_lock(&lslist_lock); 598 spin_lock(&lslist_lock);
602 list_del(&ls->ls_list); 599 list_del(&ls->ls_list);
603 spin_unlock(&lslist_lock); 600 spin_unlock(&lslist_lock);
604 kfree(ls->ls_recover_buf); 601 kfree(ls->ls_recover_buf);
605 out_dirfree: 602 out_dirfree:
606 kfree(ls->ls_dirtbl); 603 vfree(ls->ls_dirtbl);
607 out_lkbfree: 604 out_lkbfree:
608 kfree(ls->ls_lkbtbl); 605 idr_destroy(&ls->ls_lkbidr);
609 out_rsbfree: 606 vfree(ls->ls_rsbtbl);
610 kfree(ls->ls_rsbtbl);
611 out_lsfree: 607 out_lsfree:
612 if (do_unreg) 608 if (do_unreg)
613 kobject_put(&ls->ls_kobj); 609 kobject_put(&ls->ls_kobj);
@@ -641,50 +637,64 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
641 return error; 637 return error;
642} 638}
643 639
644/* Return 1 if the lockspace still has active remote locks, 640static int lkb_idr_is_local(int id, void *p, void *data)
645 * 2 if the lockspace still has active local locks. 641{
646 */ 642 struct dlm_lkb *lkb = p;
647static int lockspace_busy(struct dlm_ls *ls) 643
648{ 644 if (!lkb->lkb_nodeid)
649 int i, lkb_found = 0; 645 return 1;
650 struct dlm_lkb *lkb; 646 return 0;
651 647}
652 /* NOTE: We check the lockidtbl here rather than the resource table. 648
653 This is because there may be LKBs queued as ASTs that have been 649static int lkb_idr_is_any(int id, void *p, void *data)
654 unlinked from their RSBs and are pending deletion once the AST has 650{
655 been delivered */ 651 return 1;
656 652}
657 for (i = 0; i < ls->ls_lkbtbl_size; i++) { 653
658 read_lock(&ls->ls_lkbtbl[i].lock); 654static int lkb_idr_free(int id, void *p, void *data)
659 if (!list_empty(&ls->ls_lkbtbl[i].list)) { 655{
660 lkb_found = 1; 656 struct dlm_lkb *lkb = p;
661 list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list, 657
662 lkb_idtbl_list) { 658 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
663 if (!lkb->lkb_nodeid) { 659 dlm_free_lvb(lkb->lkb_lvbptr);
664 read_unlock(&ls->ls_lkbtbl[i].lock); 660
665 return 2; 661 dlm_free_lkb(lkb);
666 } 662 return 0;
667 } 663}
668 } 664
669 read_unlock(&ls->ls_lkbtbl[i].lock); 665/* NOTE: We check the lkbidr here rather than the resource table.
666 This is because there may be LKBs queued as ASTs that have been unlinked
667 from their RSBs and are pending deletion once the AST has been delivered */
668
669static int lockspace_busy(struct dlm_ls *ls, int force)
670{
671 int rv;
672
673 spin_lock(&ls->ls_lkbidr_spin);
674 if (force == 0) {
675 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
676 } else if (force == 1) {
677 rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
678 } else {
679 rv = 0;
670 } 680 }
671 return lkb_found; 681 spin_unlock(&ls->ls_lkbidr_spin);
682 return rv;
672} 683}
673 684
674static int release_lockspace(struct dlm_ls *ls, int force) 685static int release_lockspace(struct dlm_ls *ls, int force)
675{ 686{
676 struct dlm_lkb *lkb;
677 struct dlm_rsb *rsb; 687 struct dlm_rsb *rsb;
678 struct list_head *head; 688 struct list_head *head;
679 int i, busy, rv; 689 int i, busy, rv;
680 690
681 busy = lockspace_busy(ls); 691 busy = lockspace_busy(ls, force);
682 692
683 spin_lock(&lslist_lock); 693 spin_lock(&lslist_lock);
684 if (ls->ls_create_count == 1) { 694 if (ls->ls_create_count == 1) {
685 if (busy > force) 695 if (busy) {
686 rv = -EBUSY; 696 rv = -EBUSY;
687 else { 697 } else {
688 /* remove_lockspace takes ls off lslist */ 698 /* remove_lockspace takes ls off lslist */
689 ls->ls_create_count = 0; 699 ls->ls_create_count = 0;
690 rv = 0; 700 rv = 0;
@@ -708,12 +718,12 @@ static int release_lockspace(struct dlm_ls *ls, int force)
708 718
709 dlm_recoverd_stop(ls); 719 dlm_recoverd_stop(ls);
710 720
721 dlm_callback_stop(ls);
722
711 remove_lockspace(ls); 723 remove_lockspace(ls);
712 724
713 dlm_delete_debug_file(ls); 725 dlm_delete_debug_file(ls);
714 726
715 dlm_astd_suspend();
716
717 kfree(ls->ls_recover_buf); 727 kfree(ls->ls_recover_buf);
718 728
719 /* 729 /*
@@ -721,31 +731,15 @@ static int release_lockspace(struct dlm_ls *ls, int force)
721 */ 731 */
722 732
723 dlm_dir_clear(ls); 733 dlm_dir_clear(ls);
724 kfree(ls->ls_dirtbl); 734 vfree(ls->ls_dirtbl);
725 735
726 /* 736 /*
727 * Free all lkb's on lkbtbl[] lists. 737 * Free all lkb's in idr
728 */ 738 */
729 739
730 for (i = 0; i < ls->ls_lkbtbl_size; i++) { 740 idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
731 head = &ls->ls_lkbtbl[i].list; 741 idr_remove_all(&ls->ls_lkbidr);
732 while (!list_empty(head)) { 742 idr_destroy(&ls->ls_lkbidr);
733 lkb = list_entry(head->next, struct dlm_lkb,
734 lkb_idtbl_list);
735
736 list_del(&lkb->lkb_idtbl_list);
737
738 dlm_del_ast(lkb);
739
740 if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
741 dlm_free_lvb(lkb->lkb_lvbptr);
742
743 dlm_free_lkb(lkb);
744 }
745 }
746 dlm_astd_resume();
747
748 kfree(ls->ls_lkbtbl);
749 743
750 /* 744 /*
751 * Free all rsb's on rsbtbl[] lists 745 * Free all rsb's on rsbtbl[] lists
@@ -770,7 +764,14 @@ static int release_lockspace(struct dlm_ls *ls, int force)
770 } 764 }
771 } 765 }
772 766
773 kfree(ls->ls_rsbtbl); 767 vfree(ls->ls_rsbtbl);
768
769 while (!list_empty(&ls->ls_new_rsb)) {
770 rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
771 res_hashchain);
772 list_del(&rsb->res_hashchain);
773 dlm_free_rsb(rsb);
774 }
774 775
775 /* 776 /*
776 * Free structures on any other lists 777 * Free structures on any other lists
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 5e2c71f05e46..990626e7da80 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -512,12 +512,10 @@ static void process_sctp_notification(struct connection *con,
512 } 512 }
513 make_sockaddr(&prim.ssp_addr, 0, &addr_len); 513 make_sockaddr(&prim.ssp_addr, 0, &addr_len);
514 if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { 514 if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
515 int i;
516 unsigned char *b=(unsigned char *)&prim.ssp_addr; 515 unsigned char *b=(unsigned char *)&prim.ssp_addr;
517 log_print("reject connect from unknown addr"); 516 log_print("reject connect from unknown addr");
518 for (i=0; i<sizeof(struct sockaddr_storage);i++) 517 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
519 printk("%02x ", b[i]); 518 b, sizeof(struct sockaddr_storage));
520 printk("\n");
521 sctp_send_shutdown(prim.ssp_assoc_id); 519 sctp_send_shutdown(prim.ssp_assoc_id);
522 return; 520 return;
523 } 521 }
@@ -748,7 +746,10 @@ static int tcp_accept_from_sock(struct connection *con)
748 /* Get the new node's NODEID */ 746 /* Get the new node's NODEID */
749 make_sockaddr(&peeraddr, 0, &len); 747 make_sockaddr(&peeraddr, 0, &len);
750 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { 748 if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
749 unsigned char *b=(unsigned char *)&peeraddr;
751 log_print("connect from non cluster node"); 750 log_print("connect from non cluster node");
751 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
752 b, sizeof(struct sockaddr_storage));
752 sock_release(newsock); 753 sock_release(newsock);
753 mutex_unlock(&con->sock_mutex); 754 mutex_unlock(&con->sock_mutex);
754 return -1; 755 return -1;
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 8e0d00db004f..da64df7576e1 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -16,6 +16,7 @@
16#include "memory.h" 16#include "memory.h"
17 17
18static struct kmem_cache *lkb_cache; 18static struct kmem_cache *lkb_cache;
19static struct kmem_cache *rsb_cache;
19 20
20 21
21int __init dlm_memory_init(void) 22int __init dlm_memory_init(void)
@@ -26,6 +27,14 @@ int __init dlm_memory_init(void)
26 __alignof__(struct dlm_lkb), 0, NULL); 27 __alignof__(struct dlm_lkb), 0, NULL);
27 if (!lkb_cache) 28 if (!lkb_cache)
28 ret = -ENOMEM; 29 ret = -ENOMEM;
30
31 rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
32 __alignof__(struct dlm_rsb), 0, NULL);
33 if (!rsb_cache) {
34 kmem_cache_destroy(lkb_cache);
35 ret = -ENOMEM;
36 }
37
29 return ret; 38 return ret;
30} 39}
31 40
@@ -33,6 +42,8 @@ void dlm_memory_exit(void)
33{ 42{
34 if (lkb_cache) 43 if (lkb_cache)
35 kmem_cache_destroy(lkb_cache); 44 kmem_cache_destroy(lkb_cache);
45 if (rsb_cache)
46 kmem_cache_destroy(rsb_cache);
36} 47}
37 48
38char *dlm_allocate_lvb(struct dlm_ls *ls) 49char *dlm_allocate_lvb(struct dlm_ls *ls)
@@ -48,16 +59,11 @@ void dlm_free_lvb(char *p)
48 kfree(p); 59 kfree(p);
49} 60}
50 61
51/* FIXME: have some minimal space built-in to rsb for the name and 62struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls)
52 kmalloc a separate name if needed, like dentries are done */
53
54struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
55{ 63{
56 struct dlm_rsb *r; 64 struct dlm_rsb *r;
57 65
58 DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,); 66 r = kmem_cache_zalloc(rsb_cache, GFP_NOFS);
59
60 r = kzalloc(sizeof(*r) + namelen, GFP_NOFS);
61 return r; 67 return r;
62} 68}
63 69
@@ -65,7 +71,7 @@ void dlm_free_rsb(struct dlm_rsb *r)
65{ 71{
66 if (r->res_lvbptr) 72 if (r->res_lvbptr)
67 dlm_free_lvb(r->res_lvbptr); 73 dlm_free_lvb(r->res_lvbptr);
68 kfree(r); 74 kmem_cache_free(rsb_cache, r);
69} 75}
70 76
71struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls) 77struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h
index 485fb29143bd..177c11cbb0a6 100644
--- a/fs/dlm/memory.h
+++ b/fs/dlm/memory.h
@@ -16,7 +16,7 @@
16 16
17int dlm_memory_init(void); 17int dlm_memory_init(void);
18void dlm_memory_exit(void); 18void dlm_memory_exit(void);
19struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen); 19struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls);
20void dlm_free_rsb(struct dlm_rsb *r); 20void dlm_free_rsb(struct dlm_rsb *r);
21struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls); 21struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls);
22void dlm_free_lkb(struct dlm_lkb *l); 22void dlm_free_lkb(struct dlm_lkb *l);
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index fd677c8c3d3b..774da3cf92c6 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -58,13 +58,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
58 58
59 mutex_lock(&ls->ls_recoverd_active); 59 mutex_lock(&ls->ls_recoverd_active);
60 60
61 /* 61 dlm_callback_suspend(ls);
62 * Suspending and resuming dlm_astd ensures that no lkb's from this ls
63 * will be processed by dlm_astd during recovery.
64 */
65
66 dlm_astd_suspend();
67 dlm_astd_resume();
68 62
69 /* 63 /*
70 * Free non-master tossed rsb's. Master rsb's are kept on toss 64 * Free non-master tossed rsb's. Master rsb's are kept on toss
@@ -202,6 +196,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
202 196
203 dlm_adjust_timeouts(ls); 197 dlm_adjust_timeouts(ls);
204 198
199 dlm_callback_resume(ls);
200
205 error = enable_locking(ls, rv->seq); 201 error = enable_locking(ls, rv->seq);
206 if (error) { 202 if (error) {
207 log_debug(ls, "enable_locking failed %d", error); 203 log_debug(ls, "enable_locking failed %d", error);
@@ -222,8 +218,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
222 218
223 dlm_grant_after_purge(ls); 219 dlm_grant_after_purge(ls);
224 220
225 dlm_astd_wake();
226
227 log_debug(ls, "recover %llx done: %u ms", 221 log_debug(ls, "recover %llx done: %u ms",
228 (unsigned long long)rv->seq, 222 (unsigned long long)rv->seq,
229 jiffies_to_msecs(jiffies - start)); 223 jiffies_to_msecs(jiffies - start));
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index e96bf3e9be88..d8ea60756403 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -213,9 +213,9 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
213 goto out; 213 goto out;
214 } 214 }
215 215
216 if (list_empty(&lkb->lkb_astqueue)) { 216 if (list_empty(&lkb->lkb_cb_list)) {
217 kref_get(&lkb->lkb_ref); 217 kref_get(&lkb->lkb_ref);
218 list_add_tail(&lkb->lkb_astqueue, &proc->asts); 218 list_add_tail(&lkb->lkb_cb_list, &proc->asts);
219 wake_up_interruptible(&proc->wait); 219 wake_up_interruptible(&proc->wait);
220 } 220 }
221 spin_unlock(&proc->asts_spin); 221 spin_unlock(&proc->asts_spin);
@@ -832,24 +832,24 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
832 } 832 }
833 833
834 /* if we empty lkb_callbacks, we don't want to unlock the spinlock 834 /* if we empty lkb_callbacks, we don't want to unlock the spinlock
835 without removing lkb_astqueue; so empty lkb_astqueue is always 835 without removing lkb_cb_list; so empty lkb_cb_list is always
836 consistent with empty lkb_callbacks */ 836 consistent with empty lkb_callbacks */
837 837
838 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue); 838 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list);
839 839
840 rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid); 840 rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid);
841 if (rv < 0) { 841 if (rv < 0) {
842 /* this shouldn't happen; lkb should have been removed from 842 /* this shouldn't happen; lkb should have been removed from
843 list when resid was zero */ 843 list when resid was zero */
844 log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id); 844 log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id);
845 list_del_init(&lkb->lkb_astqueue); 845 list_del_init(&lkb->lkb_cb_list);
846 spin_unlock(&proc->asts_spin); 846 spin_unlock(&proc->asts_spin);
847 /* removes ref for proc->asts, may cause lkb to be freed */ 847 /* removes ref for proc->asts, may cause lkb to be freed */
848 dlm_put_lkb(lkb); 848 dlm_put_lkb(lkb);
849 goto try_another; 849 goto try_another;
850 } 850 }
851 if (!resid) 851 if (!resid)
852 list_del_init(&lkb->lkb_astqueue); 852 list_del_init(&lkb->lkb_cb_list);
853 spin_unlock(&proc->asts_spin); 853 spin_unlock(&proc->asts_spin);
854 854
855 if (cb.flags & DLM_CB_SKIP) { 855 if (cb.flags & DLM_CB_SKIP) {
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 4ec9eb00a241..c6ac98cf9baa 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -270,14 +270,15 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
270} 270}
271 271
272static int 272static int
273ecryptfs_fsync(struct file *file, int datasync) 273ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
274{ 274{
275 int rc = 0; 275 int rc = 0;
276 276
277 rc = generic_file_fsync(file, datasync); 277 rc = generic_file_fsync(file, start, end, datasync);
278 if (rc) 278 if (rc)
279 goto out; 279 goto out;
280 rc = vfs_fsync(ecryptfs_file_to_lower(file), datasync); 280 rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end,
281 datasync);
281out: 282out:
282 return rc; 283 return rc;
283} 284}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 7349ade17de6..340c657a108c 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -147,7 +147,6 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
147 * @lower_dir_inode: inode of the parent in the lower fs of the new file 147 * @lower_dir_inode: inode of the parent in the lower fs of the new file
148 * @dentry: New file's dentry 148 * @dentry: New file's dentry
149 * @mode: The mode of the new file 149 * @mode: The mode of the new file
150 * @nd: nameidata of ecryptfs' parent's dentry & vfsmount
151 * 150 *
152 * Creates the file in the lower file system. 151 * Creates the file in the lower file system.
153 * 152 *
@@ -155,31 +154,10 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
155 */ 154 */
156static int 155static int
157ecryptfs_create_underlying_file(struct inode *lower_dir_inode, 156ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
158 struct dentry *dentry, int mode, 157 struct dentry *dentry, int mode)
159 struct nameidata *nd)
160{ 158{
161 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); 159 struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
162 struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); 160 return vfs_create(lower_dir_inode, lower_dentry, mode, NULL);
163 struct dentry *dentry_save;
164 struct vfsmount *vfsmount_save;
165 unsigned int flags_save;
166 int rc;
167
168 if (nd) {
169 dentry_save = nd->path.dentry;
170 vfsmount_save = nd->path.mnt;
171 flags_save = nd->flags;
172 nd->path.dentry = lower_dentry;
173 nd->path.mnt = lower_mnt;
174 nd->flags &= ~LOOKUP_OPEN;
175 }
176 rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
177 if (nd) {
178 nd->path.dentry = dentry_save;
179 nd->path.mnt = vfsmount_save;
180 nd->flags = flags_save;
181 }
182 return rc;
183} 161}
184 162
185/** 163/**
@@ -197,8 +175,7 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
197 */ 175 */
198static int 176static int
199ecryptfs_do_create(struct inode *directory_inode, 177ecryptfs_do_create(struct inode *directory_inode,
200 struct dentry *ecryptfs_dentry, int mode, 178 struct dentry *ecryptfs_dentry, int mode)
201 struct nameidata *nd)
202{ 179{
203 int rc; 180 int rc;
204 struct dentry *lower_dentry; 181 struct dentry *lower_dentry;
@@ -213,7 +190,7 @@ ecryptfs_do_create(struct inode *directory_inode,
213 goto out; 190 goto out;
214 } 191 }
215 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode, 192 rc = ecryptfs_create_underlying_file(lower_dir_dentry->d_inode,
216 ecryptfs_dentry, mode, nd); 193 ecryptfs_dentry, mode);
217 if (rc) { 194 if (rc) {
218 printk(KERN_ERR "%s: Failure to create dentry in lower fs; " 195 printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
219 "rc = [%d]\n", __func__, rc); 196 "rc = [%d]\n", __func__, rc);
@@ -294,7 +271,7 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
294 int rc; 271 int rc;
295 272
296 /* ecryptfs_do_create() calls ecryptfs_interpose() */ 273 /* ecryptfs_do_create() calls ecryptfs_interpose() */
297 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode, nd); 274 rc = ecryptfs_do_create(directory_inode, ecryptfs_dentry, mode);
298 if (unlikely(rc)) { 275 if (unlikely(rc)) {
299 ecryptfs_printk(KERN_WARNING, "Failed to create file in" 276 ecryptfs_printk(KERN_WARNING, "Failed to create file in"
300 "lower filesystem\n"); 277 "lower filesystem\n");
@@ -942,10 +919,8 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
942} 919}
943 920
944static int 921static int
945ecryptfs_permission(struct inode *inode, int mask, unsigned int flags) 922ecryptfs_permission(struct inode *inode, int mask)
946{ 923{
947 if (flags & IPERM_FLAG_RCU)
948 return -ECHILD;
949 return inode_permission(ecryptfs_inode_to_lower(inode), mask); 924 return inode_permission(ecryptfs_inode_to_lower(inode), mask);
950} 925}
951 926
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 1511bf9e5f80..832b10ded82f 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -60,14 +60,11 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
60 60
61struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { 61struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) {
62 efs_ino_t inodenum; 62 efs_ino_t inodenum;
63 struct inode * inode = NULL; 63 struct inode *inode = NULL;
64 64
65 inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len); 65 inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
66 if (inodenum) { 66 if (inodenum)
67 inode = efs_iget(dir->i_sb, inodenum); 67 inode = efs_iget(dir->i_sb, inodenum);
68 if (IS_ERR(inode))
69 return ERR_CAST(inode);
70 }
71 68
72 return d_splice_alias(inode, dentry); 69 return d_splice_alias(inode, dentry);
73} 70}
diff --git a/fs/exec.c b/fs/exec.c
index 6075a1e727ae..842d5700c155 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -963,9 +963,18 @@ static int de_thread(struct task_struct *tsk)
963 leader->group_leader = tsk; 963 leader->group_leader = tsk;
964 964
965 tsk->exit_signal = SIGCHLD; 965 tsk->exit_signal = SIGCHLD;
966 leader->exit_signal = -1;
966 967
967 BUG_ON(leader->exit_state != EXIT_ZOMBIE); 968 BUG_ON(leader->exit_state != EXIT_ZOMBIE);
968 leader->exit_state = EXIT_DEAD; 969 leader->exit_state = EXIT_DEAD;
970
971 /*
972 * We are going to release_task()->ptrace_unlink() silently,
973 * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
974 * the tracer won't block again waiting for this thread.
975 */
976 if (unlikely(leader->ptrace))
977 __wake_up_parent(leader, leader->parent);
969 write_unlock_irq(&tasklist_lock); 978 write_unlock_irq(&tasklist_lock);
970 979
971 release_task(leader); 980 release_task(leader);
@@ -1105,6 +1114,13 @@ out:
1105} 1114}
1106EXPORT_SYMBOL(flush_old_exec); 1115EXPORT_SYMBOL(flush_old_exec);
1107 1116
1117void would_dump(struct linux_binprm *bprm, struct file *file)
1118{
1119 if (inode_permission(file->f_path.dentry->d_inode, MAY_READ) < 0)
1120 bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
1121}
1122EXPORT_SYMBOL(would_dump);
1123
1108void setup_new_exec(struct linux_binprm * bprm) 1124void setup_new_exec(struct linux_binprm * bprm)
1109{ 1125{
1110 int i, ch; 1126 int i, ch;
@@ -1144,9 +1160,10 @@ void setup_new_exec(struct linux_binprm * bprm)
1144 if (bprm->cred->uid != current_euid() || 1160 if (bprm->cred->uid != current_euid() ||
1145 bprm->cred->gid != current_egid()) { 1161 bprm->cred->gid != current_egid()) {
1146 current->pdeath_signal = 0; 1162 current->pdeath_signal = 0;
1147 } else if (file_permission(bprm->file, MAY_READ) || 1163 } else {
1148 bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP) { 1164 would_dump(bprm, bprm->file);
1149 set_dumpable(current->mm, suid_dumpable); 1165 if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
1166 set_dumpable(current->mm, suid_dumpable);
1150 } 1167 }
1151 1168
1152 /* 1169 /*
@@ -1225,7 +1242,12 @@ int check_unsafe_exec(struct linux_binprm *bprm)
1225 unsigned n_fs; 1242 unsigned n_fs;
1226 int res = 0; 1243 int res = 0;
1227 1244
1228 bprm->unsafe = tracehook_unsafe_exec(p); 1245 if (p->ptrace) {
1246 if (p->ptrace & PT_PTRACE_CAP)
1247 bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
1248 else
1249 bprm->unsafe |= LSM_UNSAFE_PTRACE;
1250 }
1229 1251
1230 n_fs = 1; 1252 n_fs = 1;
1231 spin_lock(&p->fs->lock); 1253 spin_lock(&p->fs->lock);
@@ -1353,6 +1375,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1353 unsigned int depth = bprm->recursion_depth; 1375 unsigned int depth = bprm->recursion_depth;
1354 int try,retval; 1376 int try,retval;
1355 struct linux_binfmt *fmt; 1377 struct linux_binfmt *fmt;
1378 pid_t old_pid;
1356 1379
1357 retval = security_bprm_check(bprm); 1380 retval = security_bprm_check(bprm);
1358 if (retval) 1381 if (retval)
@@ -1362,6 +1385,11 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1362 if (retval) 1385 if (retval)
1363 return retval; 1386 return retval;
1364 1387
1388 /* Need to fetch pid before load_binary changes it */
1389 rcu_read_lock();
1390 old_pid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
1391 rcu_read_unlock();
1392
1365 retval = -ENOENT; 1393 retval = -ENOENT;
1366 for (try=0; try<2; try++) { 1394 for (try=0; try<2; try++) {
1367 read_lock(&binfmt_lock); 1395 read_lock(&binfmt_lock);
@@ -1381,7 +1409,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
1381 bprm->recursion_depth = depth; 1409 bprm->recursion_depth = depth;
1382 if (retval >= 0) { 1410 if (retval >= 0) {
1383 if (depth == 0) 1411 if (depth == 0)
1384 tracehook_report_exec(fmt, bprm, regs); 1412 ptrace_event(PTRACE_EVENT_EXEC,
1413 old_pid);
1385 put_binfmt(fmt); 1414 put_binfmt(fmt);
1386 allow_write_access(bprm->file); 1415 allow_write_access(bprm->file);
1387 if (bprm->file) 1416 if (bprm->file)
@@ -1769,7 +1798,7 @@ static int zap_process(struct task_struct *start, int exit_code)
1769 1798
1770 t = start; 1799 t = start;
1771 do { 1800 do {
1772 task_clear_group_stop_pending(t); 1801 task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
1773 if (t != current && t->mm) { 1802 if (t != current && t->mm) {
1774 sigaddset(&t->pending.signal, SIGKILL); 1803 sigaddset(&t->pending.signal, SIGKILL);
1775 signal_wake_up(t, 1); 1804 signal_wake_up(t, 1);
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 45ca323d8363..491c6c078e7f 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -42,11 +42,19 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
42 * Note, in exofs all metadata is written as part of inode, regardless. 42 * Note, in exofs all metadata is written as part of inode, regardless.
43 * The writeout is synchronous 43 * The writeout is synchronous
44 */ 44 */
45static int exofs_file_fsync(struct file *filp, int datasync) 45static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
46 int datasync)
46{ 47{
48 struct inode *inode = filp->f_mapping->host;
47 int ret; 49 int ret;
48 50
51 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
52 if (ret)
53 return ret;
54
55 mutex_lock(&inode->i_mutex);
49 ret = sync_inode_metadata(filp->f_mapping->host, 1); 56 ret = sync_inode_metadata(filp->f_mapping->host, 1);
57 mutex_unlock(&inode->i_mutex);
50 return ret; 58 return ret;
51} 59}
52 60
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 4d70db110cfc..b54c43775f17 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -55,12 +55,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
55 return ERR_PTR(-ENAMETOOLONG); 55 return ERR_PTR(-ENAMETOOLONG);
56 56
57 ino = exofs_inode_by_name(dir, dentry); 57 ino = exofs_inode_by_name(dir, dentry);
58 inode = NULL; 58 inode = ino ? exofs_iget(dir->i_sb, ino) : NULL;
59 if (ino) {
60 inode = exofs_iget(dir->i_sb, ino);
61 if (IS_ERR(inode))
62 return ERR_CAST(inode);
63 }
64 return d_splice_alias(inode, dentry); 59 return d_splice_alias(inode, dentry);
65} 60}
66 61
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index abea5a17c764..bfe651f9ae16 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -232,11 +232,11 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
232} 232}
233 233
234int 234int
235ext2_check_acl(struct inode *inode, int mask, unsigned int flags) 235ext2_check_acl(struct inode *inode, int mask)
236{ 236{
237 struct posix_acl *acl; 237 struct posix_acl *acl;
238 238
239 if (flags & IPERM_FLAG_RCU) { 239 if (mask & MAY_NOT_BLOCK) {
240 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 240 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
241 return -ECHILD; 241 return -ECHILD;
242 return -EAGAIN; 242 return -EAGAIN;
diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h
index c939b7b12099..3ff6cbb9ac44 100644
--- a/fs/ext2/acl.h
+++ b/fs/ext2/acl.h
@@ -54,7 +54,7 @@ static inline int ext2_acl_count(size_t size)
54#ifdef CONFIG_EXT2_FS_POSIX_ACL 54#ifdef CONFIG_EXT2_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext2_check_acl (struct inode *, int, unsigned int); 57extern int ext2_check_acl (struct inode *, int);
58extern int ext2_acl_chmod (struct inode *); 58extern int ext2_acl_chmod (struct inode *);
59extern int ext2_init_acl (struct inode *, struct inode *); 59extern int ext2_init_acl (struct inode *, struct inode *);
60 60
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 645be9e7ee47..af9fc89b1b2d 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -150,7 +150,8 @@ extern void ext2_write_super (struct super_block *);
150extern const struct file_operations ext2_dir_operations; 150extern const struct file_operations ext2_dir_operations;
151 151
152/* file.c */ 152/* file.c */
153extern int ext2_fsync(struct file *file, int datasync); 153extern int ext2_fsync(struct file *file, loff_t start, loff_t end,
154 int datasync);
154extern const struct inode_operations ext2_file_inode_operations; 155extern const struct inode_operations ext2_file_inode_operations;
155extern const struct file_operations ext2_file_operations; 156extern const struct file_operations ext2_file_operations;
156extern const struct file_operations ext2_xip_file_operations; 157extern const struct file_operations ext2_xip_file_operations;
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 49eec9456c5b..82e06321de35 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -40,13 +40,13 @@ static int ext2_release_file (struct inode * inode, struct file * filp)
40 return 0; 40 return 0;
41} 41}
42 42
43int ext2_fsync(struct file *file, int datasync) 43int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
44{ 44{
45 int ret; 45 int ret;
46 struct super_block *sb = file->f_mapping->host->i_sb; 46 struct super_block *sb = file->f_mapping->host->i_sb;
47 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping; 47 struct address_space *mapping = sb->s_bdev->bd_inode->i_mapping;
48 48
49 ret = generic_file_fsync(file, datasync); 49 ret = generic_file_fsync(file, start, end, datasync);
50 if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) { 50 if (ret == -EIO || test_and_clear_bit(AS_EIO, &mapping->flags)) {
51 /* We don't really know where the IO error happened... */ 51 /* We don't really know where the IO error happened... */
52 ext2_error(sb, __func__, 52 ext2_error(sb, __func__,
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 788e09a07f7e..a8a58f63f07c 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -843,8 +843,8 @@ ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
843 struct inode *inode = mapping->host; 843 struct inode *inode = mapping->host;
844 ssize_t ret; 844 ssize_t ret;
845 845
846 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 846 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
847 iov, offset, nr_segs, ext2_get_block, NULL); 847 ext2_get_block);
848 if (ret < 0 && (rw & WRITE)) 848 if (ret < 0 && (rw & WRITE))
849 ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); 849 ext2_write_failed(mapping, offset + iov_length(iov, nr_segs));
850 return ret; 850 return ret;
@@ -1184,6 +1184,8 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
1184 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) 1184 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1185 return -EPERM; 1185 return -EPERM;
1186 1186
1187 inode_dio_wait(inode);
1188
1187 if (mapping_is_xip(inode->i_mapping)) 1189 if (mapping_is_xip(inode->i_mapping))
1188 error = xip_truncate_page(inode->i_mapping, newsize); 1190 error = xip_truncate_page(inode->i_mapping, newsize);
1189 else if (test_opt(inode->i_sb, NOBH)) 1191 else if (test_opt(inode->i_sb, NOBH))
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index ed5c5d496ee9..d60b7099e2db 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -67,15 +67,11 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str
67 inode = NULL; 67 inode = NULL;
68 if (ino) { 68 if (ino) {
69 inode = ext2_iget(dir->i_sb, ino); 69 inode = ext2_iget(dir->i_sb, ino);
70 if (IS_ERR(inode)) { 70 if (inode == ERR_PTR(-ESTALE)) {
71 if (PTR_ERR(inode) == -ESTALE) { 71 ext2_error(dir->i_sb, __func__,
72 ext2_error(dir->i_sb, __func__, 72 "deleted inode referenced: %lu",
73 "deleted inode referenced: %lu", 73 (unsigned long) ino);
74 (unsigned long) ino); 74 return ERR_PTR(-EIO);
75 return ERR_PTR(-EIO);
76 } else {
77 return ERR_CAST(inode);
78 }
79 } 75 }
80 } 76 }
81 return d_splice_alias(inode, dentry); 77 return d_splice_alias(inode, dentry);
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 9d021c0d472a..edfeb293d4cb 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -240,11 +240,11 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
240} 240}
241 241
242int 242int
243ext3_check_acl(struct inode *inode, int mask, unsigned int flags) 243ext3_check_acl(struct inode *inode, int mask)
244{ 244{
245 struct posix_acl *acl; 245 struct posix_acl *acl;
246 246
247 if (flags & IPERM_FLAG_RCU) { 247 if (mask & MAY_NOT_BLOCK) {
248 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 248 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
249 return -ECHILD; 249 return -ECHILD;
250 return -EAGAIN; 250 return -EAGAIN;
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 5faf8048e906..597334626de9 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,7 +54,7 @@ static inline int ext3_acl_count(size_t size)
54#ifdef CONFIG_EXT3_FS_POSIX_ACL 54#ifdef CONFIG_EXT3_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext3_check_acl (struct inode *, int, unsigned int); 57extern int ext3_check_acl (struct inode *, int);
58extern int ext3_acl_chmod (struct inode *); 58extern int ext3_acl_chmod (struct inode *);
59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); 59extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 09b13bb34c94..0bcf63adb80a 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -43,7 +43,7 @@
43 * inode to disk. 43 * inode to disk.
44 */ 44 */
45 45
46int ext3_sync_file(struct file *file, int datasync) 46int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
47{ 47{
48 struct inode *inode = file->f_mapping->host; 48 struct inode *inode = file->f_mapping->host;
49 struct ext3_inode_info *ei = EXT3_I(inode); 49 struct ext3_inode_info *ei = EXT3_I(inode);
@@ -54,6 +54,17 @@ int ext3_sync_file(struct file *file, int datasync)
54 if (inode->i_sb->s_flags & MS_RDONLY) 54 if (inode->i_sb->s_flags & MS_RDONLY)
55 return 0; 55 return 0;
56 56
57 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
58 if (ret)
59 return ret;
60
61 /*
62 * Taking the mutex here just to keep consistent with how fsync was
63 * called previously, however it looks like we don't need to take
64 * i_mutex at all.
65 */
66 mutex_lock(&inode->i_mutex);
67
57 J_ASSERT(ext3_journal_current_handle() == NULL); 68 J_ASSERT(ext3_journal_current_handle() == NULL);
58 69
59 /* 70 /*
@@ -70,8 +81,10 @@ int ext3_sync_file(struct file *file, int datasync)
70 * (they were dirtied by commit). But that's OK - the blocks are 81 * (they were dirtied by commit). But that's OK - the blocks are
71 * safe in-journal, which is all fsync() needs to ensure. 82 * safe in-journal, which is all fsync() needs to ensure.
72 */ 83 */
73 if (ext3_should_journal_data(inode)) 84 if (ext3_should_journal_data(inode)) {
85 mutex_unlock(&inode->i_mutex);
74 return ext3_force_commit(inode->i_sb); 86 return ext3_force_commit(inode->i_sb);
87 }
75 88
76 if (datasync) 89 if (datasync)
77 commit_tid = atomic_read(&ei->i_datasync_tid); 90 commit_tid = atomic_read(&ei->i_datasync_tid);
@@ -91,5 +104,6 @@ int ext3_sync_file(struct file *file, int datasync)
91 */ 104 */
92 if (needs_barrier) 105 if (needs_barrier)
93 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 106 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
107 mutex_unlock(&inode->i_mutex);
94 return ret; 108 return ret;
95} 109}
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3451d23c3bae..2978a2a17a59 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1816,9 +1816,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
1816 } 1816 }
1817 1817
1818retry: 1818retry:
1819 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 1819 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
1820 offset, nr_segs, 1820 ext3_get_block);
1821 ext3_get_block, NULL);
1822 /* 1821 /*
1823 * In case of error extending write may have instantiated a few 1822 * In case of error extending write may have instantiated a few
1824 * blocks outside i_size. Trim these off again. 1823 * blocks outside i_size. Trim these off again.
@@ -3216,6 +3215,9 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
3216 ext3_journal_stop(handle); 3215 ext3_journal_stop(handle);
3217 } 3216 }
3218 3217
3218 if (attr->ia_valid & ATTR_SIZE)
3219 inode_dio_wait(inode);
3220
3219 if (S_ISREG(inode->i_mode) && 3221 if (S_ISREG(inode->i_mode) &&
3220 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { 3222 attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
3221 handle_t *handle; 3223 handle_t *handle;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 34b6d9bfc48a..c095cf5640c7 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1038,15 +1038,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
1038 return ERR_PTR(-EIO); 1038 return ERR_PTR(-EIO);
1039 } 1039 }
1040 inode = ext3_iget(dir->i_sb, ino); 1040 inode = ext3_iget(dir->i_sb, ino);
1041 if (IS_ERR(inode)) { 1041 if (inode == ERR_PTR(-ESTALE)) {
1042 if (PTR_ERR(inode) == -ESTALE) { 1042 ext3_error(dir->i_sb, __func__,
1043 ext3_error(dir->i_sb, __func__, 1043 "deleted inode referenced: %lu",
1044 "deleted inode referenced: %lu", 1044 ino);
1045 ino); 1045 return ERR_PTR(-EIO);
1046 return ERR_PTR(-EIO);
1047 } else {
1048 return ERR_CAST(inode);
1049 }
1050 } 1046 }
1051 } 1047 }
1052 return d_splice_alias(inode, dentry); 1048 return d_splice_alias(inode, dentry);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index aad153ef6b78..b57ea2f91269 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1718,6 +1718,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1718 sbi->s_resuid = le16_to_cpu(es->s_def_resuid); 1718 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
1719 sbi->s_resgid = le16_to_cpu(es->s_def_resgid); 1719 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
1720 1720
1721 /* enable barriers by default */
1722 set_opt(sbi->s_mount_opt, BARRIER);
1721 set_opt(sbi->s_mount_opt, RESERVATION); 1723 set_opt(sbi->s_mount_opt, RESERVATION);
1722 1724
1723 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 1725 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 21eacd7b7d79..60d900fcc3db 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -238,11 +238,11 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type,
238} 238}
239 239
240int 240int
241ext4_check_acl(struct inode *inode, int mask, unsigned int flags) 241ext4_check_acl(struct inode *inode, int mask)
242{ 242{
243 struct posix_acl *acl; 243 struct posix_acl *acl;
244 244
245 if (flags & IPERM_FLAG_RCU) { 245 if (mask & MAY_NOT_BLOCK) {
246 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 246 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
247 return -ECHILD; 247 return -ECHILD;
248 return -EAGAIN; 248 return -EAGAIN;
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
index dec821168fd4..9d843d5deac4 100644
--- a/fs/ext4/acl.h
+++ b/fs/ext4/acl.h
@@ -54,7 +54,7 @@ static inline int ext4_acl_count(size_t size)
54#ifdef CONFIG_EXT4_FS_POSIX_ACL 54#ifdef CONFIG_EXT4_FS_POSIX_ACL
55 55
56/* acl.c */ 56/* acl.c */
57extern int ext4_check_acl(struct inode *, int, unsigned int); 57extern int ext4_check_acl(struct inode *, int);
58extern int ext4_acl_chmod(struct inode *); 58extern int ext4_acl_chmod(struct inode *);
59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); 59extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
60 60
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1921392cd708..fa44df879711 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1758,7 +1758,7 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
1758extern void ext4_htree_free_dir_info(struct dir_private_info *p); 1758extern void ext4_htree_free_dir_info(struct dir_private_info *p);
1759 1759
1760/* fsync.c */ 1760/* fsync.c */
1761extern int ext4_sync_file(struct file *, int); 1761extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
1762extern int ext4_flush_completed_IO(struct inode *); 1762extern int ext4_flush_completed_IO(struct inode *);
1763 1763
1764/* hash.c */ 1764/* hash.c */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2c0972322009..ce766f974b1d 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -236,6 +236,27 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
236 } 236 }
237 offset += file->f_pos; 237 offset += file->f_pos;
238 break; 238 break;
239 case SEEK_DATA:
240 /*
241 * In the generic case the entire file is data, so as long as
242 * offset isn't at the end of the file then the offset is data.
243 */
244 if (offset >= inode->i_size) {
245 mutex_unlock(&inode->i_mutex);
246 return -ENXIO;
247 }
248 break;
249 case SEEK_HOLE:
250 /*
251 * There is a virtual hole at the end of the file, so as long as
252 * offset isn't i_size or larger, return i_size.
253 */
254 if (offset >= inode->i_size) {
255 mutex_unlock(&inode->i_mutex);
256 return -ENXIO;
257 }
258 offset = inode->i_size;
259 break;
239 } 260 }
240 261
241 if (offset < 0 || offset > maxbytes) { 262 if (offset < 0 || offset > maxbytes) {
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index ce66d2fe826c..da3bed3e0c29 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -151,6 +151,32 @@ static int ext4_sync_parent(struct inode *inode)
151 return ret; 151 return ret;
152} 152}
153 153
154/**
155 * __sync_file - generic_file_fsync without the locking and filemap_write
156 * @inode: inode to sync
157 * @datasync: only sync essential metadata if true
158 *
159 * This is just generic_file_fsync without the locking. This is needed for
160 * nojournal mode to make sure this inodes data/metadata makes it to disk
161 * properly. The i_mutex should be held already.
162 */
163static int __sync_inode(struct inode *inode, int datasync)
164{
165 int err;
166 int ret;
167
168 ret = sync_mapping_buffers(inode->i_mapping);
169 if (!(inode->i_state & I_DIRTY))
170 return ret;
171 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
172 return ret;
173
174 err = sync_inode_metadata(inode, 1);
175 if (ret == 0)
176 ret = err;
177 return ret;
178}
179
154/* 180/*
155 * akpm: A new design for ext4_sync_file(). 181 * akpm: A new design for ext4_sync_file().
156 * 182 *
@@ -165,7 +191,7 @@ static int ext4_sync_parent(struct inode *inode)
165 * i_mutex lock is held when entering and exiting this function 191 * i_mutex lock is held when entering and exiting this function
166 */ 192 */
167 193
168int ext4_sync_file(struct file *file, int datasync) 194int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
169{ 195{
170 struct inode *inode = file->f_mapping->host; 196 struct inode *inode = file->f_mapping->host;
171 struct ext4_inode_info *ei = EXT4_I(inode); 197 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -178,15 +204,20 @@ int ext4_sync_file(struct file *file, int datasync)
178 204
179 trace_ext4_sync_file_enter(file, datasync); 205 trace_ext4_sync_file_enter(file, datasync);
180 206
207 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
208 if (ret)
209 return ret;
210 mutex_lock(&inode->i_mutex);
211
181 if (inode->i_sb->s_flags & MS_RDONLY) 212 if (inode->i_sb->s_flags & MS_RDONLY)
182 return 0; 213 goto out;
183 214
184 ret = ext4_flush_completed_IO(inode); 215 ret = ext4_flush_completed_IO(inode);
185 if (ret < 0) 216 if (ret < 0)
186 goto out; 217 goto out;
187 218
188 if (!journal) { 219 if (!journal) {
189 ret = generic_file_fsync(file, datasync); 220 ret = __sync_inode(inode, datasync);
190 if (!ret && !list_empty(&inode->i_dentry)) 221 if (!ret && !list_empty(&inode->i_dentry))
191 ret = ext4_sync_parent(inode); 222 ret = ext4_sync_parent(inode);
192 goto out; 223 goto out;
@@ -220,6 +251,7 @@ int ext4_sync_file(struct file *file, int datasync)
220 if (needs_barrier) 251 if (needs_barrier)
221 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 252 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
222 out: 253 out:
254 mutex_unlock(&inode->i_mutex);
223 trace_ext4_sync_file_exit(inode, ret); 255 trace_ext4_sync_file_exit(inode, ret);
224 return ret; 256 return ret;
225} 257}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e3126c051006..678cde834f19 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3501,10 +3501,8 @@ retry:
3501 offset, nr_segs, 3501 offset, nr_segs,
3502 ext4_get_block, NULL, NULL, 0); 3502 ext4_get_block, NULL, NULL, 0);
3503 else { 3503 else {
3504 ret = blockdev_direct_IO(rw, iocb, inode, 3504 ret = blockdev_direct_IO(rw, iocb, inode, iov,
3505 inode->i_sb->s_bdev, iov, 3505 offset, nr_segs, ext4_get_block);
3506 offset, nr_segs,
3507 ext4_get_block, NULL);
3508 3506
3509 if (unlikely((rw & WRITE) && ret < 0)) { 3507 if (unlikely((rw & WRITE) && ret < 0)) {
3510 loff_t isize = i_size_read(inode); 3508 loff_t isize = i_size_read(inode);
@@ -3575,6 +3573,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3575 ssize_t size, void *private, int ret, 3573 ssize_t size, void *private, int ret,
3576 bool is_async) 3574 bool is_async)
3577{ 3575{
3576 struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
3578 ext4_io_end_t *io_end = iocb->private; 3577 ext4_io_end_t *io_end = iocb->private;
3579 struct workqueue_struct *wq; 3578 struct workqueue_struct *wq;
3580 unsigned long flags; 3579 unsigned long flags;
@@ -3596,6 +3595,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3596out: 3595out:
3597 if (is_async) 3596 if (is_async)
3598 aio_complete(iocb, ret, 0); 3597 aio_complete(iocb, ret, 0);
3598 inode_dio_done(inode);
3599 return; 3599 return;
3600 } 3600 }
3601 3601
@@ -3616,6 +3616,9 @@ out:
3616 /* queue the work to convert unwritten extents to written */ 3616 /* queue the work to convert unwritten extents to written */
3617 queue_work(wq, &io_end->work); 3617 queue_work(wq, &io_end->work);
3618 iocb->private = NULL; 3618 iocb->private = NULL;
3619
3620 /* XXX: probably should move into the real I/O completion handler */
3621 inode_dio_done(inode);
3619} 3622}
3620 3623
3621static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate) 3624static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
@@ -3748,11 +3751,13 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3748 EXT4_I(inode)->cur_aio_dio = iocb->private; 3751 EXT4_I(inode)->cur_aio_dio = iocb->private;
3749 } 3752 }
3750 3753
3751 ret = blockdev_direct_IO(rw, iocb, inode, 3754 ret = __blockdev_direct_IO(rw, iocb, inode,
3752 inode->i_sb->s_bdev, iov, 3755 inode->i_sb->s_bdev, iov,
3753 offset, nr_segs, 3756 offset, nr_segs,
3754 ext4_get_block_write, 3757 ext4_get_block_write,
3755 ext4_end_io_dio); 3758 ext4_end_io_dio,
3759 NULL,
3760 DIO_LOCKING | DIO_SKIP_HOLES);
3756 if (iocb->private) 3761 if (iocb->private)
3757 EXT4_I(inode)->cur_aio_dio = NULL; 3762 EXT4_I(inode)->cur_aio_dio = NULL;
3758 /* 3763 /*
@@ -5351,6 +5356,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5351 } 5356 }
5352 5357
5353 if (attr->ia_valid & ATTR_SIZE) { 5358 if (attr->ia_valid & ATTR_SIZE) {
5359 inode_dio_wait(inode);
5360
5354 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 5361 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
5355 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 5362 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
5356 5363
@@ -5843,80 +5850,84 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5843 struct page *page = vmf->page; 5850 struct page *page = vmf->page;
5844 loff_t size; 5851 loff_t size;
5845 unsigned long len; 5852 unsigned long len;
5846 int ret = -EINVAL; 5853 int ret;
5847 void *fsdata;
5848 struct file *file = vma->vm_file; 5854 struct file *file = vma->vm_file;
5849 struct inode *inode = file->f_path.dentry->d_inode; 5855 struct inode *inode = file->f_path.dentry->d_inode;
5850 struct address_space *mapping = inode->i_mapping; 5856 struct address_space *mapping = inode->i_mapping;
5857 handle_t *handle;
5858 get_block_t *get_block;
5859 int retries = 0;
5851 5860
5852 /* 5861 /*
5853 * Get i_alloc_sem to stop truncates messing with the inode. We cannot 5862 * This check is racy but catches the common case. We rely on
5854 * get i_mutex because we are already holding mmap_sem. 5863 * __block_page_mkwrite() to do a reliable check.
5855 */ 5864 */
5856 down_read(&inode->i_alloc_sem); 5865 vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
5857 size = i_size_read(inode); 5866 /* Delalloc case is easy... */
5858 if (page->mapping != mapping || size <= page_offset(page) 5867 if (test_opt(inode->i_sb, DELALLOC) &&
5859 || !PageUptodate(page)) { 5868 !ext4_should_journal_data(inode) &&
5860 /* page got truncated from under us? */ 5869 !ext4_nonda_switch(inode->i_sb)) {
5861 goto out_unlock; 5870 do {
5871 ret = __block_page_mkwrite(vma, vmf,
5872 ext4_da_get_block_prep);
5873 } while (ret == -ENOSPC &&
5874 ext4_should_retry_alloc(inode->i_sb, &retries));
5875 goto out_ret;
5862 } 5876 }
5863 ret = 0;
5864 5877
5865 lock_page(page); 5878 lock_page(page);
5866 wait_on_page_writeback(page); 5879 size = i_size_read(inode);
5867 if (PageMappedToDisk(page)) { 5880 /* Page got truncated from under us? */
5868 up_read(&inode->i_alloc_sem); 5881 if (page->mapping != mapping || page_offset(page) > size) {
5869 return VM_FAULT_LOCKED; 5882 unlock_page(page);
5883 ret = VM_FAULT_NOPAGE;
5884 goto out;
5870 } 5885 }
5871 5886
5872 if (page->index == size >> PAGE_CACHE_SHIFT) 5887 if (page->index == size >> PAGE_CACHE_SHIFT)
5873 len = size & ~PAGE_CACHE_MASK; 5888 len = size & ~PAGE_CACHE_MASK;
5874 else 5889 else
5875 len = PAGE_CACHE_SIZE; 5890 len = PAGE_CACHE_SIZE;
5876
5877 /* 5891 /*
5878 * return if we have all the buffers mapped. This avoid 5892 * Return if we have all the buffers mapped. This avoids the need to do
5879 * the need to call write_begin/write_end which does a 5893 * journal_start/journal_stop which can block and take a long time
5880 * journal_start/journal_stop which can block and take
5881 * long time
5882 */ 5894 */
5883 if (page_has_buffers(page)) { 5895 if (page_has_buffers(page)) {
5884 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL, 5896 if (!walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
5885 ext4_bh_unmapped)) { 5897 ext4_bh_unmapped)) {
5886 up_read(&inode->i_alloc_sem); 5898 /* Wait so that we don't change page under IO */
5887 return VM_FAULT_LOCKED; 5899 wait_on_page_writeback(page);
5900 ret = VM_FAULT_LOCKED;
5901 goto out;
5888 } 5902 }
5889 } 5903 }
5890 unlock_page(page); 5904 unlock_page(page);
5891 /* 5905 /* OK, we need to fill the hole... */
5892 * OK, we need to fill the hole... Do write_begin write_end 5906 if (ext4_should_dioread_nolock(inode))
5893 * to do block allocation/reservation.We are not holding 5907 get_block = ext4_get_block_write;
5894 * inode.i__mutex here. That allow * parallel write_begin, 5908 else
5895 * write_end call. lock_page prevent this from happening 5909 get_block = ext4_get_block;
5896 * on the same page though 5910retry_alloc:
5897 */ 5911 handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
5898 ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), 5912 if (IS_ERR(handle)) {
5899 len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
5900 if (ret < 0)
5901 goto out_unlock;
5902 ret = mapping->a_ops->write_end(file, mapping, page_offset(page),
5903 len, len, page, fsdata);
5904 if (ret < 0)
5905 goto out_unlock;
5906 ret = 0;
5907
5908 /*
5909 * write_begin/end might have created a dirty page and someone
5910 * could wander in and start the IO. Make sure that hasn't
5911 * happened.
5912 */
5913 lock_page(page);
5914 wait_on_page_writeback(page);
5915 up_read(&inode->i_alloc_sem);
5916 return VM_FAULT_LOCKED;
5917out_unlock:
5918 if (ret)
5919 ret = VM_FAULT_SIGBUS; 5913 ret = VM_FAULT_SIGBUS;
5920 up_read(&inode->i_alloc_sem); 5914 goto out;
5915 }
5916 ret = __block_page_mkwrite(vma, vmf, get_block);
5917 if (!ret && ext4_should_journal_data(inode)) {
5918 if (walk_page_buffers(handle, page_buffers(page), 0,
5919 PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
5920 unlock_page(page);
5921 ret = VM_FAULT_SIGBUS;
5922 goto out;
5923 }
5924 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
5925 }
5926 ext4_journal_stop(handle);
5927 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
5928 goto retry_alloc;
5929out_ret:
5930 ret = block_page_mkwrite_return(ret);
5931out:
5921 return ret; 5932 return ret;
5922} 5933}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b754b7721f51..707d605bf769 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1037,15 +1037,11 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru
1037 return ERR_PTR(-EIO); 1037 return ERR_PTR(-EIO);
1038 } 1038 }
1039 inode = ext4_iget(dir->i_sb, ino); 1039 inode = ext4_iget(dir->i_sb, ino);
1040 if (IS_ERR(inode)) { 1040 if (inode == ERR_PTR(-ESTALE)) {
1041 if (PTR_ERR(inode) == -ESTALE) { 1041 EXT4_ERROR_INODE(dir,
1042 EXT4_ERROR_INODE(dir, 1042 "deleted inode referenced: %u",
1043 "deleted inode referenced: %u", 1043 ino);
1044 ino); 1044 return ERR_PTR(-EIO);
1045 return ERR_PTR(-EIO);
1046 } else {
1047 return ERR_CAST(inode);
1048 }
1049 } 1045 }
1050 } 1046 }
1051 return d_splice_alias(inode, dentry); 1047 return d_splice_alias(inode, dentry);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 8276cc282dec..a5d3853822e0 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -109,6 +109,7 @@ struct msdos_inode_info {
109 int i_attrs; /* unused attribute bits */ 109 int i_attrs; /* unused attribute bits */
110 loff_t i_pos; /* on-disk position of directory entry or 0 */ 110 loff_t i_pos; /* on-disk position of directory entry or 0 */
111 struct hlist_node i_fat_hash; /* hash by i_location */ 111 struct hlist_node i_fat_hash; /* hash by i_location */
112 struct rw_semaphore truncate_lock; /* protect bmap against truncate */
112 struct inode vfs_inode; 113 struct inode vfs_inode;
113}; 114};
114 115
@@ -309,7 +310,8 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
309extern void fat_truncate_blocks(struct inode *inode, loff_t offset); 310extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
310extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, 311extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
311 struct kstat *stat); 312 struct kstat *stat);
312extern int fat_file_fsync(struct file *file, int datasync); 313extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
314 int datasync);
313 315
314/* fat/inode.c */ 316/* fat/inode.c */
315extern void fat_attach(struct inode *inode, loff_t i_pos); 317extern void fat_attach(struct inode *inode, loff_t i_pos);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 7018e1d8902d..c118acf16e43 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -149,12 +149,12 @@ static int fat_file_release(struct inode *inode, struct file *filp)
149 return 0; 149 return 0;
150} 150}
151 151
152int fat_file_fsync(struct file *filp, int datasync) 152int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
153{ 153{
154 struct inode *inode = filp->f_mapping->host; 154 struct inode *inode = filp->f_mapping->host;
155 int res, err; 155 int res, err;
156 156
157 res = generic_file_fsync(filp, datasync); 157 res = generic_file_fsync(filp, start, end, datasync);
158 err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping); 158 err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
159 159
160 return res ? res : err; 160 return res ? res : err;
@@ -397,6 +397,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
397 * sequence. 397 * sequence.
398 */ 398 */
399 if (attr->ia_valid & ATTR_SIZE) { 399 if (attr->ia_valid & ATTR_SIZE) {
400 inode_dio_wait(inode);
401
400 if (attr->ia_size > inode->i_size) { 402 if (attr->ia_size > inode->i_size) {
401 error = fat_cont_expand(inode, attr->ia_size); 403 error = fat_cont_expand(inode, attr->ia_size);
402 if (error || attr->ia_valid == ATTR_SIZE) 404 if (error || attr->ia_valid == ATTR_SIZE)
@@ -429,8 +431,10 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
429 } 431 }
430 432
431 if (attr->ia_valid & ATTR_SIZE) { 433 if (attr->ia_valid & ATTR_SIZE) {
434 down_write(&MSDOS_I(inode)->truncate_lock);
432 truncate_setsize(inode, attr->ia_size); 435 truncate_setsize(inode, attr->ia_size);
433 fat_truncate_blocks(inode, attr->ia_size); 436 fat_truncate_blocks(inode, attr->ia_size);
437 up_write(&MSDOS_I(inode)->truncate_lock);
434 } 438 }
435 439
436 setattr_copy(inode, attr); 440 setattr_copy(inode, attr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index cb8d8391ac0b..5942fec22c65 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -211,8 +211,8 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
211 * FAT need to use the DIO_LOCKING for avoiding the race 211 * FAT need to use the DIO_LOCKING for avoiding the race
212 * condition of fat_get_block() and ->truncate(). 212 * condition of fat_get_block() and ->truncate().
213 */ 213 */
214 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, 214 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
215 iov, offset, nr_segs, fat_get_block, NULL); 215 fat_get_block);
216 if (ret < 0 && (rw & WRITE)) 216 if (ret < 0 && (rw & WRITE))
217 fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); 217 fat_write_failed(mapping, offset + iov_length(iov, nr_segs));
218 218
@@ -224,9 +224,9 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
224 sector_t blocknr; 224 sector_t blocknr;
225 225
226 /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ 226 /* fat_get_cluster() assumes the requested blocknr isn't truncated. */
227 down_read(&mapping->host->i_alloc_sem); 227 down_read(&MSDOS_I(mapping->host)->truncate_lock);
228 blocknr = generic_block_bmap(mapping, block, fat_get_block); 228 blocknr = generic_block_bmap(mapping, block, fat_get_block);
229 up_read(&mapping->host->i_alloc_sem); 229 up_read(&MSDOS_I(mapping->host)->truncate_lock);
230 230
231 return blocknr; 231 return blocknr;
232} 232}
@@ -510,6 +510,8 @@ static struct inode *fat_alloc_inode(struct super_block *sb)
510 ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS); 510 ei = kmem_cache_alloc(fat_inode_cachep, GFP_NOFS);
511 if (!ei) 511 if (!ei)
512 return NULL; 512 return NULL;
513
514 init_rwsem(&ei->truncate_lock);
513 return &ei->vfs_inode; 515 return &ei->vfs_inode;
514} 516}
515 517
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 3b222dafd15b..66e83b845455 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -209,29 +209,20 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry,
209 int err; 209 int err;
210 210
211 lock_super(sb); 211 lock_super(sb);
212
213 err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); 212 err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo);
214 if (err) { 213 switch (err) {
215 if (err == -ENOENT) { 214 case -ENOENT:
216 inode = NULL; 215 inode = NULL;
217 goto out; 216 break;
218 } 217 case 0:
219 goto error; 218 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
220 } 219 brelse(sinfo.bh);
221 220 break;
222 inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); 221 default:
223 brelse(sinfo.bh); 222 inode = ERR_PTR(err);
224 if (IS_ERR(inode)) {
225 err = PTR_ERR(inode);
226 goto error;
227 } 223 }
228out:
229 unlock_super(sb); 224 unlock_super(sb);
230 return d_splice_alias(inode, dentry); 225 return d_splice_alias(inode, dentry);
231
232error:
233 unlock_super(sb);
234 return ERR_PTR(err);
235} 226}
236 227
237/***** Creates a directory entry (name is already formatted). */ 228/***** Creates a directory entry (name is already formatted). */
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 20b4ea53fdc4..bb3f29c3557b 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -82,10 +82,8 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd)
82 * case sensitive name which is specified by user if this is 82 * case sensitive name which is specified by user if this is
83 * for creation. 83 * for creation.
84 */ 84 */
85 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { 85 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
86 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 86 return 0;
87 return 0;
88 }
89 87
90 return vfat_revalidate_shortname(dentry); 88 return vfat_revalidate_shortname(dentry);
91} 89}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 0f015a0468de..b8c507ca42f7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -461,32 +461,6 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
461} 461}
462 462
463/* 463/*
464 * For background writeback the caller does not have the sb pinned
465 * before calling writeback. So make sure that we do pin it, so it doesn't
466 * go away while we are writing inodes from it.
467 */
468static bool pin_sb_for_writeback(struct super_block *sb)
469{
470 spin_lock(&sb_lock);
471 if (list_empty(&sb->s_instances)) {
472 spin_unlock(&sb_lock);
473 return false;
474 }
475
476 sb->s_count++;
477 spin_unlock(&sb_lock);
478
479 if (down_read_trylock(&sb->s_umount)) {
480 if (sb->s_root)
481 return true;
482 up_read(&sb->s_umount);
483 }
484
485 put_super(sb);
486 return false;
487}
488
489/*
490 * Write a portion of b_io inodes which belong to @sb. 464 * Write a portion of b_io inodes which belong to @sb.
491 * 465 *
492 * If @only_this_sb is true, then find and write all such 466 * If @only_this_sb is true, then find and write all such
@@ -585,7 +559,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
585 struct inode *inode = wb_inode(wb->b_io.prev); 559 struct inode *inode = wb_inode(wb->b_io.prev);
586 struct super_block *sb = inode->i_sb; 560 struct super_block *sb = inode->i_sb;
587 561
588 if (!pin_sb_for_writeback(sb)) { 562 if (!grab_super_passive(sb)) {
589 requeue_io(inode); 563 requeue_io(inode);
590 continue; 564 continue;
591 } 565 }
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 2f343b4d7a7d..3f7a59bfa7ad 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -976,16 +976,12 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
976 976
977 pagevec_init(&pvec, 0); 977 pagevec_init(&pvec, 0);
978 next = 0; 978 next = 0;
979 while (next <= (loff_t)-1 && 979 do {
980 pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE) 980 if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
981 ) { 981 break;
982 for (i = 0; i < pagevec_count(&pvec); i++) { 982 for (i = 0; i < pagevec_count(&pvec); i++) {
983 struct page *page = pvec.pages[i]; 983 struct page *page = pvec.pages[i];
984 pgoff_t page_index = page->index; 984 next = page->index;
985
986 ASSERTCMP(page_index, >=, next);
987 next = page_index + 1;
988
989 if (PageFsCache(page)) { 985 if (PageFsCache(page)) {
990 __fscache_wait_on_page_write(cookie, page); 986 __fscache_wait_on_page_write(cookie, page);
991 __fscache_uncache_page(cookie, page); 987 __fscache_uncache_page(cookie, page);
@@ -993,7 +989,7 @@ void __fscache_uncache_all_inode_pages(struct fscache_cookie *cookie,
993 } 989 }
994 pagevec_release(&pvec); 990 pagevec_release(&pvec);
995 cond_resched(); 991 cond_resched();
996 } 992 } while (++next);
997 993
998 _leave(""); 994 _leave("");
999} 995}
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index d50160714595..9f63e493a9b6 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -382,7 +382,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
382 struct fuse_entry_out outentry; 382 struct fuse_entry_out outentry;
383 struct fuse_file *ff; 383 struct fuse_file *ff;
384 struct file *file; 384 struct file *file;
385 int flags = nd->intent.open.flags - 1; 385 int flags = nd->intent.open.flags;
386 386
387 if (fc->no_create) 387 if (fc->no_create)
388 return -ENOSYS; 388 return -ENOSYS;
@@ -576,7 +576,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, int mode,
576static int fuse_create(struct inode *dir, struct dentry *entry, int mode, 576static int fuse_create(struct inode *dir, struct dentry *entry, int mode,
577 struct nameidata *nd) 577 struct nameidata *nd)
578{ 578{
579 if (nd && (nd->flags & LOOKUP_OPEN)) { 579 if (nd) {
580 int err = fuse_create_open(dir, entry, mode, nd); 580 int err = fuse_create_open(dir, entry, mode, nd);
581 if (err != -ENOSYS) 581 if (err != -ENOSYS)
582 return err; 582 return err;
@@ -971,9 +971,9 @@ static int fuse_access(struct inode *inode, int mask)
971 return err; 971 return err;
972} 972}
973 973
974static int fuse_perm_getattr(struct inode *inode, int flags) 974static int fuse_perm_getattr(struct inode *inode, int mask)
975{ 975{
976 if (flags & IPERM_FLAG_RCU) 976 if (mask & MAY_NOT_BLOCK)
977 return -ECHILD; 977 return -ECHILD;
978 978
979 return fuse_do_getattr(inode, NULL, NULL); 979 return fuse_do_getattr(inode, NULL, NULL);
@@ -992,7 +992,7 @@ static int fuse_perm_getattr(struct inode *inode, int flags)
992 * access request is sent. Execute permission is still checked 992 * access request is sent. Execute permission is still checked
993 * locally based on file mode. 993 * locally based on file mode.
994 */ 994 */
995static int fuse_permission(struct inode *inode, int mask, unsigned int flags) 995static int fuse_permission(struct inode *inode, int mask)
996{ 996{
997 struct fuse_conn *fc = get_fuse_conn(inode); 997 struct fuse_conn *fc = get_fuse_conn(inode);
998 bool refreshed = false; 998 bool refreshed = false;
@@ -1011,23 +1011,22 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1011 if (fi->i_time < get_jiffies_64()) { 1011 if (fi->i_time < get_jiffies_64()) {
1012 refreshed = true; 1012 refreshed = true;
1013 1013
1014 err = fuse_perm_getattr(inode, flags); 1014 err = fuse_perm_getattr(inode, mask);
1015 if (err) 1015 if (err)
1016 return err; 1016 return err;
1017 } 1017 }
1018 } 1018 }
1019 1019
1020 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) { 1020 if (fc->flags & FUSE_DEFAULT_PERMISSIONS) {
1021 err = generic_permission(inode, mask, flags, NULL); 1021 err = generic_permission(inode, mask);
1022 1022
1023 /* If permission is denied, try to refresh file 1023 /* If permission is denied, try to refresh file
1024 attributes. This is also needed, because the root 1024 attributes. This is also needed, because the root
1025 node will at first have no permissions */ 1025 node will at first have no permissions */
1026 if (err == -EACCES && !refreshed) { 1026 if (err == -EACCES && !refreshed) {
1027 err = fuse_perm_getattr(inode, flags); 1027 err = fuse_perm_getattr(inode, mask);
1028 if (!err) 1028 if (!err)
1029 err = generic_permission(inode, mask, 1029 err = generic_permission(inode, mask);
1030 flags, NULL);
1031 } 1030 }
1032 1031
1033 /* Note: the opposite of the above test does not 1032 /* Note: the opposite of the above test does not
@@ -1035,7 +1034,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1035 noticed immediately, only after the attribute 1034 noticed immediately, only after the attribute
1036 timeout has expired */ 1035 timeout has expired */
1037 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) { 1036 } else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1038 if (flags & IPERM_FLAG_RCU) 1037 if (mask & MAY_NOT_BLOCK)
1039 return -ECHILD; 1038 return -ECHILD;
1040 1039
1041 err = fuse_access(inode, mask); 1040 err = fuse_access(inode, mask);
@@ -1044,7 +1043,7 @@ static int fuse_permission(struct inode *inode, int mask, unsigned int flags)
1044 if (refreshed) 1043 if (refreshed)
1045 return -EACCES; 1044 return -EACCES;
1046 1045
1047 err = fuse_perm_getattr(inode, flags); 1046 err = fuse_perm_getattr(inode, mask);
1048 if (!err && !(inode->i_mode & S_IXUGO)) 1047 if (!err && !(inode->i_mode & S_IXUGO))
1049 return -EACCES; 1048 return -EACCES;
1050 } 1049 }
@@ -1177,9 +1176,10 @@ static int fuse_dir_release(struct inode *inode, struct file *file)
1177 return 0; 1176 return 0;
1178} 1177}
1179 1178
1180static int fuse_dir_fsync(struct file *file, int datasync) 1179static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
1180 int datasync)
1181{ 1181{
1182 return fuse_fsync_common(file, datasync, 1); 1182 return fuse_fsync_common(file, start, end, datasync, 1);
1183} 1183}
1184 1184
1185static bool update_mtime(unsigned ivalid) 1185static bool update_mtime(unsigned ivalid)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 82a66466a24c..7bb685cdd00c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -400,7 +400,8 @@ static void fuse_sync_writes(struct inode *inode)
400 fuse_release_nowrite(inode); 400 fuse_release_nowrite(inode);
401} 401}
402 402
403int fuse_fsync_common(struct file *file, int datasync, int isdir) 403int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
404 int datasync, int isdir)
404{ 405{
405 struct inode *inode = file->f_mapping->host; 406 struct inode *inode = file->f_mapping->host;
406 struct fuse_conn *fc = get_fuse_conn(inode); 407 struct fuse_conn *fc = get_fuse_conn(inode);
@@ -412,9 +413,15 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir)
412 if (is_bad_inode(inode)) 413 if (is_bad_inode(inode))
413 return -EIO; 414 return -EIO;
414 415
416 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
417 if (err)
418 return err;
419
415 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir)) 420 if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
416 return 0; 421 return 0;
417 422
423 mutex_lock(&inode->i_mutex);
424
418 /* 425 /*
419 * Start writeback against all dirty pages of the inode, then 426 * Start writeback against all dirty pages of the inode, then
420 * wait for all outstanding writes, before sending the FSYNC 427 * wait for all outstanding writes, before sending the FSYNC
@@ -422,13 +429,15 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir)
422 */ 429 */
423 err = write_inode_now(inode, 0); 430 err = write_inode_now(inode, 0);
424 if (err) 431 if (err)
425 return err; 432 goto out;
426 433
427 fuse_sync_writes(inode); 434 fuse_sync_writes(inode);
428 435
429 req = fuse_get_req(fc); 436 req = fuse_get_req(fc);
430 if (IS_ERR(req)) 437 if (IS_ERR(req)) {
431 return PTR_ERR(req); 438 err = PTR_ERR(req);
439 goto out;
440 }
432 441
433 memset(&inarg, 0, sizeof(inarg)); 442 memset(&inarg, 0, sizeof(inarg));
434 inarg.fh = ff->fh; 443 inarg.fh = ff->fh;
@@ -448,12 +457,15 @@ int fuse_fsync_common(struct file *file, int datasync, int isdir)
448 fc->no_fsync = 1; 457 fc->no_fsync = 1;
449 err = 0; 458 err = 0;
450 } 459 }
460out:
461 mutex_unlock(&inode->i_mutex);
451 return err; 462 return err;
452} 463}
453 464
454static int fuse_fsync(struct file *file, int datasync) 465static int fuse_fsync(struct file *file, loff_t start, loff_t end,
466 int datasync)
455{ 467{
456 return fuse_fsync_common(file, datasync, 0); 468 return fuse_fsync_common(file, start, end, datasync, 0);
457} 469}
458 470
459void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos, 471void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
@@ -1600,15 +1612,32 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
1600 struct inode *inode = file->f_path.dentry->d_inode; 1612 struct inode *inode = file->f_path.dentry->d_inode;
1601 1613
1602 mutex_lock(&inode->i_mutex); 1614 mutex_lock(&inode->i_mutex);
1603 switch (origin) { 1615 if (origin != SEEK_CUR || origin != SEEK_SET) {
1604 case SEEK_END:
1605 retval = fuse_update_attributes(inode, NULL, file, NULL); 1616 retval = fuse_update_attributes(inode, NULL, file, NULL);
1606 if (retval) 1617 if (retval)
1607 goto exit; 1618 goto exit;
1619 }
1620
1621 switch (origin) {
1622 case SEEK_END:
1608 offset += i_size_read(inode); 1623 offset += i_size_read(inode);
1609 break; 1624 break;
1610 case SEEK_CUR: 1625 case SEEK_CUR:
1611 offset += file->f_pos; 1626 offset += file->f_pos;
1627 break;
1628 case SEEK_DATA:
1629 if (offset >= i_size_read(inode)) {
1630 retval = -ENXIO;
1631 goto exit;
1632 }
1633 break;
1634 case SEEK_HOLE:
1635 if (offset >= i_size_read(inode)) {
1636 retval = -ENXIO;
1637 goto exit;
1638 }
1639 offset = i_size_read(inode);
1640 break;
1612 } 1641 }
1613 retval = -EINVAL; 1642 retval = -EINVAL;
1614 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { 1643 if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index b788becada76..c6aa2d4b8517 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -589,7 +589,8 @@ void fuse_release_common(struct file *file, int opcode);
589/** 589/**
590 * Send FSYNC or FSYNCDIR request 590 * Send FSYNC or FSYNCDIR request
591 */ 591 */
592int fuse_fsync_common(struct file *file, int datasync, int isdir); 592int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
593 int datasync, int isdir);
593 594
594/** 595/**
595 * Notify poll wakeup 596 * Notify poll wakeup
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 8f26d1a58912..70e90b4974ce 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -190,9 +190,9 @@ generic_acl_chmod(struct inode *inode)
190} 190}
191 191
192int 192int
193generic_check_acl(struct inode *inode, int mask, unsigned int flags) 193generic_check_acl(struct inode *inode, int mask)
194{ 194{
195 if (flags & IPERM_FLAG_RCU) { 195 if (mask & MAY_NOT_BLOCK) {
196 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 196 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
197 return -ECHILD; 197 return -ECHILD;
198 } else { 198 } else {
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index cbc07155b1a0..8ef1079f1665 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -75,12 +75,12 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
75 * Returns: errno 75 * Returns: errno
76 */ 76 */
77 77
78int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags) 78int gfs2_check_acl(struct inode *inode, int mask)
79{ 79{
80 struct posix_acl *acl; 80 struct posix_acl *acl;
81 int error; 81 int error;
82 82
83 if (flags & IPERM_FLAG_RCU) { 83 if (mask & MAY_NOT_BLOCK) {
84 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 84 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
85 return -ECHILD; 85 return -ECHILD;
86 return -EAGAIN; 86 return -EAGAIN;
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index a93907c8159b..b522b0cb39ea 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -16,7 +16,7 @@
16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" 16#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
17#define GFS2_ACL_MAX_ENTRIES 25 17#define GFS2_ACL_MAX_ENTRIES 25
18 18
19extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int); 19extern int gfs2_check_acl(struct inode *inode, int mask);
20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); 20extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); 21extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
22extern const struct xattr_handler gfs2_xattr_system_handler; 22extern const struct xattr_handler gfs2_xattr_system_handler;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e65493a8ac00..7878c473ae62 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -854,11 +854,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
854 blen++; 854 blen++;
855 else { 855 else {
856 if (bstart) { 856 if (bstart) {
857 if (metadata) 857 __gfs2_free_blocks(ip, bstart, blen, metadata);
858 __gfs2_free_meta(ip, bstart, blen);
859 else
860 __gfs2_free_data(ip, bstart, blen);
861
862 btotal += blen; 858 btotal += blen;
863 } 859 }
864 860
@@ -870,11 +866,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
870 gfs2_add_inode_blocks(&ip->i_inode, -1); 866 gfs2_add_inode_blocks(&ip->i_inode, -1);
871 } 867 }
872 if (bstart) { 868 if (bstart) {
873 if (metadata) 869 __gfs2_free_blocks(ip, bstart, blen, metadata);
874 __gfs2_free_meta(ip, bstart, blen);
875 else
876 __gfs2_free_data(ip, bstart, blen);
877
878 btotal += blen; 870 btotal += blen;
879 } 871 }
880 872
@@ -1224,6 +1216,8 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
1224 if (ret) 1216 if (ret)
1225 return ret; 1217 return ret;
1226 1218
1219 inode_dio_wait(inode);
1220
1227 oldsize = inode->i_size; 1221 oldsize = inode->i_size;
1228 if (newsize >= oldsize) 1222 if (newsize >= oldsize)
1229 return do_grow(inode, newsize); 1223 return do_grow(inode, newsize);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 091ee4779538..1cc2f8ec52a2 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -339,6 +339,67 @@ fail:
339 return (copied) ? copied : error; 339 return (copied) ? copied : error;
340} 340}
341 341
342/**
343 * gfs2_dir_get_hash_table - Get pointer to the dir hash table
344 * @ip: The inode in question
345 *
346 * Returns: The hash table or an error
347 */
348
349static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
350{
351 struct inode *inode = &ip->i_inode;
352 int ret;
353 u32 hsize;
354 __be64 *hc;
355
356 BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH));
357
358 hc = ip->i_hash_cache;
359 if (hc)
360 return hc;
361
362 hsize = 1 << ip->i_depth;
363 hsize *= sizeof(__be64);
364 if (hsize != i_size_read(&ip->i_inode)) {
365 gfs2_consist_inode(ip);
366 return ERR_PTR(-EIO);
367 }
368
369 hc = kmalloc(hsize, GFP_NOFS);
370 ret = -ENOMEM;
371 if (hc == NULL)
372 return ERR_PTR(-ENOMEM);
373
374 ret = gfs2_dir_read_data(ip, (char *)hc, 0, hsize, 1);
375 if (ret < 0) {
376 kfree(hc);
377 return ERR_PTR(ret);
378 }
379
380 spin_lock(&inode->i_lock);
381 if (ip->i_hash_cache)
382 kfree(hc);
383 else
384 ip->i_hash_cache = hc;
385 spin_unlock(&inode->i_lock);
386
387 return ip->i_hash_cache;
388}
389
390/**
391 * gfs2_dir_hash_inval - Invalidate dir hash
392 * @ip: The directory inode
393 *
394 * Must be called with an exclusive glock, or during glock invalidation.
395 */
396void gfs2_dir_hash_inval(struct gfs2_inode *ip)
397{
398 __be64 *hc = ip->i_hash_cache;
399 ip->i_hash_cache = NULL;
400 kfree(hc);
401}
402
342static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent) 403static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
343{ 404{
344 return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0; 405 return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
@@ -686,17 +747,12 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
686static int get_leaf_nr(struct gfs2_inode *dip, u32 index, 747static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
687 u64 *leaf_out) 748 u64 *leaf_out)
688{ 749{
689 __be64 leaf_no; 750 __be64 *hash;
690 int error;
691
692 error = gfs2_dir_read_data(dip, (char *)&leaf_no,
693 index * sizeof(__be64),
694 sizeof(__be64), 0);
695 if (error != sizeof(u64))
696 return (error < 0) ? error : -EIO;
697
698 *leaf_out = be64_to_cpu(leaf_no);
699 751
752 hash = gfs2_dir_get_hash_table(dip);
753 if (IS_ERR(hash))
754 return PTR_ERR(hash);
755 *leaf_out = be64_to_cpu(*(hash + index));
700 return 0; 756 return 0;
701} 757}
702 758
@@ -966,6 +1022,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
966 for (x = 0; x < half_len; x++) 1022 for (x = 0; x < half_len; x++)
967 lp[x] = cpu_to_be64(bn); 1023 lp[x] = cpu_to_be64(bn);
968 1024
1025 gfs2_dir_hash_inval(dip);
1026
969 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64), 1027 error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
970 half_len * sizeof(u64)); 1028 half_len * sizeof(u64));
971 if (error != half_len * sizeof(u64)) { 1029 if (error != half_len * sizeof(u64)) {
@@ -1052,70 +1110,54 @@ fail_brelse:
1052 1110
1053static int dir_double_exhash(struct gfs2_inode *dip) 1111static int dir_double_exhash(struct gfs2_inode *dip)
1054{ 1112{
1055 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1056 struct buffer_head *dibh; 1113 struct buffer_head *dibh;
1057 u32 hsize; 1114 u32 hsize;
1058 u64 *buf; 1115 u32 hsize_bytes;
1059 u64 *from, *to; 1116 __be64 *hc;
1060 u64 block; 1117 __be64 *hc2, *h;
1061 u64 disksize = i_size_read(&dip->i_inode);
1062 int x; 1118 int x;
1063 int error = 0; 1119 int error = 0;
1064 1120
1065 hsize = 1 << dip->i_depth; 1121 hsize = 1 << dip->i_depth;
1066 if (hsize * sizeof(u64) != disksize) { 1122 hsize_bytes = hsize * sizeof(__be64);
1067 gfs2_consist_inode(dip);
1068 return -EIO;
1069 }
1070 1123
1071 /* Allocate both the "from" and "to" buffers in one big chunk */ 1124 hc = gfs2_dir_get_hash_table(dip);
1125 if (IS_ERR(hc))
1126 return PTR_ERR(hc);
1072 1127
1073 buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS); 1128 h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS);
1074 if (!buf) 1129 if (!hc2)
1075 return -ENOMEM; 1130 return -ENOMEM;
1076 1131
1077 for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { 1132 error = gfs2_meta_inode_buffer(dip, &dibh);
1078 error = gfs2_dir_read_data(dip, (char *)buf, 1133 if (error)
1079 block * sdp->sd_hash_bsize, 1134 goto out_kfree;
1080 sdp->sd_hash_bsize, 1);
1081 if (error != sdp->sd_hash_bsize) {
1082 if (error >= 0)
1083 error = -EIO;
1084 goto fail;
1085 }
1086
1087 from = buf;
1088 to = (u64 *)((char *)buf + sdp->sd_hash_bsize);
1089
1090 for (x = sdp->sd_hash_ptrs; x--; from++) {
1091 *to++ = *from; /* No endianess worries */
1092 *to++ = *from;
1093 }
1094 1135
1095 error = gfs2_dir_write_data(dip, 1136 for (x = 0; x < hsize; x++) {
1096 (char *)buf + sdp->sd_hash_bsize, 1137 *h++ = *hc;
1097 block * sdp->sd_sb.sb_bsize, 1138 *h++ = *hc;
1098 sdp->sd_sb.sb_bsize); 1139 hc++;
1099 if (error != sdp->sd_sb.sb_bsize) {
1100 if (error >= 0)
1101 error = -EIO;
1102 goto fail;
1103 }
1104 } 1140 }
1105 1141
1106 kfree(buf); 1142 error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2);
1107 1143 if (error != (hsize_bytes * 2))
1108 error = gfs2_meta_inode_buffer(dip, &dibh); 1144 goto fail;
1109 if (!gfs2_assert_withdraw(sdp, !error)) {
1110 dip->i_depth++;
1111 gfs2_dinode_out(dip, dibh->b_data);
1112 brelse(dibh);
1113 }
1114 1145
1115 return error; 1146 gfs2_dir_hash_inval(dip);
1147 dip->i_hash_cache = hc2;
1148 dip->i_depth++;
1149 gfs2_dinode_out(dip, dibh->b_data);
1150 brelse(dibh);
1151 return 0;
1116 1152
1117fail: 1153fail:
1118 kfree(buf); 1154 /* Replace original hash table & size */
1155 gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes);
1156 i_size_write(&dip->i_inode, hsize_bytes);
1157 gfs2_dinode_out(dip, dibh->b_data);
1158 brelse(dibh);
1159out_kfree:
1160 kfree(hc2);
1119 return error; 1161 return error;
1120} 1162}
1121 1163
@@ -1348,6 +1390,7 @@ out:
1348 return error; 1390 return error;
1349} 1391}
1350 1392
1393
1351/** 1394/**
1352 * dir_e_read - Reads the entries from a directory into a filldir buffer 1395 * dir_e_read - Reads the entries from a directory into a filldir buffer
1353 * @dip: dinode pointer 1396 * @dip: dinode pointer
@@ -1362,9 +1405,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1362 filldir_t filldir) 1405 filldir_t filldir)
1363{ 1406{
1364 struct gfs2_inode *dip = GFS2_I(inode); 1407 struct gfs2_inode *dip = GFS2_I(inode);
1365 struct gfs2_sbd *sdp = GFS2_SB(inode);
1366 u32 hsize, len = 0; 1408 u32 hsize, len = 0;
1367 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1368 u32 hash, index; 1409 u32 hash, index;
1369 __be64 *lp; 1410 __be64 *lp;
1370 int copied = 0; 1411 int copied = 0;
@@ -1372,37 +1413,17 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1372 unsigned depth = 0; 1413 unsigned depth = 0;
1373 1414
1374 hsize = 1 << dip->i_depth; 1415 hsize = 1 << dip->i_depth;
1375 if (hsize * sizeof(u64) != i_size_read(inode)) {
1376 gfs2_consist_inode(dip);
1377 return -EIO;
1378 }
1379
1380 hash = gfs2_dir_offset2hash(*offset); 1416 hash = gfs2_dir_offset2hash(*offset);
1381 index = hash >> (32 - dip->i_depth); 1417 index = hash >> (32 - dip->i_depth);
1382 1418
1383 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); 1419 lp = gfs2_dir_get_hash_table(dip);
1384 if (!lp) 1420 if (IS_ERR(lp))
1385 return -ENOMEM; 1421 return PTR_ERR(lp);
1386 1422
1387 while (index < hsize) { 1423 while (index < hsize) {
1388 lp_offset = index & (sdp->sd_hash_ptrs - 1);
1389 ht_offset = index - lp_offset;
1390
1391 if (ht_offset_cur != ht_offset) {
1392 error = gfs2_dir_read_data(dip, (char *)lp,
1393 ht_offset * sizeof(__be64),
1394 sdp->sd_hash_bsize, 1);
1395 if (error != sdp->sd_hash_bsize) {
1396 if (error >= 0)
1397 error = -EIO;
1398 goto out;
1399 }
1400 ht_offset_cur = ht_offset;
1401 }
1402
1403 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, 1424 error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
1404 &copied, &depth, 1425 &copied, &depth,
1405 be64_to_cpu(lp[lp_offset])); 1426 be64_to_cpu(lp[index]));
1406 if (error) 1427 if (error)
1407 break; 1428 break;
1408 1429
@@ -1410,8 +1431,6 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
1410 index = (index & ~(len - 1)) + len; 1431 index = (index & ~(len - 1)) + len;
1411 } 1432 }
1412 1433
1413out:
1414 kfree(lp);
1415 if (error > 0) 1434 if (error > 0)
1416 error = 0; 1435 error = 0;
1417 return error; 1436 return error;
@@ -1914,43 +1933,22 @@ out:
1914 1933
1915int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip) 1934int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1916{ 1935{
1917 struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
1918 struct buffer_head *bh; 1936 struct buffer_head *bh;
1919 struct gfs2_leaf *leaf; 1937 struct gfs2_leaf *leaf;
1920 u32 hsize, len; 1938 u32 hsize, len;
1921 u32 ht_offset, lp_offset, ht_offset_cur = -1;
1922 u32 index = 0, next_index; 1939 u32 index = 0, next_index;
1923 __be64 *lp; 1940 __be64 *lp;
1924 u64 leaf_no; 1941 u64 leaf_no;
1925 int error = 0, last; 1942 int error = 0, last;
1926 1943
1927 hsize = 1 << dip->i_depth; 1944 hsize = 1 << dip->i_depth;
1928 if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
1929 gfs2_consist_inode(dip);
1930 return -EIO;
1931 }
1932 1945
1933 lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS); 1946 lp = gfs2_dir_get_hash_table(dip);
1934 if (!lp) 1947 if (IS_ERR(lp))
1935 return -ENOMEM; 1948 return PTR_ERR(lp);
1936 1949
1937 while (index < hsize) { 1950 while (index < hsize) {
1938 lp_offset = index & (sdp->sd_hash_ptrs - 1); 1951 leaf_no = be64_to_cpu(lp[index]);
1939 ht_offset = index - lp_offset;
1940
1941 if (ht_offset_cur != ht_offset) {
1942 error = gfs2_dir_read_data(dip, (char *)lp,
1943 ht_offset * sizeof(__be64),
1944 sdp->sd_hash_bsize, 1);
1945 if (error != sdp->sd_hash_bsize) {
1946 if (error >= 0)
1947 error = -EIO;
1948 goto out;
1949 }
1950 ht_offset_cur = ht_offset;
1951 }
1952
1953 leaf_no = be64_to_cpu(lp[lp_offset]);
1954 if (leaf_no) { 1952 if (leaf_no) {
1955 error = get_leaf(dip, leaf_no, &bh); 1953 error = get_leaf(dip, leaf_no, &bh);
1956 if (error) 1954 if (error)
@@ -1976,7 +1974,6 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
1976 } 1974 }
1977 1975
1978out: 1976out:
1979 kfree(lp);
1980 1977
1981 return error; 1978 return error;
1982} 1979}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index e686af11becd..ff5772fbf024 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -35,6 +35,7 @@ extern int gfs2_diradd_alloc_required(struct inode *dir,
35 const struct qstr *filename); 35 const struct qstr *filename);
36extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, 36extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
37 struct buffer_head **bhp); 37 struct buffer_head **bhp);
38extern void gfs2_dir_hash_inval(struct gfs2_inode *ip);
38 39
39static inline u32 gfs2_disk_hash(const char *data, int len) 40static inline u32 gfs2_disk_hash(const char *data, int len)
40{ 41{
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index a9f5cbe45cd9..edeb9e802903 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -174,7 +174,9 @@ void gfs2_set_inode_flags(struct inode *inode)
174 struct gfs2_inode *ip = GFS2_I(inode); 174 struct gfs2_inode *ip = GFS2_I(inode);
175 unsigned int flags = inode->i_flags; 175 unsigned int flags = inode->i_flags;
176 176
177 flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 177 flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
178 if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
179 inode->i_flags |= S_NOSEC;
178 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE) 180 if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
179 flags |= S_IMMUTABLE; 181 flags |= S_IMMUTABLE;
180 if (ip->i_diskflags & GFS2_DIF_APPENDONLY) 182 if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
@@ -243,7 +245,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
243 !capable(CAP_LINUX_IMMUTABLE)) 245 !capable(CAP_LINUX_IMMUTABLE))
244 goto out; 246 goto out;
245 if (!IS_IMMUTABLE(inode)) { 247 if (!IS_IMMUTABLE(inode)) {
246 error = gfs2_permission(inode, MAY_WRITE, 0); 248 error = gfs2_permission(inode, MAY_WRITE);
247 if (error) 249 if (error)
248 goto out; 250 goto out;
249 } 251 }
@@ -544,7 +546,9 @@ static int gfs2_close(struct inode *inode, struct file *file)
544 546
545/** 547/**
546 * gfs2_fsync - sync the dirty data for a file (across the cluster) 548 * gfs2_fsync - sync the dirty data for a file (across the cluster)
547 * @file: the file that points to the dentry (we ignore this) 549 * @file: the file that points to the dentry
550 * @start: the start position in the file to sync
551 * @end: the end position in the file to sync
548 * @datasync: set if we can ignore timestamp changes 552 * @datasync: set if we can ignore timestamp changes
549 * 553 *
550 * The VFS will flush data for us. We only need to worry 554 * The VFS will flush data for us. We only need to worry
@@ -553,23 +557,32 @@ static int gfs2_close(struct inode *inode, struct file *file)
553 * Returns: errno 557 * Returns: errno
554 */ 558 */
555 559
556static int gfs2_fsync(struct file *file, int datasync) 560static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
561 int datasync)
557{ 562{
558 struct inode *inode = file->f_mapping->host; 563 struct inode *inode = file->f_mapping->host;
559 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC); 564 int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
560 struct gfs2_inode *ip = GFS2_I(inode); 565 struct gfs2_inode *ip = GFS2_I(inode);
561 int ret; 566 int ret;
562 567
568 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
569 if (ret)
570 return ret;
571 mutex_lock(&inode->i_mutex);
572
563 if (datasync) 573 if (datasync)
564 sync_state &= ~I_DIRTY_SYNC; 574 sync_state &= ~I_DIRTY_SYNC;
565 575
566 if (sync_state) { 576 if (sync_state) {
567 ret = sync_inode_metadata(inode, 1); 577 ret = sync_inode_metadata(inode, 1);
568 if (ret) 578 if (ret) {
579 mutex_unlock(&inode->i_mutex);
569 return ret; 580 return ret;
581 }
570 gfs2_ail_flush(ip->i_gl); 582 gfs2_ail_flush(ip->i_gl);
571 } 583 }
572 584
585 mutex_unlock(&inode->i_mutex);
573 return 0; 586 return 0;
574} 587}
575 588
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1c1336e7b3b2..88e8a23d0026 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -409,6 +409,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
409 if (held1 && held2 && list_empty(&gl->gl_holders)) 409 if (held1 && held2 && list_empty(&gl->gl_holders))
410 clear_bit(GLF_QUEUED, &gl->gl_flags); 410 clear_bit(GLF_QUEUED, &gl->gl_flags);
411 411
412 if (new_state != gl->gl_target)
413 /* shorten our minimum hold time */
414 gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
415 GL_GLOCK_MIN_HOLD);
412 gl->gl_state = new_state; 416 gl->gl_state = new_state;
413 gl->gl_tchange = jiffies; 417 gl->gl_tchange = jiffies;
414} 418}
@@ -668,7 +672,7 @@ static void glock_work_func(struct work_struct *work)
668 gl->gl_demote_state != LM_ST_EXCLUSIVE) { 672 gl->gl_demote_state != LM_ST_EXCLUSIVE) {
669 unsigned long holdtime, now = jiffies; 673 unsigned long holdtime, now = jiffies;
670 674
671 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 675 holdtime = gl->gl_tchange + gl->gl_hold_time;
672 if (time_before(now, holdtime)) 676 if (time_before(now, holdtime))
673 delay = holdtime - now; 677 delay = holdtime - now;
674 678
@@ -679,9 +683,14 @@ static void glock_work_func(struct work_struct *work)
679 } 683 }
680 run_queue(gl, 0); 684 run_queue(gl, 0);
681 spin_unlock(&gl->gl_spin); 685 spin_unlock(&gl->gl_spin);
682 if (!delay || 686 if (!delay)
683 queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
684 gfs2_glock_put(gl); 687 gfs2_glock_put(gl);
688 else {
689 if (gl->gl_name.ln_type != LM_TYPE_INODE)
690 delay = 0;
691 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
692 gfs2_glock_put(gl);
693 }
685 if (drop_ref) 694 if (drop_ref)
686 gfs2_glock_put(gl); 695 gfs2_glock_put(gl);
687} 696}
@@ -743,6 +752,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
743 gl->gl_tchange = jiffies; 752 gl->gl_tchange = jiffies;
744 gl->gl_object = NULL; 753 gl->gl_object = NULL;
745 gl->gl_sbd = sdp; 754 gl->gl_sbd = sdp;
755 gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
746 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func); 756 INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
747 INIT_WORK(&gl->gl_delete, delete_work_func); 757 INIT_WORK(&gl->gl_delete, delete_work_func);
748 758
@@ -855,8 +865,15 @@ static int gfs2_glock_demote_wait(void *word)
855 865
856static void wait_on_holder(struct gfs2_holder *gh) 866static void wait_on_holder(struct gfs2_holder *gh)
857{ 867{
868 unsigned long time1 = jiffies;
869
858 might_sleep(); 870 might_sleep();
859 wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); 871 wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
872 if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
873 /* Lengthen the minimum hold time. */
874 gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
875 GL_GLOCK_HOLD_INCR,
876 GL_GLOCK_MAX_HOLD);
860} 877}
861 878
862static void wait_on_demote(struct gfs2_glock *gl) 879static void wait_on_demote(struct gfs2_glock *gl)
@@ -1093,8 +1110,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
1093 1110
1094 gfs2_glock_hold(gl); 1111 gfs2_glock_hold(gl);
1095 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && 1112 if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
1096 !test_bit(GLF_DEMOTE, &gl->gl_flags)) 1113 !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
1097 delay = gl->gl_ops->go_min_hold_time; 1114 gl->gl_name.ln_type == LM_TYPE_INODE)
1115 delay = gl->gl_hold_time;
1098 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0) 1116 if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
1099 gfs2_glock_put(gl); 1117 gfs2_glock_put(gl);
1100} 1118}
@@ -1273,12 +1291,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
1273 unsigned long now = jiffies; 1291 unsigned long now = jiffies;
1274 1292
1275 gfs2_glock_hold(gl); 1293 gfs2_glock_hold(gl);
1276 holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; 1294 holdtime = gl->gl_tchange + gl->gl_hold_time;
1277 if (test_bit(GLF_QUEUED, &gl->gl_flags)) { 1295 if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
1296 gl->gl_name.ln_type == LM_TYPE_INODE) {
1278 if (time_before(now, holdtime)) 1297 if (time_before(now, holdtime))
1279 delay = holdtime - now; 1298 delay = holdtime - now;
1280 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) 1299 if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
1281 delay = gl->gl_ops->go_min_hold_time; 1300 delay = gl->gl_hold_time;
1282 } 1301 }
1283 1302
1284 spin_lock(&gl->gl_spin); 1303 spin_lock(&gl->gl_spin);
@@ -1667,7 +1686,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1667 dtime *= 1000000/HZ; /* demote time in uSec */ 1686 dtime *= 1000000/HZ; /* demote time in uSec */
1668 if (!test_bit(GLF_DEMOTE, &gl->gl_flags)) 1687 if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
1669 dtime = 0; 1688 dtime = 0;
1670 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n", 1689 gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
1671 state2str(gl->gl_state), 1690 state2str(gl->gl_state),
1672 gl->gl_name.ln_type, 1691 gl->gl_name.ln_type,
1673 (unsigned long long)gl->gl_name.ln_number, 1692 (unsigned long long)gl->gl_name.ln_number,
@@ -1676,7 +1695,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
1676 state2str(gl->gl_demote_state), dtime, 1695 state2str(gl->gl_demote_state), dtime,
1677 atomic_read(&gl->gl_ail_count), 1696 atomic_read(&gl->gl_ail_count),
1678 atomic_read(&gl->gl_revokes), 1697 atomic_read(&gl->gl_revokes),
1679 atomic_read(&gl->gl_ref)); 1698 atomic_read(&gl->gl_ref), gl->gl_hold_time);
1680 1699
1681 list_for_each_entry(gh, &gl->gl_holders, gh_list) { 1700 list_for_each_entry(gh, &gl->gl_holders, gh_list) {
1682 error = dump_holder(seq, gh); 1701 error = dump_holder(seq, gh);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 6b2f757b9281..66707118af25 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -113,6 +113,12 @@ enum {
113 113
114#define GLR_TRYFAILED 13 114#define GLR_TRYFAILED 13
115 115
116#define GL_GLOCK_MAX_HOLD (long)(HZ / 5)
117#define GL_GLOCK_DFT_HOLD (long)(HZ / 5)
118#define GL_GLOCK_MIN_HOLD (long)(10)
119#define GL_GLOCK_HOLD_INCR (long)(HZ / 20)
120#define GL_GLOCK_HOLD_DECR (long)(HZ / 40)
121
116struct lm_lockops { 122struct lm_lockops {
117 const char *lm_proto_name; 123 const char *lm_proto_name;
118 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname); 124 int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 2cca29316bd6..da21ecaafcc2 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -26,6 +26,7 @@
26#include "rgrp.h" 26#include "rgrp.h"
27#include "util.h" 27#include "util.h"
28#include "trans.h" 28#include "trans.h"
29#include "dir.h"
29 30
30/** 31/**
31 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL 32 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
@@ -218,6 +219,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
218 if (ip) { 219 if (ip) {
219 set_bit(GIF_INVALID, &ip->i_flags); 220 set_bit(GIF_INVALID, &ip->i_flags);
220 forget_all_cached_acls(&ip->i_inode); 221 forget_all_cached_acls(&ip->i_inode);
222 gfs2_dir_hash_inval(ip);
221 } 223 }
222 } 224 }
223 225
@@ -316,6 +318,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
316 ip->i_generation = be64_to_cpu(str->di_generation); 318 ip->i_generation = be64_to_cpu(str->di_generation);
317 319
318 ip->i_diskflags = be32_to_cpu(str->di_flags); 320 ip->i_diskflags = be32_to_cpu(str->di_flags);
321 ip->i_eattr = be64_to_cpu(str->di_eattr);
322 /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
319 gfs2_set_inode_flags(&ip->i_inode); 323 gfs2_set_inode_flags(&ip->i_inode);
320 height = be16_to_cpu(str->di_height); 324 height = be16_to_cpu(str->di_height);
321 if (unlikely(height > GFS2_MAX_META_HEIGHT)) 325 if (unlikely(height > GFS2_MAX_META_HEIGHT))
@@ -328,7 +332,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
328 ip->i_depth = (u8)depth; 332 ip->i_depth = (u8)depth;
329 ip->i_entries = be32_to_cpu(str->di_entries); 333 ip->i_entries = be32_to_cpu(str->di_entries);
330 334
331 ip->i_eattr = be64_to_cpu(str->di_eattr);
332 if (S_ISREG(ip->i_inode.i_mode)) 335 if (S_ISREG(ip->i_inode.i_mode))
333 gfs2_set_aops(&ip->i_inode); 336 gfs2_set_aops(&ip->i_inode);
334 337
@@ -549,7 +552,6 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
549 .go_lock = inode_go_lock, 552 .go_lock = inode_go_lock,
550 .go_dump = inode_go_dump, 553 .go_dump = inode_go_dump,
551 .go_type = LM_TYPE_INODE, 554 .go_type = LM_TYPE_INODE,
552 .go_min_hold_time = HZ / 5,
553 .go_flags = GLOF_ASPACE, 555 .go_flags = GLOF_ASPACE,
554}; 556};
555 557
@@ -560,7 +562,6 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = {
560 .go_unlock = rgrp_go_unlock, 562 .go_unlock = rgrp_go_unlock,
561 .go_dump = gfs2_rgrp_dump, 563 .go_dump = gfs2_rgrp_dump,
562 .go_type = LM_TYPE_RGRP, 564 .go_type = LM_TYPE_RGRP,
563 .go_min_hold_time = HZ / 5,
564 .go_flags = GLOF_ASPACE, 565 .go_flags = GLOF_ASPACE,
565}; 566};
566 567
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 81206e70cbf6..892ac37de8ae 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -163,7 +163,6 @@ struct gfs2_glock_operations {
163 int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl); 163 int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
164 void (*go_callback) (struct gfs2_glock *gl); 164 void (*go_callback) (struct gfs2_glock *gl);
165 const int go_type; 165 const int go_type;
166 const unsigned long go_min_hold_time;
167 const unsigned long go_flags; 166 const unsigned long go_flags;
168#define GLOF_ASPACE 1 167#define GLOF_ASPACE 1
169}; 168};
@@ -221,6 +220,7 @@ struct gfs2_glock {
221 220
222 unsigned int gl_hash; 221 unsigned int gl_hash;
223 unsigned long gl_demote_time; /* time of first demote request */ 222 unsigned long gl_demote_time; /* time of first demote request */
223 long gl_hold_time;
224 struct list_head gl_holders; 224 struct list_head gl_holders;
225 225
226 const struct gfs2_glock_operations *gl_ops; 226 const struct gfs2_glock_operations *gl_ops;
@@ -285,6 +285,7 @@ struct gfs2_inode {
285 u64 i_goal; /* goal block for allocations */ 285 u64 i_goal; /* goal block for allocations */
286 struct rw_semaphore i_rw_mutex; 286 struct rw_semaphore i_rw_mutex;
287 struct list_head i_trunc_list; 287 struct list_head i_trunc_list;
288 __be64 *i_hash_cache;
288 u32 i_entries; 289 u32 i_entries;
289 u32 i_diskflags; 290 u32 i_diskflags;
290 u8 i_height; 291 u8 i_height;
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 03e0c529063e..0fb51a96eff0 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -307,7 +307,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
307 } 307 }
308 308
309 if (!is_root) { 309 if (!is_root) {
310 error = gfs2_permission(dir, MAY_EXEC, 0); 310 error = gfs2_permission(dir, MAY_EXEC);
311 if (error) 311 if (error)
312 goto out; 312 goto out;
313 } 313 }
@@ -337,7 +337,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
337{ 337{
338 int error; 338 int error;
339 339
340 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); 340 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
341 if (error) 341 if (error)
342 return error; 342 return error;
343 343
@@ -792,13 +792,8 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
792static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, 792static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
793 struct nameidata *nd) 793 struct nameidata *nd)
794{ 794{
795 struct inode *inode = NULL; 795 struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0);
796 796 if (inode && !IS_ERR(inode)) {
797 inode = gfs2_lookupi(dir, &dentry->d_name, 0);
798 if (inode && IS_ERR(inode))
799 return ERR_CAST(inode);
800
801 if (inode) {
802 struct gfs2_glock *gl = GFS2_I(inode)->i_gl; 797 struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
803 struct gfs2_holder gh; 798 struct gfs2_holder gh;
804 int error; 799 int error;
@@ -808,11 +803,8 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
808 return ERR_PTR(error); 803 return ERR_PTR(error);
809 } 804 }
810 gfs2_glock_dq_uninit(&gh); 805 gfs2_glock_dq_uninit(&gh);
811 return d_splice_alias(inode, dentry);
812 } 806 }
813 d_add(dentry, inode); 807 return d_splice_alias(inode, dentry);
814
815 return NULL;
816} 808}
817 809
818/** 810/**
@@ -857,7 +849,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
857 if (inode->i_nlink == 0) 849 if (inode->i_nlink == 0)
858 goto out_gunlock; 850 goto out_gunlock;
859 851
860 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0); 852 error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
861 if (error) 853 if (error)
862 goto out_gunlock; 854 goto out_gunlock;
863 855
@@ -990,7 +982,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
990 if (IS_APPEND(&dip->i_inode)) 982 if (IS_APPEND(&dip->i_inode))
991 return -EPERM; 983 return -EPERM;
992 984
993 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0); 985 error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
994 if (error) 986 if (error)
995 return error; 987 return error;
996 988
@@ -1336,7 +1328,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1336 } 1328 }
1337 } 1329 }
1338 } else { 1330 } else {
1339 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0); 1331 error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
1340 if (error) 1332 if (error)
1341 goto out_gunlock; 1333 goto out_gunlock;
1342 1334
@@ -1371,7 +1363,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
1371 /* Check out the dir to be renamed */ 1363 /* Check out the dir to be renamed */
1372 1364
1373 if (dir_rename) { 1365 if (dir_rename) {
1374 error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0); 1366 error = gfs2_permission(odentry->d_inode, MAY_WRITE);
1375 if (error) 1367 if (error)
1376 goto out_gunlock; 1368 goto out_gunlock;
1377 } 1369 }
@@ -1543,7 +1535,7 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
1543 * Returns: errno 1535 * Returns: errno
1544 */ 1536 */
1545 1537
1546int gfs2_permission(struct inode *inode, int mask, unsigned int flags) 1538int gfs2_permission(struct inode *inode, int mask)
1547{ 1539{
1548 struct gfs2_inode *ip; 1540 struct gfs2_inode *ip;
1549 struct gfs2_holder i_gh; 1541 struct gfs2_holder i_gh;
@@ -1553,7 +1545,7 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1553 1545
1554 ip = GFS2_I(inode); 1546 ip = GFS2_I(inode);
1555 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) { 1547 if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
1556 if (flags & IPERM_FLAG_RCU) 1548 if (mask & MAY_NOT_BLOCK)
1557 return -ECHILD; 1549 return -ECHILD;
1558 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh); 1550 error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
1559 if (error) 1551 if (error)
@@ -1564,7 +1556,7 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
1564 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode)) 1556 if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
1565 error = -EACCES; 1557 error = -EACCES;
1566 else 1558 else
1567 error = generic_permission(inode, mask, flags, gfs2_check_acl); 1559 error = generic_permission(inode, mask);
1568 if (unlock) 1560 if (unlock)
1569 gfs2_glock_dq_uninit(&i_gh); 1561 gfs2_glock_dq_uninit(&i_gh);
1570 1562
@@ -1854,6 +1846,7 @@ const struct inode_operations gfs2_file_iops = {
1854 .listxattr = gfs2_listxattr, 1846 .listxattr = gfs2_listxattr,
1855 .removexattr = gfs2_removexattr, 1847 .removexattr = gfs2_removexattr,
1856 .fiemap = gfs2_fiemap, 1848 .fiemap = gfs2_fiemap,
1849 .check_acl = gfs2_check_acl,
1857}; 1850};
1858 1851
1859const struct inode_operations gfs2_dir_iops = { 1852const struct inode_operations gfs2_dir_iops = {
@@ -1874,6 +1867,7 @@ const struct inode_operations gfs2_dir_iops = {
1874 .listxattr = gfs2_listxattr, 1867 .listxattr = gfs2_listxattr,
1875 .removexattr = gfs2_removexattr, 1868 .removexattr = gfs2_removexattr,
1876 .fiemap = gfs2_fiemap, 1869 .fiemap = gfs2_fiemap,
1870 .check_acl = gfs2_check_acl,
1877}; 1871};
1878 1872
1879const struct inode_operations gfs2_symlink_iops = { 1873const struct inode_operations gfs2_symlink_iops = {
@@ -1888,5 +1882,6 @@ const struct inode_operations gfs2_symlink_iops = {
1888 .listxattr = gfs2_listxattr, 1882 .listxattr = gfs2_listxattr,
1889 .removexattr = gfs2_removexattr, 1883 .removexattr = gfs2_removexattr,
1890 .fiemap = gfs2_fiemap, 1884 .fiemap = gfs2_fiemap,
1885 .check_acl = gfs2_check_acl,
1891}; 1886};
1892 1887
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 31606076f701..8d90e0c07672 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -108,7 +108,7 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip);
108 108
109extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, 109extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
110 int is_root); 110 int is_root);
111extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags); 111extern int gfs2_permission(struct inode *inode, int mask);
112extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); 112extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
113extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); 113extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
114extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); 114extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index c2b34cd2abe0..29e1ace7953d 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -41,6 +41,7 @@ static void gfs2_init_inode_once(void *foo)
41 init_rwsem(&ip->i_rw_mutex); 41 init_rwsem(&ip->i_rw_mutex);
42 INIT_LIST_HEAD(&ip->i_trunc_list); 42 INIT_LIST_HEAD(&ip->i_trunc_list);
43 ip->i_alloc = NULL; 43 ip->i_alloc = NULL;
44 ip->i_hash_cache = NULL;
44} 45}
45 46
46static void gfs2_init_glock_once(void *foo) 47static void gfs2_init_glock_once(void *foo)
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 2a77071fb7b6..516516e0c2a2 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1094,6 +1094,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
1094 if (sdp->sd_args.ar_nobarrier) 1094 if (sdp->sd_args.ar_nobarrier)
1095 set_bit(SDF_NOBARRIERS, &sdp->sd_flags); 1095 set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
1096 1096
1097 sb->s_flags |= MS_NOSEC;
1097 sb->s_magic = GFS2_MAGIC; 1098 sb->s_magic = GFS2_MAGIC;
1098 sb->s_op = &gfs2_super_ops; 1099 sb->s_op = &gfs2_super_ops;
1099 sb->s_d_op = &gfs2_dops; 1100 sb->s_d_op = &gfs2_dops;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9b780df3fd54..7f8af1eb02de 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1607,14 +1607,15 @@ rgrp_error:
1607} 1607}
1608 1608
1609/** 1609/**
1610 * gfs2_free_data - free a contiguous run of data block(s) 1610 * __gfs2_free_blocks - free a contiguous run of block(s)
1611 * @ip: the inode these blocks are being freed from 1611 * @ip: the inode these blocks are being freed from
1612 * @bstart: first block of a run of contiguous blocks 1612 * @bstart: first block of a run of contiguous blocks
1613 * @blen: the length of the block run 1613 * @blen: the length of the block run
1614 * @meta: 1 if the blocks represent metadata
1614 * 1615 *
1615 */ 1616 */
1616 1617
1617void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) 1618void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
1618{ 1619{
1619 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1620 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1620 struct gfs2_rgrpd *rgd; 1621 struct gfs2_rgrpd *rgd;
@@ -1631,54 +1632,11 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1631 gfs2_trans_add_rg(rgd); 1632 gfs2_trans_add_rg(rgd);
1632 1633
1633 /* Directories keep their data in the metadata address space */ 1634 /* Directories keep their data in the metadata address space */
1634 if (ip->i_depth) 1635 if (meta || ip->i_depth)
1635 gfs2_meta_wipe(ip, bstart, blen); 1636 gfs2_meta_wipe(ip, bstart, blen);
1636} 1637}
1637 1638
1638/** 1639/**
1639 * gfs2_free_data - free a contiguous run of data block(s)
1640 * @ip: the inode these blocks are being freed from
1641 * @bstart: first block of a run of contiguous blocks
1642 * @blen: the length of the block run
1643 *
1644 */
1645
1646void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
1647{
1648 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1649
1650 __gfs2_free_data(ip, bstart, blen);
1651 gfs2_statfs_change(sdp, 0, +blen, 0);
1652 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1653}
1654
1655/**
1656 * gfs2_free_meta - free a contiguous run of data block(s)
1657 * @ip: the inode these blocks are being freed from
1658 * @bstart: first block of a run of contiguous blocks
1659 * @blen: the length of the block run
1660 *
1661 */
1662
1663void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1664{
1665 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1666 struct gfs2_rgrpd *rgd;
1667
1668 rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
1669 if (!rgd)
1670 return;
1671 trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
1672 rgd->rd_free += blen;
1673
1674 gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
1675 gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
1676
1677 gfs2_trans_add_rg(rgd);
1678 gfs2_meta_wipe(ip, bstart, blen);
1679}
1680
1681/**
1682 * gfs2_free_meta - free a contiguous run of data block(s) 1640 * gfs2_free_meta - free a contiguous run of data block(s)
1683 * @ip: the inode these blocks are being freed from 1641 * @ip: the inode these blocks are being freed from
1684 * @bstart: first block of a run of contiguous blocks 1642 * @bstart: first block of a run of contiguous blocks
@@ -1690,7 +1648,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
1690{ 1648{
1691 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1649 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1692 1650
1693 __gfs2_free_meta(ip, bstart, blen); 1651 __gfs2_free_blocks(ip, bstart, blen, 1);
1694 gfs2_statfs_change(sdp, 0, +blen, 0); 1652 gfs2_statfs_change(sdp, 0, +blen, 0);
1695 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); 1653 gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
1696} 1654}
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index a80e3034ac47..d253f9a8c70e 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -52,9 +52,7 @@ extern int gfs2_ri_update(struct gfs2_inode *ip);
52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); 52extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation); 53extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
54 54
55extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); 55extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
56extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
57extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
58extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); 56extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
59extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); 57extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
60extern void gfs2_unlink_di(struct inode *inode); 58extern void gfs2_unlink_di(struct inode *inode);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fb0edf735483..b7beadd9ba4c 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1533,7 +1533,7 @@ out:
1533 /* Case 3 starts here */ 1533 /* Case 3 starts here */
1534 truncate_inode_pages(&inode->i_data, 0); 1534 truncate_inode_pages(&inode->i_data, 0);
1535 end_writeback(inode); 1535 end_writeback(inode);
1536 1536 gfs2_dir_hash_inval(ip);
1537 ip->i_gl->gl_object = NULL; 1537 ip->i_gl->gl_object = NULL;
1538 gfs2_glock_add_to_lru(ip->i_gl); 1538 gfs2_glock_add_to_lru(ip->i_gl);
1539 gfs2_glock_put(ip->i_gl); 1539 gfs2_glock_put(ip->i_gl);
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index fff16c968e67..96a1b625fc74 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -123,8 +123,8 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb,
123 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 123 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
124 ssize_t ret; 124 ssize_t ret;
125 125
126 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 126 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
127 offset, nr_segs, hfs_get_block, NULL); 127 hfs_get_block);
128 128
129 /* 129 /*
130 * In case of error extending write may have instantiated a few 130 * In case of error extending write may have instantiated a few
@@ -615,6 +615,8 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
615 615
616 if ((attr->ia_valid & ATTR_SIZE) && 616 if ((attr->ia_valid & ATTR_SIZE) &&
617 attr->ia_size != i_size_read(inode)) { 617 attr->ia_size != i_size_read(inode)) {
618 inode_dio_wait(inode);
619
618 error = vmtruncate(inode, attr->ia_size); 620 error = vmtruncate(inode, attr->ia_size);
619 if (error) 621 if (error)
620 return error; 622 return error;
@@ -625,12 +627,18 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
625 return 0; 627 return 0;
626} 628}
627 629
628static int hfs_file_fsync(struct file *filp, int datasync) 630static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end,
631 int datasync)
629{ 632{
630 struct inode *inode = filp->f_mapping->host; 633 struct inode *inode = filp->f_mapping->host;
631 struct super_block * sb; 634 struct super_block * sb;
632 int ret, err; 635 int ret, err;
633 636
637 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
638 if (ret)
639 return ret;
640 mutex_lock(&inode->i_mutex);
641
634 /* sync the inode to buffers */ 642 /* sync the inode to buffers */
635 ret = write_inode_now(inode, 0); 643 ret = write_inode_now(inode, 0);
636 644
@@ -647,6 +655,7 @@ static int hfs_file_fsync(struct file *filp, int datasync)
647 err = sync_blockdev(sb->s_bdev); 655 err = sync_blockdev(sb->s_bdev);
648 if (!ret) 656 if (!ret)
649 ret = err; 657 ret = err;
658 mutex_unlock(&inode->i_mutex);
650 return ret; 659 return ret;
651} 660}
652 661
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 2312de34bd42..2a734cfccc92 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -43,6 +43,10 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec)
43 node->tree->node_size - (rec + 1) * 2); 43 node->tree->node_size - (rec + 1) * 2);
44 if (!recoff) 44 if (!recoff)
45 return 0; 45 return 0;
46 if (recoff > node->tree->node_size - 2) {
47 printk(KERN_ERR "hfs: recoff %d too large\n", recoff);
48 return 0;
49 }
46 50
47 retval = hfs_bnode_read_u16(node, recoff) + 2; 51 retval = hfs_bnode_read_u16(node, recoff) + 2;
48 if (retval > node->tree->max_key_len + 2) { 52 if (retval > node->tree->max_key_len + 2) {
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index b4ba1b319333..4dfbfec357e8 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -212,7 +212,9 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
212 212
213 dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", 213 dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n",
214 str->name, cnid, inode->i_nlink); 214 str->name, cnid, inode->i_nlink);
215 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 215 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
216 if (err)
217 return err;
216 218
217 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); 219 hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
218 entry_size = hfsplus_fill_cat_thread(sb, &entry, 220 entry_size = hfsplus_fill_cat_thread(sb, &entry,
@@ -269,7 +271,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
269 271
270 dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", 272 dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n",
271 str ? str->name : NULL, cnid); 273 str ? str->name : NULL, cnid);
272 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 274 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
275 if (err)
276 return err;
273 277
274 if (!str) { 278 if (!str) {
275 int len; 279 int len;
@@ -347,12 +351,14 @@ int hfsplus_rename_cat(u32 cnid,
347 struct hfs_find_data src_fd, dst_fd; 351 struct hfs_find_data src_fd, dst_fd;
348 hfsplus_cat_entry entry; 352 hfsplus_cat_entry entry;
349 int entry_size, type; 353 int entry_size, type;
350 int err = 0; 354 int err;
351 355
352 dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", 356 dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n",
353 cnid, src_dir->i_ino, src_name->name, 357 cnid, src_dir->i_ino, src_name->name,
354 dst_dir->i_ino, dst_name->name); 358 dst_dir->i_ino, dst_name->name);
355 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); 359 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd);
360 if (err)
361 return err;
356 dst_fd = src_fd; 362 dst_fd = src_fd;
357 363
358 /* find the old dir entry and read the data */ 364 /* find the old dir entry and read the data */
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4df5059c25da..25b2443a004c 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -38,7 +38,9 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
38 sb = dir->i_sb; 38 sb = dir->i_sb;
39 39
40 dentry->d_fsdata = NULL; 40 dentry->d_fsdata = NULL;
41 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 41 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
42 if (err)
43 return ERR_PTR(err);
42 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); 44 hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
43again: 45again:
44 err = hfs_brec_read(&fd, &entry, sizeof(entry)); 46 err = hfs_brec_read(&fd, &entry, sizeof(entry));
@@ -132,7 +134,9 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
132 if (filp->f_pos >= inode->i_size) 134 if (filp->f_pos >= inode->i_size)
133 return 0; 135 return 0;
134 136
135 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 137 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
138 if (err)
139 return err;
136 hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); 140 hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
137 err = hfs_brec_find(&fd); 141 err = hfs_brec_find(&fd);
138 if (err) 142 if (err)
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c
index b1991a2a08e0..5849e3ef35cc 100644
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -119,22 +119,31 @@ static void __hfsplus_ext_write_extent(struct inode *inode,
119 set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags); 119 set_bit(HFSPLUS_I_EXT_DIRTY, &hip->flags);
120} 120}
121 121
122static void hfsplus_ext_write_extent_locked(struct inode *inode) 122static int hfsplus_ext_write_extent_locked(struct inode *inode)
123{ 123{
124 int res;
125
124 if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) { 126 if (HFSPLUS_I(inode)->extent_state & HFSPLUS_EXT_DIRTY) {
125 struct hfs_find_data fd; 127 struct hfs_find_data fd;
126 128
127 hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); 129 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
130 if (res)
131 return res;
128 __hfsplus_ext_write_extent(inode, &fd); 132 __hfsplus_ext_write_extent(inode, &fd);
129 hfs_find_exit(&fd); 133 hfs_find_exit(&fd);
130 } 134 }
135 return 0;
131} 136}
132 137
133void hfsplus_ext_write_extent(struct inode *inode) 138int hfsplus_ext_write_extent(struct inode *inode)
134{ 139{
140 int res;
141
135 mutex_lock(&HFSPLUS_I(inode)->extents_lock); 142 mutex_lock(&HFSPLUS_I(inode)->extents_lock);
136 hfsplus_ext_write_extent_locked(inode); 143 res = hfsplus_ext_write_extent_locked(inode);
137 mutex_unlock(&HFSPLUS_I(inode)->extents_lock); 144 mutex_unlock(&HFSPLUS_I(inode)->extents_lock);
145
146 return res;
138} 147}
139 148
140static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, 149static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd,
@@ -194,9 +203,11 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block)
194 block < hip->cached_start + hip->cached_blocks) 203 block < hip->cached_start + hip->cached_blocks)
195 return 0; 204 return 0;
196 205
197 hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); 206 res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd);
198 res = __hfsplus_ext_cache_extent(&fd, inode, block); 207 if (!res) {
199 hfs_find_exit(&fd); 208 res = __hfsplus_ext_cache_extent(&fd, inode, block);
209 hfs_find_exit(&fd);
210 }
200 return res; 211 return res;
201} 212}
202 213
@@ -209,6 +220,7 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
209 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 220 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
210 int res = -EIO; 221 int res = -EIO;
211 u32 ablock, dblock, mask; 222 u32 ablock, dblock, mask;
223 sector_t sector;
212 int was_dirty = 0; 224 int was_dirty = 0;
213 int shift; 225 int shift;
214 226
@@ -255,10 +267,12 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock,
255done: 267done:
256 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", 268 dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n",
257 inode->i_ino, (long long)iblock, dblock); 269 inode->i_ino, (long long)iblock, dblock);
270
258 mask = (1 << sbi->fs_shift) - 1; 271 mask = (1 << sbi->fs_shift) - 1;
259 map_bh(bh_result, sb, 272 sector = ((sector_t)dblock << sbi->fs_shift) +
260 (dblock << sbi->fs_shift) + sbi->blockoffset + 273 sbi->blockoffset + (iblock & mask);
261 (iblock & mask)); 274 map_bh(bh_result, sb, sector);
275
262 if (create) { 276 if (create) {
263 set_buffer_new(bh_result); 277 set_buffer_new(bh_result);
264 hip->phys_size += sb->s_blocksize; 278 hip->phys_size += sb->s_blocksize;
@@ -371,7 +385,9 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid,
371 if (total_blocks == blocks) 385 if (total_blocks == blocks)
372 return 0; 386 return 0;
373 387
374 hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); 388 res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
389 if (res)
390 return res;
375 do { 391 do {
376 res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, 392 res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid,
377 total_blocks, type); 393 total_blocks, type);
@@ -469,7 +485,9 @@ out:
469 485
470insert_extent: 486insert_extent:
471 dprint(DBG_EXTENT, "insert new extent\n"); 487 dprint(DBG_EXTENT, "insert new extent\n");
472 hfsplus_ext_write_extent_locked(inode); 488 res = hfsplus_ext_write_extent_locked(inode);
489 if (res)
490 goto out;
473 491
474 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); 492 memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
475 hip->cached_extents[0].start_block = cpu_to_be32(start); 493 hip->cached_extents[0].start_block = cpu_to_be32(start);
@@ -500,7 +518,6 @@ void hfsplus_file_truncate(struct inode *inode)
500 struct page *page; 518 struct page *page;
501 void *fsdata; 519 void *fsdata;
502 u32 size = inode->i_size; 520 u32 size = inode->i_size;
503 int res;
504 521
505 res = pagecache_write_begin(NULL, mapping, size, 0, 522 res = pagecache_write_begin(NULL, mapping, size, 0,
506 AOP_FLAG_UNINTERRUPTIBLE, 523 AOP_FLAG_UNINTERRUPTIBLE,
@@ -523,7 +540,12 @@ void hfsplus_file_truncate(struct inode *inode)
523 goto out; 540 goto out;
524 541
525 mutex_lock(&hip->extents_lock); 542 mutex_lock(&hip->extents_lock);
526 hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); 543 res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd);
544 if (res) {
545 mutex_unlock(&hip->extents_lock);
546 /* XXX: We lack error handling of hfsplus_file_truncate() */
547 return;
548 }
527 while (1) { 549 while (1) {
528 if (alloc_cnt == hip->first_blocks) { 550 if (alloc_cnt == hip->first_blocks) {
529 hfsplus_free_extents(sb, hip->first_extents, 551 hfsplus_free_extents(sb, hip->first_extents,
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index d6857523336d..d7674d051f52 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -13,6 +13,7 @@
13#include <linux/fs.h> 13#include <linux/fs.h>
14#include <linux/mutex.h> 14#include <linux/mutex.h>
15#include <linux/buffer_head.h> 15#include <linux/buffer_head.h>
16#include <linux/blkdev.h>
16#include "hfsplus_raw.h" 17#include "hfsplus_raw.h"
17 18
18#define DBG_BNODE_REFS 0x00000001 19#define DBG_BNODE_REFS 0x00000001
@@ -110,7 +111,9 @@ struct hfsplus_vh;
110struct hfs_btree; 111struct hfs_btree;
111 112
112struct hfsplus_sb_info { 113struct hfsplus_sb_info {
114 void *s_vhdr_buf;
113 struct hfsplus_vh *s_vhdr; 115 struct hfsplus_vh *s_vhdr;
116 void *s_backup_vhdr_buf;
114 struct hfsplus_vh *s_backup_vhdr; 117 struct hfsplus_vh *s_backup_vhdr;
115 struct hfs_btree *ext_tree; 118 struct hfs_btree *ext_tree;
116 struct hfs_btree *cat_tree; 119 struct hfs_btree *cat_tree;
@@ -258,6 +261,15 @@ struct hfsplus_readdir_data {
258 struct hfsplus_cat_key key; 261 struct hfsplus_cat_key key;
259}; 262};
260 263
264/*
265 * Find minimum acceptable I/O size for an hfsplus sb.
266 */
267static inline unsigned short hfsplus_min_io_size(struct super_block *sb)
268{
269 return max_t(unsigned short, bdev_logical_block_size(sb->s_bdev),
270 HFSPLUS_SECTOR_SIZE);
271}
272
261#define hfs_btree_open hfsplus_btree_open 273#define hfs_btree_open hfsplus_btree_open
262#define hfs_btree_close hfsplus_btree_close 274#define hfs_btree_close hfsplus_btree_close
263#define hfs_btree_write hfsplus_btree_write 275#define hfs_btree_write hfsplus_btree_write
@@ -374,7 +386,7 @@ extern const struct file_operations hfsplus_dir_operations;
374 386
375/* extents.c */ 387/* extents.c */
376int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *); 388int hfsplus_ext_cmp_key(const hfsplus_btree_key *, const hfsplus_btree_key *);
377void hfsplus_ext_write_extent(struct inode *); 389int hfsplus_ext_write_extent(struct inode *);
378int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int); 390int hfsplus_get_block(struct inode *, sector_t, struct buffer_head *, int);
379int hfsplus_free_fork(struct super_block *, u32, 391int hfsplus_free_fork(struct super_block *, u32,
380 struct hfsplus_fork_raw *, int); 392 struct hfsplus_fork_raw *, int);
@@ -392,7 +404,8 @@ int hfsplus_cat_read_inode(struct inode *, struct hfs_find_data *);
392int hfsplus_cat_write_inode(struct inode *); 404int hfsplus_cat_write_inode(struct inode *);
393struct inode *hfsplus_new_inode(struct super_block *, int); 405struct inode *hfsplus_new_inode(struct super_block *, int);
394void hfsplus_delete_inode(struct inode *); 406void hfsplus_delete_inode(struct inode *);
395int hfsplus_file_fsync(struct file *file, int datasync); 407int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
408 int datasync);
396 409
397/* ioctl.c */ 410/* ioctl.c */
398long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); 411long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
@@ -436,8 +449,8 @@ int hfsplus_compare_dentry(const struct dentry *parent,
436/* wrapper.c */ 449/* wrapper.c */
437int hfsplus_read_wrapper(struct super_block *); 450int hfsplus_read_wrapper(struct super_block *);
438int hfs_part_find(struct super_block *, sector_t *, sector_t *); 451int hfs_part_find(struct super_block *, sector_t *, sector_t *);
439int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, 452int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
440 void *data, int rw); 453 void *buf, void **data, int rw);
441 454
442/* time macros */ 455/* time macros */
443#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U) 456#define __hfsp_mt2ut(t) (be32_to_cpu(t) - 2082844800U)
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index b248a6cfcad9..4cc1e3a36ec7 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -119,8 +119,8 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb,
119 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host; 119 struct inode *inode = file->f_path.dentry->d_inode->i_mapping->host;
120 ssize_t ret; 120 ssize_t ret;
121 121
122 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 122 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
123 offset, nr_segs, hfsplus_get_block, NULL); 123 hfsplus_get_block);
124 124
125 /* 125 /*
126 * In case of error extending write may have instantiated a few 126 * In case of error extending write may have instantiated a few
@@ -195,11 +195,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir,
195 hip->flags = 0; 195 hip->flags = 0;
196 set_bit(HFSPLUS_I_RSRC, &hip->flags); 196 set_bit(HFSPLUS_I_RSRC, &hip->flags);
197 197
198 hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); 198 err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
199 err = hfsplus_find_cat(sb, dir->i_ino, &fd); 199 if (!err) {
200 if (!err) 200 err = hfsplus_find_cat(sb, dir->i_ino, &fd);
201 err = hfsplus_cat_read_inode(inode, &fd); 201 if (!err)
202 hfs_find_exit(&fd); 202 err = hfsplus_cat_read_inode(inode, &fd);
203 hfs_find_exit(&fd);
204 }
203 if (err) { 205 if (err) {
204 iput(inode); 206 iput(inode);
205 return ERR_PTR(err); 207 return ERR_PTR(err);
@@ -296,6 +298,8 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
296 298
297 if ((attr->ia_valid & ATTR_SIZE) && 299 if ((attr->ia_valid & ATTR_SIZE) &&
298 attr->ia_size != i_size_read(inode)) { 300 attr->ia_size != i_size_read(inode)) {
301 inode_dio_wait(inode);
302
299 error = vmtruncate(inode, attr->ia_size); 303 error = vmtruncate(inode, attr->ia_size);
300 if (error) 304 if (error)
301 return error; 305 return error;
@@ -306,13 +310,19 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
306 return 0; 310 return 0;
307} 311}
308 312
309int hfsplus_file_fsync(struct file *file, int datasync) 313int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
314 int datasync)
310{ 315{
311 struct inode *inode = file->f_mapping->host; 316 struct inode *inode = file->f_mapping->host;
312 struct hfsplus_inode_info *hip = HFSPLUS_I(inode); 317 struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
313 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); 318 struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb);
314 int error = 0, error2; 319 int error = 0, error2;
315 320
321 error = filemap_write_and_wait_range(inode->i_mapping, start, end);
322 if (error)
323 return error;
324 mutex_lock(&inode->i_mutex);
325
316 /* 326 /*
317 * Sync inode metadata into the catalog and extent trees. 327 * Sync inode metadata into the catalog and extent trees.
318 */ 328 */
@@ -340,6 +350,8 @@ int hfsplus_file_fsync(struct file *file, int datasync)
340 if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags)) 350 if (!test_bit(HFSPLUS_SB_NOBARRIER, &sbi->flags))
341 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 351 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
342 352
353 mutex_unlock(&inode->i_mutex);
354
343 return error; 355 return error;
344} 356}
345 357
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c
index 40ad88c12c64..eb355d81e279 100644
--- a/fs/hfsplus/part_tbl.c
+++ b/fs/hfsplus/part_tbl.c
@@ -88,11 +88,12 @@ static int hfs_parse_old_pmap(struct super_block *sb, struct old_pmap *pm,
88 return -ENOENT; 88 return -ENOENT;
89} 89}
90 90
91static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm, 91static int hfs_parse_new_pmap(struct super_block *sb, void *buf,
92 sector_t *part_start, sector_t *part_size) 92 struct new_pmap *pm, sector_t *part_start, sector_t *part_size)
93{ 93{
94 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); 94 struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb);
95 int size = be32_to_cpu(pm->pmMapBlkCnt); 95 int size = be32_to_cpu(pm->pmMapBlkCnt);
96 int buf_size = hfsplus_min_io_size(sb);
96 int res; 97 int res;
97 int i = 0; 98 int i = 0;
98 99
@@ -107,11 +108,14 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
107 if (++i >= size) 108 if (++i >= size)
108 return -ENOENT; 109 return -ENOENT;
109 110
110 res = hfsplus_submit_bio(sb->s_bdev, 111 pm = (struct new_pmap *)((u8 *)pm + HFSPLUS_SECTOR_SIZE);
111 *part_start + HFS_PMAP_BLK + i, 112 if ((u8 *)pm - (u8 *)buf >= buf_size) {
112 pm, READ); 113 res = hfsplus_submit_bio(sb,
113 if (res) 114 *part_start + HFS_PMAP_BLK + i,
114 return res; 115 buf, (void **)&pm, READ);
116 if (res)
117 return res;
118 }
115 } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC)); 119 } while (pm->pmSig == cpu_to_be16(HFS_NEW_PMAP_MAGIC));
116 120
117 return -ENOENT; 121 return -ENOENT;
@@ -124,15 +128,15 @@ static int hfs_parse_new_pmap(struct super_block *sb, struct new_pmap *pm,
124int hfs_part_find(struct super_block *sb, 128int hfs_part_find(struct super_block *sb,
125 sector_t *part_start, sector_t *part_size) 129 sector_t *part_start, sector_t *part_size)
126{ 130{
127 void *data; 131 void *buf, *data;
128 int res; 132 int res;
129 133
130 data = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); 134 buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
131 if (!data) 135 if (!buf)
132 return -ENOMEM; 136 return -ENOMEM;
133 137
134 res = hfsplus_submit_bio(sb->s_bdev, *part_start + HFS_PMAP_BLK, 138 res = hfsplus_submit_bio(sb, *part_start + HFS_PMAP_BLK,
135 data, READ); 139 buf, &data, READ);
136 if (res) 140 if (res)
137 goto out; 141 goto out;
138 142
@@ -141,13 +145,13 @@ int hfs_part_find(struct super_block *sb,
141 res = hfs_parse_old_pmap(sb, data, part_start, part_size); 145 res = hfs_parse_old_pmap(sb, data, part_start, part_size);
142 break; 146 break;
143 case HFS_NEW_PMAP_MAGIC: 147 case HFS_NEW_PMAP_MAGIC:
144 res = hfs_parse_new_pmap(sb, data, part_start, part_size); 148 res = hfs_parse_new_pmap(sb, buf, data, part_start, part_size);
145 break; 149 break;
146 default: 150 default:
147 res = -ENOENT; 151 res = -ENOENT;
148 break; 152 break;
149 } 153 }
150out: 154out:
151 kfree(data); 155 kfree(buf);
152 return res; 156 return res;
153} 157}
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 84a47b709f51..c106ca22e812 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -73,11 +73,13 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino)
73 73
74 if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || 74 if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
75 inode->i_ino == HFSPLUS_ROOT_CNID) { 75 inode->i_ino == HFSPLUS_ROOT_CNID) {
76 hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); 76 err = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd);
77 err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); 77 if (!err) {
78 if (!err) 78 err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd);
79 err = hfsplus_cat_read_inode(inode, &fd); 79 if (!err)
80 hfs_find_exit(&fd); 80 err = hfsplus_cat_read_inode(inode, &fd);
81 hfs_find_exit(&fd);
82 }
81 } else { 83 } else {
82 err = hfsplus_system_read_inode(inode); 84 err = hfsplus_system_read_inode(inode);
83 } 85 }
@@ -133,9 +135,13 @@ static int hfsplus_system_write_inode(struct inode *inode)
133static int hfsplus_write_inode(struct inode *inode, 135static int hfsplus_write_inode(struct inode *inode,
134 struct writeback_control *wbc) 136 struct writeback_control *wbc)
135{ 137{
138 int err;
139
136 dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); 140 dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino);
137 141
138 hfsplus_ext_write_extent(inode); 142 err = hfsplus_ext_write_extent(inode);
143 if (err)
144 return err;
139 145
140 if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || 146 if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID ||
141 inode->i_ino == HFSPLUS_ROOT_CNID) 147 inode->i_ino == HFSPLUS_ROOT_CNID)
@@ -197,17 +203,17 @@ int hfsplus_sync_fs(struct super_block *sb, int wait)
197 write_backup = 1; 203 write_backup = 1;
198 } 204 }
199 205
200 error2 = hfsplus_submit_bio(sb->s_bdev, 206 error2 = hfsplus_submit_bio(sb,
201 sbi->part_start + HFSPLUS_VOLHEAD_SECTOR, 207 sbi->part_start + HFSPLUS_VOLHEAD_SECTOR,
202 sbi->s_vhdr, WRITE_SYNC); 208 sbi->s_vhdr_buf, NULL, WRITE_SYNC);
203 if (!error) 209 if (!error)
204 error = error2; 210 error = error2;
205 if (!write_backup) 211 if (!write_backup)
206 goto out; 212 goto out;
207 213
208 error2 = hfsplus_submit_bio(sb->s_bdev, 214 error2 = hfsplus_submit_bio(sb,
209 sbi->part_start + sbi->sect_count - 2, 215 sbi->part_start + sbi->sect_count - 2,
210 sbi->s_backup_vhdr, WRITE_SYNC); 216 sbi->s_backup_vhdr_buf, NULL, WRITE_SYNC);
211 if (!error) 217 if (!error)
212 error2 = error; 218 error2 = error;
213out: 219out:
@@ -251,8 +257,8 @@ static void hfsplus_put_super(struct super_block *sb)
251 hfs_btree_close(sbi->ext_tree); 257 hfs_btree_close(sbi->ext_tree);
252 iput(sbi->alloc_file); 258 iput(sbi->alloc_file);
253 iput(sbi->hidden_dir); 259 iput(sbi->hidden_dir);
254 kfree(sbi->s_vhdr); 260 kfree(sbi->s_vhdr_buf);
255 kfree(sbi->s_backup_vhdr); 261 kfree(sbi->s_backup_vhdr_buf);
256 unload_nls(sbi->nls); 262 unload_nls(sbi->nls);
257 kfree(sb->s_fs_info); 263 kfree(sb->s_fs_info);
258 sb->s_fs_info = NULL; 264 sb->s_fs_info = NULL;
@@ -393,6 +399,13 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
393 if (!sbi->rsrc_clump_blocks) 399 if (!sbi->rsrc_clump_blocks)
394 sbi->rsrc_clump_blocks = 1; 400 sbi->rsrc_clump_blocks = 1;
395 401
402 err = generic_check_addressable(sbi->alloc_blksz_shift,
403 sbi->total_blocks);
404 if (err) {
405 printk(KERN_ERR "hfs: filesystem size too large.\n");
406 goto out_free_vhdr;
407 }
408
396 /* Set up operations so we can load metadata */ 409 /* Set up operations so we can load metadata */
397 sb->s_op = &hfsplus_sops; 410 sb->s_op = &hfsplus_sops;
398 sb->s_maxbytes = MAX_LFS_FILESIZE; 411 sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -417,6 +430,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
417 sb->s_flags |= MS_RDONLY; 430 sb->s_flags |= MS_RDONLY;
418 } 431 }
419 432
433 err = -EINVAL;
434
420 /* Load metadata objects (B*Trees) */ 435 /* Load metadata objects (B*Trees) */
421 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); 436 sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID);
422 if (!sbi->ext_tree) { 437 if (!sbi->ext_tree) {
@@ -447,7 +462,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
447 462
448 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; 463 str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1;
449 str.name = HFSP_HIDDENDIR_NAME; 464 str.name = HFSP_HIDDENDIR_NAME;
450 hfs_find_init(sbi->cat_tree, &fd); 465 err = hfs_find_init(sbi->cat_tree, &fd);
466 if (err)
467 goto out_put_root;
451 hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); 468 hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
452 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { 469 if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
453 hfs_find_exit(&fd); 470 hfs_find_exit(&fd);
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c
index a3f0bfcc881e..a32998f29f0b 100644
--- a/fs/hfsplus/unicode.c
+++ b/fs/hfsplus/unicode.c
@@ -142,7 +142,11 @@ int hfsplus_uni2asc(struct super_block *sb,
142 /* search for single decomposed char */ 142 /* search for single decomposed char */
143 if (likely(compose)) 143 if (likely(compose))
144 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); 144 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
145 if (ce1 && (cc = ce1[0])) { 145 if (ce1)
146 cc = ce1[0];
147 else
148 cc = 0;
149 if (cc) {
146 /* start of a possibly decomposed Hangul char */ 150 /* start of a possibly decomposed Hangul char */
147 if (cc != 0xffff) 151 if (cc != 0xffff)
148 goto done; 152 goto done;
@@ -209,7 +213,8 @@ int hfsplus_uni2asc(struct super_block *sb,
209 i++; 213 i++;
210 ce2 = ce1; 214 ce2 = ce1;
211 } 215 }
212 if ((cc = ce2[0])) { 216 cc = ce2[0];
217 if (cc) {
213 ip += i; 218 ip += i;
214 ustrlen -= i; 219 ustrlen -= i;
215 goto done; 220 goto done;
@@ -301,7 +306,11 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr,
301 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { 306 while (outlen < HFSPLUS_MAX_STRLEN && len > 0) {
302 size = asc2unichar(sb, astr, len, &c); 307 size = asc2unichar(sb, astr, len, &c);
303 308
304 if (decompose && (dstr = decompose_unichar(c, &dsize))) { 309 if (decompose)
310 dstr = decompose_unichar(c, &dsize);
311 else
312 dstr = NULL;
313 if (dstr) {
305 if (outlen + dsize > HFSPLUS_MAX_STRLEN) 314 if (outlen + dsize > HFSPLUS_MAX_STRLEN)
306 break; 315 break;
307 do { 316 do {
@@ -346,15 +355,23 @@ int hfsplus_hash_dentry(const struct dentry *dentry, const struct inode *inode,
346 astr += size; 355 astr += size;
347 len -= size; 356 len -= size;
348 357
349 if (decompose && (dstr = decompose_unichar(c, &dsize))) { 358 if (decompose)
359 dstr = decompose_unichar(c, &dsize);
360 else
361 dstr = NULL;
362 if (dstr) {
350 do { 363 do {
351 c2 = *dstr++; 364 c2 = *dstr++;
352 if (!casefold || (c2 = case_fold(c2))) 365 if (casefold)
366 c2 = case_fold(c2);
367 if (!casefold || c2)
353 hash = partial_name_hash(c2, hash); 368 hash = partial_name_hash(c2, hash);
354 } while (--dsize > 0); 369 } while (--dsize > 0);
355 } else { 370 } else {
356 c2 = c; 371 c2 = c;
357 if (!casefold || (c2 = case_fold(c2))) 372 if (casefold)
373 c2 = case_fold(c2);
374 if (!casefold || c2)
358 hash = partial_name_hash(c2, hash); 375 hash = partial_name_hash(c2, hash);
359 } 376 }
360 } 377 }
@@ -422,12 +439,14 @@ int hfsplus_compare_dentry(const struct dentry *parent,
422 c1 = *dstr1; 439 c1 = *dstr1;
423 c2 = *dstr2; 440 c2 = *dstr2;
424 if (casefold) { 441 if (casefold) {
425 if (!(c1 = case_fold(c1))) { 442 c1 = case_fold(c1);
443 if (!c1) {
426 dstr1++; 444 dstr1++;
427 dsize1--; 445 dsize1--;
428 continue; 446 continue;
429 } 447 }
430 if (!(c2 = case_fold(c2))) { 448 c2 = case_fold(c2);
449 if (!c2) {
431 dstr2++; 450 dstr2++;
432 dsize2--; 451 dsize2--;
433 continue; 452 continue;
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c
index 4ac88ff79aa6..10e515a0d452 100644
--- a/fs/hfsplus/wrapper.c
+++ b/fs/hfsplus/wrapper.c
@@ -31,25 +31,67 @@ static void hfsplus_end_io_sync(struct bio *bio, int err)
31 complete(bio->bi_private); 31 complete(bio->bi_private);
32} 32}
33 33
34int hfsplus_submit_bio(struct block_device *bdev, sector_t sector, 34/*
35 void *data, int rw) 35 * hfsplus_submit_bio - Perform block I/O
36 * @sb: super block of volume for I/O
37 * @sector: block to read or write, for blocks of HFSPLUS_SECTOR_SIZE bytes
38 * @buf: buffer for I/O
39 * @data: output pointer for location of requested data
40 * @rw: direction of I/O
41 *
42 * The unit of I/O is hfsplus_min_io_size(sb), which may be bigger than
43 * HFSPLUS_SECTOR_SIZE, and @buf must be sized accordingly. On reads
44 * @data will return a pointer to the start of the requested sector,
45 * which may not be the same location as @buf.
46 *
47 * If @sector is not aligned to the bdev logical block size it will
48 * be rounded down. For writes this means that @buf should contain data
49 * that starts at the rounded-down address. As long as the data was
50 * read using hfsplus_submit_bio() and the same buffer is used things
51 * will work correctly.
52 */
53int hfsplus_submit_bio(struct super_block *sb, sector_t sector,
54 void *buf, void **data, int rw)
36{ 55{
37 DECLARE_COMPLETION_ONSTACK(wait); 56 DECLARE_COMPLETION_ONSTACK(wait);
38 struct bio *bio; 57 struct bio *bio;
39 int ret = 0; 58 int ret = 0;
59 unsigned int io_size;
60 loff_t start;
61 int offset;
62
63 /*
64 * Align sector to hardware sector size and find offset. We
65 * assume that io_size is a power of two, which _should_
66 * be true.
67 */
68 io_size = hfsplus_min_io_size(sb);
69 start = (loff_t)sector << HFSPLUS_SECTOR_SHIFT;
70 offset = start & (io_size - 1);
71 sector &= ~((io_size >> HFSPLUS_SECTOR_SHIFT) - 1);
40 72
41 bio = bio_alloc(GFP_NOIO, 1); 73 bio = bio_alloc(GFP_NOIO, 1);
42 bio->bi_sector = sector; 74 bio->bi_sector = sector;
43 bio->bi_bdev = bdev; 75 bio->bi_bdev = sb->s_bdev;
44 bio->bi_end_io = hfsplus_end_io_sync; 76 bio->bi_end_io = hfsplus_end_io_sync;
45 bio->bi_private = &wait; 77 bio->bi_private = &wait;
46 78
47 /* 79 if (!(rw & WRITE) && data)
48 * We always submit one sector at a time, so bio_add_page must not fail. 80 *data = (u8 *)buf + offset;
49 */ 81
50 if (bio_add_page(bio, virt_to_page(data), HFSPLUS_SECTOR_SIZE, 82 while (io_size > 0) {
51 offset_in_page(data)) != HFSPLUS_SECTOR_SIZE) 83 unsigned int page_offset = offset_in_page(buf);
52 BUG(); 84 unsigned int len = min_t(unsigned int, PAGE_SIZE - page_offset,
85 io_size);
86
87 ret = bio_add_page(bio, virt_to_page(buf), len, page_offset);
88 if (ret != len) {
89 ret = -EIO;
90 goto out;
91 }
92 io_size -= len;
93 buf = (u8 *)buf + len;
94 }
53 95
54 submit_bio(rw, bio); 96 submit_bio(rw, bio);
55 wait_for_completion(&wait); 97 wait_for_completion(&wait);
@@ -57,8 +99,9 @@ int hfsplus_submit_bio(struct block_device *bdev, sector_t sector,
57 if (!bio_flagged(bio, BIO_UPTODATE)) 99 if (!bio_flagged(bio, BIO_UPTODATE))
58 ret = -EIO; 100 ret = -EIO;
59 101
102out:
60 bio_put(bio); 103 bio_put(bio);
61 return ret; 104 return ret < 0 ? ret : 0;
62} 105}
63 106
64static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd) 107static int hfsplus_read_mdb(void *bufptr, struct hfsplus_wd *wd)
@@ -141,23 +184,19 @@ int hfsplus_read_wrapper(struct super_block *sb)
141 184
142 if (hfsplus_get_last_session(sb, &part_start, &part_size)) 185 if (hfsplus_get_last_session(sb, &part_start, &part_size))
143 goto out; 186 goto out;
144 if ((u64)part_start + part_size > 0x100000000ULL) {
145 pr_err("hfs: volumes larger than 2TB are not supported yet\n");
146 goto out;
147 }
148 187
149 error = -ENOMEM; 188 error = -ENOMEM;
150 sbi->s_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); 189 sbi->s_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
151 if (!sbi->s_vhdr) 190 if (!sbi->s_vhdr_buf)
152 goto out; 191 goto out;
153 sbi->s_backup_vhdr = kmalloc(HFSPLUS_SECTOR_SIZE, GFP_KERNEL); 192 sbi->s_backup_vhdr_buf = kmalloc(hfsplus_min_io_size(sb), GFP_KERNEL);
154 if (!sbi->s_backup_vhdr) 193 if (!sbi->s_backup_vhdr_buf)
155 goto out_free_vhdr; 194 goto out_free_vhdr;
156 195
157reread: 196reread:
158 error = hfsplus_submit_bio(sb->s_bdev, 197 error = hfsplus_submit_bio(sb, part_start + HFSPLUS_VOLHEAD_SECTOR,
159 part_start + HFSPLUS_VOLHEAD_SECTOR, 198 sbi->s_vhdr_buf, (void **)&sbi->s_vhdr,
160 sbi->s_vhdr, READ); 199 READ);
161 if (error) 200 if (error)
162 goto out_free_backup_vhdr; 201 goto out_free_backup_vhdr;
163 202
@@ -172,8 +211,9 @@ reread:
172 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd)) 211 if (!hfsplus_read_mdb(sbi->s_vhdr, &wd))
173 goto out_free_backup_vhdr; 212 goto out_free_backup_vhdr;
174 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT; 213 wd.ablk_size >>= HFSPLUS_SECTOR_SHIFT;
175 part_start += wd.ablk_start + wd.embed_start * wd.ablk_size; 214 part_start += (sector_t)wd.ablk_start +
176 part_size = wd.embed_count * wd.ablk_size; 215 (sector_t)wd.embed_start * wd.ablk_size;
216 part_size = (sector_t)wd.embed_count * wd.ablk_size;
177 goto reread; 217 goto reread;
178 default: 218 default:
179 /* 219 /*
@@ -186,9 +226,9 @@ reread:
186 goto reread; 226 goto reread;
187 } 227 }
188 228
189 error = hfsplus_submit_bio(sb->s_bdev, 229 error = hfsplus_submit_bio(sb, part_start + part_size - 2,
190 part_start + part_size - 2, 230 sbi->s_backup_vhdr_buf,
191 sbi->s_backup_vhdr, READ); 231 (void **)&sbi->s_backup_vhdr, READ);
192 if (error) 232 if (error)
193 goto out_free_backup_vhdr; 233 goto out_free_backup_vhdr;
194 234
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 2638c834ed28..0d22afdd4611 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -362,9 +362,20 @@ retry:
362 return 0; 362 return 0;
363} 363}
364 364
365int hostfs_fsync(struct file *file, int datasync) 365int hostfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
366{ 366{
367 return fsync_file(HOSTFS_I(file->f_mapping->host)->fd, datasync); 367 struct inode *inode = file->f_mapping->host;
368 int ret;
369
370 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
371 if (ret)
372 return ret;
373
374 mutex_lock(&inode->i_mutex);
375 ret = fsync_file(HOSTFS_I(inode)->fd, datasync);
376 mutex_unlock(&inode->i_mutex);
377
378 return ret;
368} 379}
369 380
370static const struct file_operations hostfs_file_fops = { 381static const struct file_operations hostfs_file_fops = {
@@ -748,12 +759,12 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from,
748 return err; 759 return err;
749} 760}
750 761
751int hostfs_permission(struct inode *ino, int desired, unsigned int flags) 762int hostfs_permission(struct inode *ino, int desired)
752{ 763{
753 char *name; 764 char *name;
754 int r = 0, w = 0, x = 0, err; 765 int r = 0, w = 0, x = 0, err;
755 766
756 if (flags & IPERM_FLAG_RCU) 767 if (desired & MAY_NOT_BLOCK)
757 return -ECHILD; 768 return -ECHILD;
758 769
759 if (desired & MAY_READ) r = 1; 770 if (desired & MAY_READ) r = 1;
@@ -770,7 +781,7 @@ int hostfs_permission(struct inode *ino, int desired, unsigned int flags)
770 err = access_file(name, r, w, x); 781 err = access_file(name, r, w, x);
771 __putname(name); 782 __putname(name);
772 if (!err) 783 if (!err)
773 err = generic_permission(ino, desired, flags, NULL); 784 err = generic_permission(ino, desired);
774 return err; 785 return err;
775} 786}
776 787
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index f46ae025bfb5..96a8ed91cedd 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -29,6 +29,10 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence)
29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i); 29 struct hpfs_inode_info *hpfs_inode = hpfs_i(i);
30 struct super_block *s = i->i_sb; 30 struct super_block *s = i->i_sb;
31 31
32 /* Somebody else will have to figure out what to do here */
33 if (whence == SEEK_DATA || whence == SEEK_HOLE)
34 return -EINVAL;
35
32 hpfs_lock(s); 36 hpfs_lock(s);
33 37
34 /*printk("dir lseek\n");*/ 38 /*printk("dir lseek\n");*/
diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index 89c500ee5213..89d2a5803ae3 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -18,9 +18,14 @@ static int hpfs_file_release(struct inode *inode, struct file *file)
18 return 0; 18 return 0;
19} 19}
20 20
21int hpfs_file_fsync(struct file *file, int datasync) 21int hpfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
22{ 22{
23 struct inode *inode = file->f_mapping->host; 23 struct inode *inode = file->f_mapping->host;
24 int ret;
25
26 ret = filemap_write_and_wait_range(file->f_mapping, start, end);
27 if (ret)
28 return ret;
24 return sync_blockdev(inode->i_sb->s_bdev); 29 return sync_blockdev(inode->i_sb->s_bdev);
25} 30}
26 31
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index dd552f862c8f..331b5e234ef3 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -258,7 +258,7 @@ void hpfs_set_ea(struct inode *, struct fnode *, const char *,
258 258
259/* file.c */ 259/* file.c */
260 260
261int hpfs_file_fsync(struct file *, int); 261int hpfs_file_fsync(struct file *, loff_t, loff_t, int);
262extern const struct file_operations hpfs_file_ops; 262extern const struct file_operations hpfs_file_ops;
263extern const struct inode_operations hpfs_file_iops; 263extern const struct inode_operations hpfs_file_iops;
264extern const struct address_space_operations hpfs_aops; 264extern const struct address_space_operations hpfs_aops;
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index acf95dab2aac..2df69e2f07cf 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -398,7 +398,7 @@ again:
398 hpfs_unlock(dir->i_sb); 398 hpfs_unlock(dir->i_sb);
399 return -ENOSPC; 399 return -ENOSPC;
400 } 400 }
401 if (generic_permission(inode, MAY_WRITE, 0, NULL) || 401 if (generic_permission(inode, MAY_WRITE) ||
402 !S_ISREG(inode->i_mode) || 402 !S_ISREG(inode->i_mode) ||
403 get_write_access(inode)) { 403 get_write_access(inode)) {
404 d_rehash(dentry); 404 d_rehash(dentry);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 85c098a499f3..8635be5ffd97 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -573,9 +573,10 @@ static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
573 return err; 573 return err;
574} 574}
575 575
576static int hppfs_fsync(struct file *file, int datasync) 576static int hppfs_fsync(struct file *file, loff_t start, loff_t end,
577 int datasync)
577{ 578{
578 return 0; 579 return filemap_write_and_wait_range(file->f_mapping, start, end);
579} 580}
580 581
581static const struct file_operations hppfs_dir_fops = { 582static const struct file_operations hppfs_dir_fops = {
diff --git a/fs/inode.c b/fs/inode.c
index 43566d17d1b8..96c77b81167c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -33,8 +33,8 @@
33 * 33 *
34 * inode->i_lock protects: 34 * inode->i_lock protects:
35 * inode->i_state, inode->i_hash, __iget() 35 * inode->i_state, inode->i_hash, __iget()
36 * inode_lru_lock protects: 36 * inode->i_sb->s_inode_lru_lock protects:
37 * inode_lru, inode->i_lru 37 * inode->i_sb->s_inode_lru, inode->i_lru
38 * inode_sb_list_lock protects: 38 * inode_sb_list_lock protects:
39 * sb->s_inodes, inode->i_sb_list 39 * sb->s_inodes, inode->i_sb_list
40 * inode_wb_list_lock protects: 40 * inode_wb_list_lock protects:
@@ -46,7 +46,7 @@
46 * 46 *
47 * inode_sb_list_lock 47 * inode_sb_list_lock
48 * inode->i_lock 48 * inode->i_lock
49 * inode_lru_lock 49 * inode->i_sb->s_inode_lru_lock
50 * 50 *
51 * inode_wb_list_lock 51 * inode_wb_list_lock
52 * inode->i_lock 52 * inode->i_lock
@@ -64,24 +64,10 @@ static unsigned int i_hash_shift __read_mostly;
64static struct hlist_head *inode_hashtable __read_mostly; 64static struct hlist_head *inode_hashtable __read_mostly;
65static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); 65static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
66 66
67static LIST_HEAD(inode_lru);
68static DEFINE_SPINLOCK(inode_lru_lock);
69
70__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); 67__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
71__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); 68__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock);
72 69
73/* 70/*
74 * iprune_sem provides exclusion between the icache shrinking and the
75 * umount path.
76 *
77 * We don't actually need it to protect anything in the umount path,
78 * but only need to cycle through it to make sure any inode that
79 * prune_icache took off the LRU list has been fully torn down by the
80 * time we are past evict_inodes.
81 */
82static DECLARE_RWSEM(iprune_sem);
83
84/*
85 * Empty aops. Can be used for the cases where the user does not 71 * Empty aops. Can be used for the cases where the user does not
86 * define any of the address_space operations. 72 * define any of the address_space operations.
87 */ 73 */
@@ -95,6 +81,7 @@ EXPORT_SYMBOL(empty_aops);
95struct inodes_stat_t inodes_stat; 81struct inodes_stat_t inodes_stat;
96 82
97static DEFINE_PER_CPU(unsigned int, nr_inodes); 83static DEFINE_PER_CPU(unsigned int, nr_inodes);
84static DEFINE_PER_CPU(unsigned int, nr_unused);
98 85
99static struct kmem_cache *inode_cachep __read_mostly; 86static struct kmem_cache *inode_cachep __read_mostly;
100 87
@@ -109,7 +96,11 @@ static int get_nr_inodes(void)
109 96
110static inline int get_nr_inodes_unused(void) 97static inline int get_nr_inodes_unused(void)
111{ 98{
112 return inodes_stat.nr_unused; 99 int i;
100 int sum = 0;
101 for_each_possible_cpu(i)
102 sum += per_cpu(nr_unused, i);
103 return sum < 0 ? 0 : sum;
113} 104}
114 105
115int get_nr_dirty_inodes(void) 106int get_nr_dirty_inodes(void)
@@ -127,6 +118,7 @@ int proc_nr_inodes(ctl_table *table, int write,
127 void __user *buffer, size_t *lenp, loff_t *ppos) 118 void __user *buffer, size_t *lenp, loff_t *ppos)
128{ 119{
129 inodes_stat.nr_inodes = get_nr_inodes(); 120 inodes_stat.nr_inodes = get_nr_inodes();
121 inodes_stat.nr_unused = get_nr_inodes_unused();
130 return proc_dointvec(table, write, buffer, lenp, ppos); 122 return proc_dointvec(table, write, buffer, lenp, ppos);
131} 123}
132#endif 124#endif
@@ -176,8 +168,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
176 mutex_init(&inode->i_mutex); 168 mutex_init(&inode->i_mutex);
177 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); 169 lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key);
178 170
179 init_rwsem(&inode->i_alloc_sem); 171 atomic_set(&inode->i_dio_count, 0);
180 lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key);
181 172
182 mapping->a_ops = &empty_aops; 173 mapping->a_ops = &empty_aops;
183 mapping->host = inode; 174 mapping->host = inode;
@@ -337,22 +328,24 @@ EXPORT_SYMBOL(ihold);
337 328
338static void inode_lru_list_add(struct inode *inode) 329static void inode_lru_list_add(struct inode *inode)
339{ 330{
340 spin_lock(&inode_lru_lock); 331 spin_lock(&inode->i_sb->s_inode_lru_lock);
341 if (list_empty(&inode->i_lru)) { 332 if (list_empty(&inode->i_lru)) {
342 list_add(&inode->i_lru, &inode_lru); 333 list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
343 inodes_stat.nr_unused++; 334 inode->i_sb->s_nr_inodes_unused++;
335 this_cpu_inc(nr_unused);
344 } 336 }
345 spin_unlock(&inode_lru_lock); 337 spin_unlock(&inode->i_sb->s_inode_lru_lock);
346} 338}
347 339
348static void inode_lru_list_del(struct inode *inode) 340static void inode_lru_list_del(struct inode *inode)
349{ 341{
350 spin_lock(&inode_lru_lock); 342 spin_lock(&inode->i_sb->s_inode_lru_lock);
351 if (!list_empty(&inode->i_lru)) { 343 if (!list_empty(&inode->i_lru)) {
352 list_del_init(&inode->i_lru); 344 list_del_init(&inode->i_lru);
353 inodes_stat.nr_unused--; 345 inode->i_sb->s_nr_inodes_unused--;
346 this_cpu_dec(nr_unused);
354 } 347 }
355 spin_unlock(&inode_lru_lock); 348 spin_unlock(&inode->i_sb->s_inode_lru_lock);
356} 349}
357 350
358/** 351/**
@@ -537,14 +530,6 @@ void evict_inodes(struct super_block *sb)
537 spin_unlock(&inode_sb_list_lock); 530 spin_unlock(&inode_sb_list_lock);
538 531
539 dispose_list(&dispose); 532 dispose_list(&dispose);
540
541 /*
542 * Cycle through iprune_sem to make sure any inode that prune_icache
543 * moved off the list before we took the lock has been fully torn
544 * down.
545 */
546 down_write(&iprune_sem);
547 up_write(&iprune_sem);
548} 533}
549 534
550/** 535/**
@@ -607,8 +592,10 @@ static int can_unuse(struct inode *inode)
607} 592}
608 593
609/* 594/*
610 * Scan `goal' inodes on the unused list for freeable ones. They are moved to a 595 * Walk the superblock inode LRU for freeable inodes and attempt to free them.
611 * temporary list and then are freed outside inode_lru_lock by dispose_list(). 596 * This is called from the superblock shrinker function with a number of inodes
597 * to trim from the LRU. Inodes to be freed are moved to a temporary list and
598 * then are freed outside sb->s_inode_lru_lock by dispose_list().
612 * 599 *
613 * Any inodes which are pinned purely because of attached pagecache have their 600 * Any inodes which are pinned purely because of attached pagecache have their
614 * pagecache removed. If the inode has metadata buffers attached to 601 * pagecache removed. If the inode has metadata buffers attached to
@@ -622,29 +609,28 @@ static int can_unuse(struct inode *inode)
622 * LRU does not have strict ordering. Hence we don't want to reclaim inodes 609 * LRU does not have strict ordering. Hence we don't want to reclaim inodes
623 * with this flag set because they are the inodes that are out of order. 610 * with this flag set because they are the inodes that are out of order.
624 */ 611 */
625static void prune_icache(int nr_to_scan) 612void prune_icache_sb(struct super_block *sb, int nr_to_scan)
626{ 613{
627 LIST_HEAD(freeable); 614 LIST_HEAD(freeable);
628 int nr_scanned; 615 int nr_scanned;
629 unsigned long reap = 0; 616 unsigned long reap = 0;
630 617
631 down_read(&iprune_sem); 618 spin_lock(&sb->s_inode_lru_lock);
632 spin_lock(&inode_lru_lock); 619 for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
633 for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
634 struct inode *inode; 620 struct inode *inode;
635 621
636 if (list_empty(&inode_lru)) 622 if (list_empty(&sb->s_inode_lru))
637 break; 623 break;
638 624
639 inode = list_entry(inode_lru.prev, struct inode, i_lru); 625 inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
640 626
641 /* 627 /*
642 * we are inverting the inode_lru_lock/inode->i_lock here, 628 * we are inverting the sb->s_inode_lru_lock/inode->i_lock here,
643 * so use a trylock. If we fail to get the lock, just move the 629 * so use a trylock. If we fail to get the lock, just move the
644 * inode to the back of the list so we don't spin on it. 630 * inode to the back of the list so we don't spin on it.
645 */ 631 */
646 if (!spin_trylock(&inode->i_lock)) { 632 if (!spin_trylock(&inode->i_lock)) {
647 list_move(&inode->i_lru, &inode_lru); 633 list_move(&inode->i_lru, &sb->s_inode_lru);
648 continue; 634 continue;
649 } 635 }
650 636
@@ -656,28 +642,29 @@ static void prune_icache(int nr_to_scan)
656 (inode->i_state & ~I_REFERENCED)) { 642 (inode->i_state & ~I_REFERENCED)) {
657 list_del_init(&inode->i_lru); 643 list_del_init(&inode->i_lru);
658 spin_unlock(&inode->i_lock); 644 spin_unlock(&inode->i_lock);
659 inodes_stat.nr_unused--; 645 sb->s_nr_inodes_unused--;
646 this_cpu_dec(nr_unused);
660 continue; 647 continue;
661 } 648 }
662 649
663 /* recently referenced inodes get one more pass */ 650 /* recently referenced inodes get one more pass */
664 if (inode->i_state & I_REFERENCED) { 651 if (inode->i_state & I_REFERENCED) {
665 inode->i_state &= ~I_REFERENCED; 652 inode->i_state &= ~I_REFERENCED;
666 list_move(&inode->i_lru, &inode_lru); 653 list_move(&inode->i_lru, &sb->s_inode_lru);
667 spin_unlock(&inode->i_lock); 654 spin_unlock(&inode->i_lock);
668 continue; 655 continue;
669 } 656 }
670 if (inode_has_buffers(inode) || inode->i_data.nrpages) { 657 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
671 __iget(inode); 658 __iget(inode);
672 spin_unlock(&inode->i_lock); 659 spin_unlock(&inode->i_lock);
673 spin_unlock(&inode_lru_lock); 660 spin_unlock(&sb->s_inode_lru_lock);
674 if (remove_inode_buffers(inode)) 661 if (remove_inode_buffers(inode))
675 reap += invalidate_mapping_pages(&inode->i_data, 662 reap += invalidate_mapping_pages(&inode->i_data,
676 0, -1); 663 0, -1);
677 iput(inode); 664 iput(inode);
678 spin_lock(&inode_lru_lock); 665 spin_lock(&sb->s_inode_lru_lock);
679 666
680 if (inode != list_entry(inode_lru.next, 667 if (inode != list_entry(sb->s_inode_lru.next,
681 struct inode, i_lru)) 668 struct inode, i_lru))
682 continue; /* wrong inode or list_empty */ 669 continue; /* wrong inode or list_empty */
683 /* avoid lock inversions with trylock */ 670 /* avoid lock inversions with trylock */
@@ -693,51 +680,18 @@ static void prune_icache(int nr_to_scan)
693 spin_unlock(&inode->i_lock); 680 spin_unlock(&inode->i_lock);
694 681
695 list_move(&inode->i_lru, &freeable); 682 list_move(&inode->i_lru, &freeable);
696 inodes_stat.nr_unused--; 683 sb->s_nr_inodes_unused--;
684 this_cpu_dec(nr_unused);
697 } 685 }
698 if (current_is_kswapd()) 686 if (current_is_kswapd())
699 __count_vm_events(KSWAPD_INODESTEAL, reap); 687 __count_vm_events(KSWAPD_INODESTEAL, reap);
700 else 688 else
701 __count_vm_events(PGINODESTEAL, reap); 689 __count_vm_events(PGINODESTEAL, reap);
702 spin_unlock(&inode_lru_lock); 690 spin_unlock(&sb->s_inode_lru_lock);
703 691
704 dispose_list(&freeable); 692 dispose_list(&freeable);
705 up_read(&iprune_sem);
706} 693}
707 694
708/*
709 * shrink_icache_memory() will attempt to reclaim some unused inodes. Here,
710 * "unused" means that no dentries are referring to the inodes: the files are
711 * not open and the dcache references to those inodes have already been
712 * reclaimed.
713 *
714 * This function is passed the number of inodes to scan, and it returns the
715 * total number of remaining possibly-reclaimable inodes.
716 */
717static int shrink_icache_memory(struct shrinker *shrink,
718 struct shrink_control *sc)
719{
720 int nr = sc->nr_to_scan;
721 gfp_t gfp_mask = sc->gfp_mask;
722
723 if (nr) {
724 /*
725 * Nasty deadlock avoidance. We may hold various FS locks,
726 * and we don't want to recurse into the FS that called us
727 * in clear_inode() and friends..
728 */
729 if (!(gfp_mask & __GFP_FS))
730 return -1;
731 prune_icache(nr);
732 }
733 return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
734}
735
736static struct shrinker icache_shrinker = {
737 .shrink = shrink_icache_memory,
738 .seeks = DEFAULT_SEEKS,
739};
740
741static void __wait_on_freeing_inode(struct inode *inode); 695static void __wait_on_freeing_inode(struct inode *inode);
742/* 696/*
743 * Called with the inode lock held. 697 * Called with the inode lock held.
@@ -1331,7 +1285,7 @@ static void iput_final(struct inode *inode)
1331 1285
1332 WARN_ON(inode->i_state & I_NEW); 1286 WARN_ON(inode->i_state & I_NEW);
1333 1287
1334 if (op && op->drop_inode) 1288 if (op->drop_inode)
1335 drop = op->drop_inode(inode); 1289 drop = op->drop_inode(inode);
1336 else 1290 else
1337 drop = generic_drop_inode(inode); 1291 drop = generic_drop_inode(inode);
@@ -1617,7 +1571,6 @@ void __init inode_init(void)
1617 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC| 1571 (SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|
1618 SLAB_MEM_SPREAD), 1572 SLAB_MEM_SPREAD),
1619 init_once); 1573 init_once);
1620 register_shrinker(&icache_shrinker);
1621 1574
1622 /* Hash may have been set up in inode_init_early */ 1575 /* Hash may have been set up in inode_init_early */
1623 if (!hashdist) 1576 if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index b29c46e4e32f..fe327c20af83 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -97,6 +97,7 @@ extern struct file *get_empty_filp(void);
97 * super.c 97 * super.c
98 */ 98 */
99extern int do_remount_sb(struct super_block *, int, void *, int); 99extern int do_remount_sb(struct super_block *, int, void *, int);
100extern bool grab_super_passive(struct super_block *sb);
100extern void __put_super(struct super_block *sb); 101extern void __put_super(struct super_block *sb);
101extern void put_super(struct super_block *sb); 102extern void put_super(struct super_block *sb);
102extern struct dentry *mount_fs(struct file_system_type *, 103extern struct dentry *mount_fs(struct file_system_type *,
@@ -135,3 +136,8 @@ extern void inode_wb_list_del(struct inode *inode);
135extern int get_nr_dirty_inodes(void); 136extern int get_nr_dirty_inodes(void);
136extern void evict_inodes(struct super_block *); 137extern void evict_inodes(struct super_block *);
137extern int invalidate_inodes(struct super_block *, bool); 138extern int invalidate_inodes(struct super_block *, bool);
139
140/*
141 * dcache.c
142 */
143extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 0542b6eedf80..f20437c068a0 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -254,19 +254,16 @@ static int isofs_readdir(struct file *filp,
254 char *tmpname; 254 char *tmpname;
255 struct iso_directory_record *tmpde; 255 struct iso_directory_record *tmpde;
256 struct inode *inode = filp->f_path.dentry->d_inode; 256 struct inode *inode = filp->f_path.dentry->d_inode;
257 struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb);
258 257
259 tmpname = (char *)__get_free_page(GFP_KERNEL); 258 tmpname = (char *)__get_free_page(GFP_KERNEL);
260 if (tmpname == NULL) 259 if (tmpname == NULL)
261 return -ENOMEM; 260 return -ENOMEM;
262 261
263 mutex_lock(&sbi->s_mutex);
264 tmpde = (struct iso_directory_record *) (tmpname+1024); 262 tmpde = (struct iso_directory_record *) (tmpname+1024);
265 263
266 result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde); 264 result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde);
267 265
268 free_page((unsigned long) tmpname); 266 free_page((unsigned long) tmpname);
269 mutex_unlock(&sbi->s_mutex);
270 return result; 267 return result;
271} 268}
272 269
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index b3cc8586984e..a5d03672d04e 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -863,7 +863,6 @@ root_found:
863 sbi->s_utf8 = opt.utf8; 863 sbi->s_utf8 = opt.utf8;
864 sbi->s_nocompress = opt.nocompress; 864 sbi->s_nocompress = opt.nocompress;
865 sbi->s_overriderockperm = opt.overriderockperm; 865 sbi->s_overriderockperm = opt.overriderockperm;
866 mutex_init(&sbi->s_mutex);
867 /* 866 /*
868 * It would be incredibly stupid to allow people to mark every file 867 * It would be incredibly stupid to allow people to mark every file
869 * on the disk as suid, so we merely allow them to set the default 868 * on the disk as suid, so we merely allow them to set the default
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 2882dc089f87..7d33de84f52a 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -55,7 +55,6 @@ struct isofs_sb_info {
55 gid_t s_gid; 55 gid_t s_gid;
56 uid_t s_uid; 56 uid_t s_uid;
57 struct nls_table *s_nls_iocharset; /* Native language support table */ 57 struct nls_table *s_nls_iocharset; /* Native language support table */
58 struct mutex s_mutex; /* replaces BKL, please remove if possible */
59}; 58};
60 59
61#define ISOFS_INVALID_MODE ((mode_t) -1) 60#define ISOFS_INVALID_MODE ((mode_t) -1)
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 4fb3e8074fd4..1e2946f2a69e 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -168,7 +168,6 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
168 int found; 168 int found;
169 unsigned long uninitialized_var(block); 169 unsigned long uninitialized_var(block);
170 unsigned long uninitialized_var(offset); 170 unsigned long uninitialized_var(offset);
171 struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb);
172 struct inode *inode; 171 struct inode *inode;
173 struct page *page; 172 struct page *page;
174 173
@@ -176,21 +175,13 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam
176 if (!page) 175 if (!page)
177 return ERR_PTR(-ENOMEM); 176 return ERR_PTR(-ENOMEM);
178 177
179 mutex_lock(&sbi->s_mutex);
180 found = isofs_find_entry(dir, dentry, 178 found = isofs_find_entry(dir, dentry,
181 &block, &offset, 179 &block, &offset,
182 page_address(page), 180 page_address(page),
183 1024 + page_address(page)); 181 1024 + page_address(page));
184 __free_page(page); 182 __free_page(page);
185 183
186 inode = NULL; 184 inode = found ? isofs_iget(dir->i_sb, block, offset) : NULL;
187 if (found) { 185
188 inode = isofs_iget(dir->i_sb, block, offset);
189 if (IS_ERR(inode)) {
190 mutex_unlock(&sbi->s_mutex);
191 return ERR_CAST(inode);
192 }
193 }
194 mutex_unlock(&sbi->s_mutex);
195 return d_splice_alias(inode, dentry); 186 return d_splice_alias(inode, dentry);
196} 187}
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index f9cd04db6eab..1fbc7de88f50 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -678,7 +678,6 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
678 678
679 init_rock_state(&rs, inode); 679 init_rock_state(&rs, inode);
680 block = ei->i_iget5_block; 680 block = ei->i_iget5_block;
681 mutex_lock(&sbi->s_mutex);
682 bh = sb_bread(inode->i_sb, block); 681 bh = sb_bread(inode->i_sb, block);
683 if (!bh) 682 if (!bh)
684 goto out_noread; 683 goto out_noread;
@@ -748,7 +747,6 @@ repeat:
748 goto fail; 747 goto fail;
749 brelse(bh); 748 brelse(bh);
750 *rpnt = '\0'; 749 *rpnt = '\0';
751 mutex_unlock(&sbi->s_mutex);
752 SetPageUptodate(page); 750 SetPageUptodate(page);
753 kunmap(page); 751 kunmap(page);
754 unlock_page(page); 752 unlock_page(page);
@@ -765,7 +763,6 @@ out_bad_span:
765 printk("symlink spans iso9660 blocks\n"); 763 printk("symlink spans iso9660 blocks\n");
766fail: 764fail:
767 brelse(bh); 765 brelse(bh);
768 mutex_unlock(&sbi->s_mutex);
769error: 766error:
770 SetPageError(page); 767 SetPageError(page);
771 kunmap(page); 768 kunmap(page);
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 828a0e1ea438..3675b3cdee89 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -259,12 +259,12 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
259 return rc; 259 return rc;
260} 260}
261 261
262int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags) 262int jffs2_check_acl(struct inode *inode, int mask)
263{ 263{
264 struct posix_acl *acl; 264 struct posix_acl *acl;
265 int rc; 265 int rc;
266 266
267 if (flags & IPERM_FLAG_RCU) 267 if (mask & MAY_NOT_BLOCK)
268 return -ECHILD; 268 return -ECHILD;
269 269
270 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); 270 acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 3119f59253d3..5e42de8d9541 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,7 +26,7 @@ struct jffs2_acl_header {
26 26
27#ifdef CONFIG_JFFS2_FS_POSIX_ACL 27#ifdef CONFIG_JFFS2_FS_POSIX_ACL
28 28
29extern int jffs2_check_acl(struct inode *, int, unsigned int); 29extern int jffs2_check_acl(struct inode *, int);
30extern int jffs2_acl_chmod(struct inode *); 30extern int jffs2_acl_chmod(struct inode *);
31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); 31extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
32extern int jffs2_init_acl_post(struct inode *); 32extern int jffs2_init_acl_post(struct inode *);
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 4bca6a2e5c07..5f243cd63afc 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -102,10 +102,8 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
102 mutex_unlock(&dir_f->sem); 102 mutex_unlock(&dir_f->sem);
103 if (ino) { 103 if (ino) {
104 inode = jffs2_iget(dir_i->i_sb, ino); 104 inode = jffs2_iget(dir_i->i_sb, ino);
105 if (IS_ERR(inode)) { 105 if (IS_ERR(inode))
106 printk(KERN_WARNING "iget() failed for ino #%u\n", ino); 106 printk(KERN_WARNING "iget() failed for ino #%u\n", ino);
107 return ERR_CAST(inode);
108 }
109 } 107 }
110 108
111 return d_splice_alias(inode, target); 109 return d_splice_alias(inode, target);
@@ -822,7 +820,10 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
822 820
823 if (victim_f) { 821 if (victim_f) {
824 /* There was a victim. Kill it off nicely */ 822 /* There was a victim. Kill it off nicely */
825 drop_nlink(new_dentry->d_inode); 823 if (S_ISDIR(new_dentry->d_inode->i_mode))
824 clear_nlink(new_dentry->d_inode);
825 else
826 drop_nlink(new_dentry->d_inode);
826 /* Don't oops if the victim was a dirent pointing to an 827 /* Don't oops if the victim was a dirent pointing to an
827 inode which didn't exist. */ 828 inode which didn't exist. */
828 if (victim_f->inocache) { 829 if (victim_f->inocache) {
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 1c0a08d711aa..3989f7e09f7f 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -27,13 +27,20 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
27 struct page **pagep, void **fsdata); 27 struct page **pagep, void **fsdata);
28static int jffs2_readpage (struct file *filp, struct page *pg); 28static int jffs2_readpage (struct file *filp, struct page *pg);
29 29
30int jffs2_fsync(struct file *filp, int datasync) 30int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
31{ 31{
32 struct inode *inode = filp->f_mapping->host; 32 struct inode *inode = filp->f_mapping->host;
33 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); 33 struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
34 int ret;
35
36 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
37 if (ret)
38 return ret;
34 39
40 mutex_lock(&inode->i_mutex);
35 /* Trigger GC to flush any pending writes for this inode */ 41 /* Trigger GC to flush any pending writes for this inode */
36 jffs2_flush_wbuf_gc(c, inode->i_ino); 42 jffs2_flush_wbuf_gc(c, inode->i_ino);
43 mutex_unlock(&inode->i_mutex);
37 44
38 return 0; 45 return 0;
39} 46}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 65c6c43ca482..9c252835e8e5 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -158,7 +158,7 @@ extern const struct inode_operations jffs2_dir_inode_operations;
158extern const struct file_operations jffs2_file_operations; 158extern const struct file_operations jffs2_file_operations;
159extern const struct inode_operations jffs2_file_inode_operations; 159extern const struct inode_operations jffs2_file_inode_operations;
160extern const struct address_space_operations jffs2_file_address_operations; 160extern const struct address_space_operations jffs2_file_address_operations;
161int jffs2_fsync(struct file *, int); 161int jffs2_fsync(struct file *, loff_t, loff_t, int);
162int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); 162int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
163 163
164/* ioctl.c */ 164/* ioctl.c */
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index e5de9422fa32..8a0a0666d5a6 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -114,11 +114,11 @@ out:
114 return rc; 114 return rc;
115} 115}
116 116
117int jfs_check_acl(struct inode *inode, int mask, unsigned int flags) 117int jfs_check_acl(struct inode *inode, int mask)
118{ 118{
119 struct posix_acl *acl; 119 struct posix_acl *acl;
120 120
121 if (flags & IPERM_FLAG_RCU) 121 if (mask & MAY_NOT_BLOCK)
122 return -ECHILD; 122 return -ECHILD;
123 123
124 acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); 124 acl = jfs_get_acl(inode, ACL_TYPE_ACCESS);
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 2f3f531f3606..7527855b5cc6 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -28,19 +28,26 @@
28#include "jfs_acl.h" 28#include "jfs_acl.h"
29#include "jfs_debug.h" 29#include "jfs_debug.h"
30 30
31int jfs_fsync(struct file *file, int datasync) 31int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
32{ 32{
33 struct inode *inode = file->f_mapping->host; 33 struct inode *inode = file->f_mapping->host;
34 int rc = 0; 34 int rc = 0;
35 35
36 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
37 if (rc)
38 return rc;
39
40 mutex_lock(&inode->i_mutex);
36 if (!(inode->i_state & I_DIRTY) || 41 if (!(inode->i_state & I_DIRTY) ||
37 (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { 42 (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
38 /* Make sure committed changes hit the disk */ 43 /* Make sure committed changes hit the disk */
39 jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); 44 jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
45 mutex_unlock(&inode->i_mutex);
40 return rc; 46 return rc;
41 } 47 }
42 48
43 rc |= jfs_commit_inode(inode, 1); 49 rc |= jfs_commit_inode(inode, 1);
50 mutex_unlock(&inode->i_mutex);
44 51
45 return rc ? -EIO : 0; 52 return rc ? -EIO : 0;
46} 53}
@@ -110,6 +117,8 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
110 117
111 if ((iattr->ia_valid & ATTR_SIZE) && 118 if ((iattr->ia_valid & ATTR_SIZE) &&
112 iattr->ia_size != i_size_read(inode)) { 119 iattr->ia_size != i_size_read(inode)) {
120 inode_dio_wait(inode);
121
113 rc = vmtruncate(inode, iattr->ia_size); 122 rc = vmtruncate(inode, iattr->ia_size);
114 if (rc) 123 if (rc)
115 return rc; 124 return rc;
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 109655904bbc..77b69b27f825 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -329,8 +329,8 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb,
329 struct inode *inode = file->f_mapping->host; 329 struct inode *inode = file->f_mapping->host;
330 ssize_t ret; 330 ssize_t ret;
331 331
332 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 332 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
333 offset, nr_segs, jfs_get_block, NULL); 333 jfs_get_block);
334 334
335 /* 335 /*
336 * In case of error extending write may have instantiated a few 336 * In case of error extending write may have instantiated a few
diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h
index f9285c4900fa..54e07559878d 100644
--- a/fs/jfs/jfs_acl.h
+++ b/fs/jfs/jfs_acl.h
@@ -20,7 +20,7 @@
20 20
21#ifdef CONFIG_JFS_POSIX_ACL 21#ifdef CONFIG_JFS_POSIX_ACL
22 22
23int jfs_check_acl(struct inode *, int, unsigned int flags); 23int jfs_check_acl(struct inode *, int);
24int jfs_init_acl(tid_t, struct inode *, struct inode *); 24int jfs_init_acl(tid_t, struct inode *, struct inode *);
25int jfs_acl_chmod(struct inode *inode); 25int jfs_acl_chmod(struct inode *inode);
26 26
diff --git a/fs/jfs/jfs_inode.h b/fs/jfs/jfs_inode.h
index ec2fb8b945fc..9271cfe4a149 100644
--- a/fs/jfs/jfs_inode.h
+++ b/fs/jfs/jfs_inode.h
@@ -21,7 +21,7 @@
21struct fid; 21struct fid;
22 22
23extern struct inode *ialloc(struct inode *, umode_t); 23extern struct inode *ialloc(struct inode *, umode_t);
24extern int jfs_fsync(struct file *, int); 24extern int jfs_fsync(struct file *, loff_t, loff_t, int);
25extern long jfs_ioctl(struct file *, unsigned int, unsigned long); 25extern long jfs_ioctl(struct file *, unsigned int, unsigned long);
26extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long); 26extern long jfs_compat_ioctl(struct file *, unsigned int, unsigned long);
27extern struct inode *jfs_iget(struct super_block *, unsigned long); 27extern struct inode *jfs_iget(struct super_block *, unsigned long);
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index eaaf2b511e89..03787ef6a118 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -1456,34 +1456,23 @@ static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struc
1456 ino_t inum; 1456 ino_t inum;
1457 struct inode *ip; 1457 struct inode *ip;
1458 struct component_name key; 1458 struct component_name key;
1459 const char *name = dentry->d_name.name;
1460 int len = dentry->d_name.len;
1461 int rc; 1459 int rc;
1462 1460
1463 jfs_info("jfs_lookup: name = %s", name); 1461 jfs_info("jfs_lookup: name = %s", dentry->d_name.name);
1464 1462
1465 if ((name[0] == '.') && (len == 1)) 1463 if ((rc = get_UCSname(&key, dentry)))
1466 inum = dip->i_ino; 1464 return ERR_PTR(rc);
1467 else if (strcmp(name, "..") == 0) 1465 rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP);
1468 inum = PARENT(dip); 1466 free_UCSname(&key);
1469 else { 1467 if (rc == -ENOENT) {
1470 if ((rc = get_UCSname(&key, dentry))) 1468 ip = NULL;
1471 return ERR_PTR(rc); 1469 } else if (rc) {
1472 rc = dtSearch(dip, &key, &inum, &btstack, JFS_LOOKUP); 1470 jfs_err("jfs_lookup: dtSearch returned %d", rc);
1473 free_UCSname(&key); 1471 ip = ERR_PTR(rc);
1474 if (rc == -ENOENT) { 1472 } else {
1475 d_add(dentry, NULL); 1473 ip = jfs_iget(dip->i_sb, inum);
1476 return NULL; 1474 if (IS_ERR(ip))
1477 } else if (rc) { 1475 jfs_err("jfs_lookup: iget failed on inum %d", (uint)inum);
1478 jfs_err("jfs_lookup: dtSearch returned %d", rc);
1479 return ERR_PTR(rc);
1480 }
1481 }
1482
1483 ip = jfs_iget(dip->i_sb, inum);
1484 if (IS_ERR(ip)) {
1485 jfs_err("jfs_lookup: iget failed on inum %d", (uint) inum);
1486 return ERR_CAST(ip);
1487 } 1476 }
1488 1477
1489 return d_splice_alias(ip, dentry); 1478 return d_splice_alias(ip, dentry);
@@ -1597,8 +1586,6 @@ out:
1597 1586
1598static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) 1587static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1599{ 1588{
1600 if (nd && nd->flags & LOOKUP_RCU)
1601 return -ECHILD;
1602 /* 1589 /*
1603 * This is not negative dentry. Always valid. 1590 * This is not negative dentry. Always valid.
1604 * 1591 *
@@ -1624,10 +1611,8 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd)
1624 * case sensitive name which is specified by user if this is 1611 * case sensitive name which is specified by user if this is
1625 * for creation. 1612 * for creation.
1626 */ 1613 */
1627 if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { 1614 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1628 if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) 1615 return 0;
1629 return 0;
1630 }
1631 return 1; 1616 return 1;
1632} 1617}
1633 1618
diff --git a/fs/libfs.c b/fs/libfs.c
index 275ca4749a2e..c18e9a1235b6 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -16,6 +16,8 @@
16 16
17#include <asm/uaccess.h> 17#include <asm/uaccess.h>
18 18
19#include "internal.h"
20
19static inline int simple_positive(struct dentry *dentry) 21static inline int simple_positive(struct dentry *dentry)
20{ 22{
21 return dentry->d_inode && !d_unhashed(dentry); 23 return dentry->d_inode && !d_unhashed(dentry);
@@ -246,13 +248,11 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
246 root->i_ino = 1; 248 root->i_ino = 1;
247 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR; 249 root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
248 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME; 250 root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
249 dentry = d_alloc(NULL, &d_name); 251 dentry = __d_alloc(s, &d_name);
250 if (!dentry) { 252 if (!dentry) {
251 iput(root); 253 iput(root);
252 goto Enomem; 254 goto Enomem;
253 } 255 }
254 dentry->d_sb = s;
255 dentry->d_parent = dentry;
256 d_instantiate(dentry, root); 256 d_instantiate(dentry, root);
257 s->s_root = dentry; 257 s->s_root = dentry;
258 s->s_d_op = dops; 258 s->s_d_op = dops;
@@ -328,8 +328,10 @@ int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
328 328
329 if (new_dentry->d_inode) { 329 if (new_dentry->d_inode) {
330 simple_unlink(new_dir, new_dentry); 330 simple_unlink(new_dir, new_dentry);
331 if (they_are_dirs) 331 if (they_are_dirs) {
332 drop_nlink(new_dentry->d_inode);
332 drop_nlink(old_dir); 333 drop_nlink(old_dir);
334 }
333 } else if (they_are_dirs) { 335 } else if (they_are_dirs) {
334 drop_nlink(old_dir); 336 drop_nlink(old_dir);
335 inc_nlink(new_dir); 337 inc_nlink(new_dir);
@@ -905,21 +907,29 @@ EXPORT_SYMBOL_GPL(generic_fh_to_parent);
905 * filesystems which track all non-inode metadata in the buffers list 907 * filesystems which track all non-inode metadata in the buffers list
906 * hanging off the address_space structure. 908 * hanging off the address_space structure.
907 */ 909 */
908int generic_file_fsync(struct file *file, int datasync) 910int generic_file_fsync(struct file *file, loff_t start, loff_t end,
911 int datasync)
909{ 912{
910 struct inode *inode = file->f_mapping->host; 913 struct inode *inode = file->f_mapping->host;
911 int err; 914 int err;
912 int ret; 915 int ret;
913 916
917 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
918 if (err)
919 return err;
920
921 mutex_lock(&inode->i_mutex);
914 ret = sync_mapping_buffers(inode->i_mapping); 922 ret = sync_mapping_buffers(inode->i_mapping);
915 if (!(inode->i_state & I_DIRTY)) 923 if (!(inode->i_state & I_DIRTY))
916 return ret; 924 goto out;
917 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 925 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
918 return ret; 926 goto out;
919 927
920 err = sync_inode_metadata(inode, 1); 928 err = sync_inode_metadata(inode, 1);
921 if (ret == 0) 929 if (ret == 0)
922 ret = err; 930 ret = err;
931out:
932 mutex_unlock(&inode->i_mutex);
923 return ret; 933 return ret;
924} 934}
925EXPORT_SYMBOL(generic_file_fsync); 935EXPORT_SYMBOL(generic_file_fsync);
@@ -956,7 +966,7 @@ EXPORT_SYMBOL(generic_check_addressable);
956/* 966/*
957 * No-op implementation of ->fsync for in-memory filesystems. 967 * No-op implementation of ->fsync for in-memory filesystems.
958 */ 968 */
959int noop_fsync(struct file *file, int datasync) 969int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
960{ 970{
961 return 0; 971 return 0;
962} 972}
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 1afae26cf236..b3ff3d894165 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -371,11 +371,9 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
371 page_cache_release(page); 371 page_cache_release(page);
372 372
373 inode = logfs_iget(dir->i_sb, ino); 373 inode = logfs_iget(dir->i_sb, ino);
374 if (IS_ERR(inode)) { 374 if (IS_ERR(inode))
375 printk(KERN_ERR"LogFS: Cannot read inode #%llx for dentry (%lx, %lx)n", 375 printk(KERN_ERR"LogFS: Cannot read inode #%llx for dentry (%lx, %lx)n",
376 ino, dir->i_ino, index); 376 ino, dir->i_ino, index);
377 return ERR_CAST(inode);
378 }
379 return d_splice_alias(inode, dentry); 377 return d_splice_alias(inode, dentry);
380} 378}
381 379
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index c2ad7028def4..b548c87a86f1 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -219,11 +219,20 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
219 } 219 }
220} 220}
221 221
222int logfs_fsync(struct file *file, int datasync) 222int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
223{ 223{
224 struct super_block *sb = file->f_mapping->host->i_sb; 224 struct super_block *sb = file->f_mapping->host->i_sb;
225 struct inode *inode = file->f_mapping->host;
226 int ret;
227
228 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
229 if (ret)
230 return ret;
225 231
232 mutex_lock(&inode->i_mutex);
226 logfs_write_anchor(sb); 233 logfs_write_anchor(sb);
234 mutex_unlock(&inode->i_mutex);
235
227 return 0; 236 return 0;
228} 237}
229 238
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 57afd4a6fabb..f22d108bfa5d 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -506,7 +506,7 @@ extern const struct file_operations logfs_reg_fops;
506extern const struct address_space_operations logfs_reg_aops; 506extern const struct address_space_operations logfs_reg_aops;
507int logfs_readpage(struct file *file, struct page *page); 507int logfs_readpage(struct file *file, struct page *page);
508long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 508long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
509int logfs_fsync(struct file *file, int datasync); 509int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
510 510
511/* gc.c */ 511/* gc.c */
512u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec); 512u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec);
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index adcdc0a4e182..e7d23e25bf1d 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -596,8 +596,7 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
596 596
597int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) 597int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
598{ 598{
599 struct inode *dir = dentry->d_parent->d_inode; 599 struct super_block *sb = dentry->d_sb;
600 struct super_block *sb = dir->i_sb;
601 generic_fillattr(dentry->d_inode, stat); 600 generic_fillattr(dentry->d_inode, stat);
602 if (INODE_VERSION(dentry->d_inode) == MINIX_V1) 601 if (INODE_VERSION(dentry->d_inode) == MINIX_V1)
603 stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb); 602 stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
diff --git a/fs/namei.c b/fs/namei.c
index 14ab8d3f2f0c..b7fad009bbf6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -176,12 +176,12 @@ EXPORT_SYMBOL(putname);
176/* 176/*
177 * This does basic POSIX ACL permission checking 177 * This does basic POSIX ACL permission checking
178 */ 178 */
179static int acl_permission_check(struct inode *inode, int mask, unsigned int flags, 179static int acl_permission_check(struct inode *inode, int mask)
180 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
181{ 180{
181 int (*check_acl)(struct inode *inode, int mask);
182 unsigned int mode = inode->i_mode; 182 unsigned int mode = inode->i_mode;
183 183
184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 184 mask &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK;
185 185
186 if (current_user_ns() != inode_userns(inode)) 186 if (current_user_ns() != inode_userns(inode))
187 goto other_perms; 187 goto other_perms;
@@ -189,8 +189,9 @@ static int acl_permission_check(struct inode *inode, int mask, unsigned int flag
189 if (current_fsuid() == inode->i_uid) 189 if (current_fsuid() == inode->i_uid)
190 mode >>= 6; 190 mode >>= 6;
191 else { 191 else {
192 check_acl = inode->i_op->check_acl;
192 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { 193 if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) {
193 int error = check_acl(inode, mask, flags); 194 int error = check_acl(inode, mask);
194 if (error != -EAGAIN) 195 if (error != -EAGAIN)
195 return error; 196 return error;
196 } 197 }
@@ -203,7 +204,7 @@ other_perms:
203 /* 204 /*
204 * If the DACs are ok we don't need any capability check. 205 * If the DACs are ok we don't need any capability check.
205 */ 206 */
206 if ((mask & ~mode) == 0) 207 if ((mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
207 return 0; 208 return 0;
208 return -EACCES; 209 return -EACCES;
209} 210}
@@ -212,8 +213,6 @@ other_perms:
212 * generic_permission - check for access rights on a Posix-like filesystem 213 * generic_permission - check for access rights on a Posix-like filesystem
213 * @inode: inode to check access rights for 214 * @inode: inode to check access rights for
214 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) 215 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
215 * @check_acl: optional callback to check for Posix ACLs
216 * @flags: IPERM_FLAG_ flags.
217 * 216 *
218 * Used to check for read/write/execute permissions on a file. 217 * Used to check for read/write/execute permissions on a file.
219 * We use "fsuid" for this, letting us set arbitrary permissions 218 * We use "fsuid" for this, letting us set arbitrary permissions
@@ -224,24 +223,32 @@ other_perms:
224 * request cannot be satisfied (eg. requires blocking or too much complexity). 223 * request cannot be satisfied (eg. requires blocking or too much complexity).
225 * It would then be called again in ref-walk mode. 224 * It would then be called again in ref-walk mode.
226 */ 225 */
227int generic_permission(struct inode *inode, int mask, unsigned int flags, 226int generic_permission(struct inode *inode, int mask)
228 int (*check_acl)(struct inode *inode, int mask, unsigned int flags))
229{ 227{
230 int ret; 228 int ret;
231 229
232 /* 230 /*
233 * Do the basic POSIX ACL permission checks. 231 * Do the basic POSIX ACL permission checks.
234 */ 232 */
235 ret = acl_permission_check(inode, mask, flags, check_acl); 233 ret = acl_permission_check(inode, mask);
236 if (ret != -EACCES) 234 if (ret != -EACCES)
237 return ret; 235 return ret;
238 236
237 if (S_ISDIR(inode->i_mode)) {
238 /* DACs are overridable for directories */
239 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
240 return 0;
241 if (!(mask & MAY_WRITE))
242 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
243 return 0;
244 return -EACCES;
245 }
239 /* 246 /*
240 * Read/write DACs are always overridable. 247 * Read/write DACs are always overridable.
241 * Executable DACs are overridable for all directories and 248 * Executable DACs are overridable when there is
242 * for non-directories that have least one exec bit set. 249 * at least one exec bit set.
243 */ 250 */
244 if (!(mask & MAY_EXEC) || execute_ok(inode)) 251 if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
245 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE)) 252 if (ns_capable(inode_userns(inode), CAP_DAC_OVERRIDE))
246 return 0; 253 return 0;
247 254
@@ -249,7 +256,7 @@ int generic_permission(struct inode *inode, int mask, unsigned int flags,
249 * Searching includes executable on directories, else just read. 256 * Searching includes executable on directories, else just read.
250 */ 257 */
251 mask &= MAY_READ | MAY_WRITE | MAY_EXEC; 258 mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
252 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE))) 259 if (mask == MAY_READ)
253 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH)) 260 if (ns_capable(inode_userns(inode), CAP_DAC_READ_SEARCH))
254 return 0; 261 return 0;
255 262
@@ -288,10 +295,9 @@ int inode_permission(struct inode *inode, int mask)
288 } 295 }
289 296
290 if (inode->i_op->permission) 297 if (inode->i_op->permission)
291 retval = inode->i_op->permission(inode, mask, 0); 298 retval = inode->i_op->permission(inode, mask);
292 else 299 else
293 retval = generic_permission(inode, mask, 0, 300 retval = generic_permission(inode, mask);
294 inode->i_op->check_acl);
295 301
296 if (retval) 302 if (retval)
297 return retval; 303 return retval;
@@ -304,69 +310,6 @@ int inode_permission(struct inode *inode, int mask)
304} 310}
305 311
306/** 312/**
307 * file_permission - check for additional access rights to a given file
308 * @file: file to check access rights for
309 * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
310 *
311 * Used to check for read/write/execute permissions on an already opened
312 * file.
313 *
314 * Note:
315 * Do not use this function in new code. All access checks should
316 * be done using inode_permission().
317 */
318int file_permission(struct file *file, int mask)
319{
320 return inode_permission(file->f_path.dentry->d_inode, mask);
321}
322
323/*
324 * get_write_access() gets write permission for a file.
325 * put_write_access() releases this write permission.
326 * This is used for regular files.
327 * We cannot support write (and maybe mmap read-write shared) accesses and
328 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
329 * can have the following values:
330 * 0: no writers, no VM_DENYWRITE mappings
331 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
332 * > 0: (i_writecount) users are writing to the file.
333 *
334 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
335 * except for the cases where we don't hold i_writecount yet. Then we need to
336 * use {get,deny}_write_access() - these functions check the sign and refuse
337 * to do the change if sign is wrong. Exclusion between them is provided by
338 * the inode->i_lock spinlock.
339 */
340
341int get_write_access(struct inode * inode)
342{
343 spin_lock(&inode->i_lock);
344 if (atomic_read(&inode->i_writecount) < 0) {
345 spin_unlock(&inode->i_lock);
346 return -ETXTBSY;
347 }
348 atomic_inc(&inode->i_writecount);
349 spin_unlock(&inode->i_lock);
350
351 return 0;
352}
353
354int deny_write_access(struct file * file)
355{
356 struct inode *inode = file->f_path.dentry->d_inode;
357
358 spin_lock(&inode->i_lock);
359 if (atomic_read(&inode->i_writecount) > 0) {
360 spin_unlock(&inode->i_lock);
361 return -ETXTBSY;
362 }
363 atomic_dec(&inode->i_writecount);
364 spin_unlock(&inode->i_lock);
365
366 return 0;
367}
368
369/**
370 * path_get - get a reference to a path 313 * path_get - get a reference to a path
371 * @path: path to get the reference to 314 * @path: path to get the reference to
372 * 315 *
@@ -492,28 +435,6 @@ static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
492 return dentry->d_op->d_revalidate(dentry, nd); 435 return dentry->d_op->d_revalidate(dentry, nd);
493} 436}
494 437
495static struct dentry *
496do_revalidate(struct dentry *dentry, struct nameidata *nd)
497{
498 int status = d_revalidate(dentry, nd);
499 if (unlikely(status <= 0)) {
500 /*
501 * The dentry failed validation.
502 * If d_revalidate returned 0 attempt to invalidate
503 * the dentry otherwise d_revalidate is asking us
504 * to return a fail status.
505 */
506 if (status < 0) {
507 dput(dentry);
508 dentry = ERR_PTR(status);
509 } else if (!d_invalidate(dentry)) {
510 dput(dentry);
511 dentry = NULL;
512 }
513 }
514 return dentry;
515}
516
517/** 438/**
518 * complete_walk - successful completion of path walk 439 * complete_walk - successful completion of path walk
519 * @nd: pointer nameidata 440 * @nd: pointer nameidata
@@ -568,40 +489,6 @@ static int complete_walk(struct nameidata *nd)
568 return status; 489 return status;
569} 490}
570 491
571/*
572 * Short-cut version of permission(), for calling on directories
573 * during pathname resolution. Combines parts of permission()
574 * and generic_permission(), and tests ONLY for MAY_EXEC permission.
575 *
576 * If appropriate, check DAC only. If not appropriate, or
577 * short-cut DAC fails, then call ->permission() to do more
578 * complete permission check.
579 */
580static inline int exec_permission(struct inode *inode, unsigned int flags)
581{
582 int ret;
583 struct user_namespace *ns = inode_userns(inode);
584
585 if (inode->i_op->permission) {
586 ret = inode->i_op->permission(inode, MAY_EXEC, flags);
587 } else {
588 ret = acl_permission_check(inode, MAY_EXEC, flags,
589 inode->i_op->check_acl);
590 }
591 if (likely(!ret))
592 goto ok;
593 if (ret == -ECHILD)
594 return ret;
595
596 if (ns_capable(ns, CAP_DAC_OVERRIDE) ||
597 ns_capable(ns, CAP_DAC_READ_SEARCH))
598 goto ok;
599
600 return ret;
601ok:
602 return security_inode_exec_permission(inode, flags);
603}
604
605static __always_inline void set_root(struct nameidata *nd) 492static __always_inline void set_root(struct nameidata *nd)
606{ 493{
607 if (!nd->root.mnt) 494 if (!nd->root.mnt)
@@ -776,7 +663,7 @@ static int follow_automount(struct path *path, unsigned flags,
776 /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT 663 /* We don't want to mount if someone supplied AT_NO_AUTOMOUNT
777 * and this is the terminal part of the path. 664 * and this is the terminal part of the path.
778 */ 665 */
779 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_CONTINUE)) 666 if ((flags & LOOKUP_NO_AUTOMOUNT) && !(flags & LOOKUP_PARENT))
780 return -EISDIR; /* we actually want to stop here */ 667 return -EISDIR; /* we actually want to stop here */
781 668
782 /* We want to mount if someone is trying to open/create a file of any 669 /* We want to mount if someone is trying to open/create a file of any
@@ -788,7 +675,7 @@ static int follow_automount(struct path *path, unsigned flags,
788 * appended a '/' to the name. 675 * appended a '/' to the name.
789 */ 676 */
790 if (!(flags & LOOKUP_FOLLOW) && 677 if (!(flags & LOOKUP_FOLLOW) &&
791 !(flags & (LOOKUP_CONTINUE | LOOKUP_DIRECTORY | 678 !(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY |
792 LOOKUP_OPEN | LOOKUP_CREATE))) 679 LOOKUP_OPEN | LOOKUP_CREATE)))
793 return -EISDIR; 680 return -EISDIR;
794 681
@@ -807,7 +694,7 @@ static int follow_automount(struct path *path, unsigned flags,
807 * the path being looked up; if it wasn't then the remainder of 694 * the path being looked up; if it wasn't then the remainder of
808 * the path is inaccessible and we should say so. 695 * the path is inaccessible and we should say so.
809 */ 696 */
810 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_CONTINUE)) 697 if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT))
811 return -EREMOTE; 698 return -EREMOTE;
812 return PTR_ERR(mnt); 699 return PTR_ERR(mnt);
813 } 700 }
@@ -1134,6 +1021,30 @@ static struct dentry *d_alloc_and_lookup(struct dentry *parent,
1134} 1021}
1135 1022
1136/* 1023/*
1024 * We already have a dentry, but require a lookup to be performed on the parent
1025 * directory to fill in d_inode. Returns the new dentry, or ERR_PTR on error.
1026 * parent->d_inode->i_mutex must be held. d_lookup must have verified that no
1027 * child exists while under i_mutex.
1028 */
1029static struct dentry *d_inode_lookup(struct dentry *parent, struct dentry *dentry,
1030 struct nameidata *nd)
1031{
1032 struct inode *inode = parent->d_inode;
1033 struct dentry *old;
1034
1035 /* Don't create child dentry for a dead directory. */
1036 if (unlikely(IS_DEADDIR(inode)))
1037 return ERR_PTR(-ENOENT);
1038
1039 old = inode->i_op->lookup(inode, dentry, nd);
1040 if (unlikely(old)) {
1041 dput(dentry);
1042 dentry = old;
1043 }
1044 return dentry;
1045}
1046
1047/*
1137 * It's more convoluted than I'd like it to be, but... it's still fairly 1048 * It's more convoluted than I'd like it to be, but... it's still fairly
1138 * small and for now I'd prefer to have fast path as straight as possible. 1049 * small and for now I'd prefer to have fast path as straight as possible.
1139 * It _is_ time-critical. 1050 * It _is_ time-critical.
@@ -1172,6 +1083,8 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
1172 goto unlazy; 1083 goto unlazy;
1173 } 1084 }
1174 } 1085 }
1086 if (unlikely(d_need_lookup(dentry)))
1087 goto unlazy;
1175 path->mnt = mnt; 1088 path->mnt = mnt;
1176 path->dentry = dentry; 1089 path->dentry = dentry;
1177 if (unlikely(!__follow_mount_rcu(nd, path, inode))) 1090 if (unlikely(!__follow_mount_rcu(nd, path, inode)))
@@ -1186,6 +1099,10 @@ unlazy:
1186 dentry = __d_lookup(parent, name); 1099 dentry = __d_lookup(parent, name);
1187 } 1100 }
1188 1101
1102 if (dentry && unlikely(d_need_lookup(dentry))) {
1103 dput(dentry);
1104 dentry = NULL;
1105 }
1189retry: 1106retry:
1190 if (unlikely(!dentry)) { 1107 if (unlikely(!dentry)) {
1191 struct inode *dir = parent->d_inode; 1108 struct inode *dir = parent->d_inode;
@@ -1202,6 +1119,15 @@ retry:
1202 /* known good */ 1119 /* known good */
1203 need_reval = 0; 1120 need_reval = 0;
1204 status = 1; 1121 status = 1;
1122 } else if (unlikely(d_need_lookup(dentry))) {
1123 dentry = d_inode_lookup(parent, dentry, nd);
1124 if (IS_ERR(dentry)) {
1125 mutex_unlock(&dir->i_mutex);
1126 return PTR_ERR(dentry);
1127 }
1128 /* known good */
1129 need_reval = 0;
1130 status = 1;
1205 } 1131 }
1206 mutex_unlock(&dir->i_mutex); 1132 mutex_unlock(&dir->i_mutex);
1207 } 1133 }
@@ -1234,13 +1160,13 @@ retry:
1234static inline int may_lookup(struct nameidata *nd) 1160static inline int may_lookup(struct nameidata *nd)
1235{ 1161{
1236 if (nd->flags & LOOKUP_RCU) { 1162 if (nd->flags & LOOKUP_RCU) {
1237 int err = exec_permission(nd->inode, IPERM_FLAG_RCU); 1163 int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
1238 if (err != -ECHILD) 1164 if (err != -ECHILD)
1239 return err; 1165 return err;
1240 if (unlazy_walk(nd, NULL)) 1166 if (unlazy_walk(nd, NULL))
1241 return -ECHILD; 1167 return -ECHILD;
1242 } 1168 }
1243 return exec_permission(nd->inode, 0); 1169 return inode_permission(nd->inode, MAY_EXEC);
1244} 1170}
1245 1171
1246static inline int handle_dots(struct nameidata *nd, int type) 1172static inline int handle_dots(struct nameidata *nd, int type)
@@ -1354,7 +1280,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1354{ 1280{
1355 struct path next; 1281 struct path next;
1356 int err; 1282 int err;
1357 unsigned int lookup_flags = nd->flags;
1358 1283
1359 while (*name=='/') 1284 while (*name=='/')
1360 name++; 1285 name++;
@@ -1368,8 +1293,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1368 unsigned int c; 1293 unsigned int c;
1369 int type; 1294 int type;
1370 1295
1371 nd->flags |= LOOKUP_CONTINUE;
1372
1373 err = may_lookup(nd); 1296 err = may_lookup(nd);
1374 if (err) 1297 if (err)
1375 break; 1298 break;
@@ -1431,8 +1354,6 @@ static int link_path_walk(const char *name, struct nameidata *nd)
1431 /* here ends the main loop */ 1354 /* here ends the main loop */
1432 1355
1433last_component: 1356last_component:
1434 /* Clear LOOKUP_CONTINUE iff it was previously unset */
1435 nd->flags &= lookup_flags | ~LOOKUP_CONTINUE;
1436 nd->last = this; 1357 nd->last = this;
1437 nd->last_type = type; 1358 nd->last_type = type;
1438 return 0; 1359 return 0;
@@ -1515,7 +1436,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
1515 if (!S_ISDIR(dentry->d_inode->i_mode)) 1436 if (!S_ISDIR(dentry->d_inode->i_mode))
1516 goto fput_fail; 1437 goto fput_fail;
1517 1438
1518 retval = file_permission(file, MAY_EXEC); 1439 retval = inode_permission(dentry->d_inode, MAY_EXEC);
1519 if (retval) 1440 if (retval)
1520 goto fput_fail; 1441 goto fput_fail;
1521 } 1442 }
@@ -1653,16 +1574,22 @@ int kern_path(const char *name, unsigned int flags, struct path *path)
1653 * @mnt: pointer to vfs mount of the base directory 1574 * @mnt: pointer to vfs mount of the base directory
1654 * @name: pointer to file name 1575 * @name: pointer to file name
1655 * @flags: lookup flags 1576 * @flags: lookup flags
1656 * @nd: pointer to nameidata 1577 * @path: pointer to struct path to fill
1657 */ 1578 */
1658int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, 1579int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
1659 const char *name, unsigned int flags, 1580 const char *name, unsigned int flags,
1660 struct nameidata *nd) 1581 struct path *path)
1661{ 1582{
1662 nd->root.dentry = dentry; 1583 struct nameidata nd;
1663 nd->root.mnt = mnt; 1584 int err;
1585 nd.root.dentry = dentry;
1586 nd.root.mnt = mnt;
1587 BUG_ON(flags & LOOKUP_PARENT);
1664 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */ 1588 /* the first argument of do_path_lookup() is ignored with LOOKUP_ROOT */
1665 return do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, nd); 1589 err = do_path_lookup(AT_FDCWD, name, flags | LOOKUP_ROOT, &nd);
1590 if (!err)
1591 *path = nd.path;
1592 return err;
1666} 1593}
1667 1594
1668static struct dentry *__lookup_hash(struct qstr *name, 1595static struct dentry *__lookup_hash(struct qstr *name,
@@ -1672,7 +1599,7 @@ static struct dentry *__lookup_hash(struct qstr *name,
1672 struct dentry *dentry; 1599 struct dentry *dentry;
1673 int err; 1600 int err;
1674 1601
1675 err = exec_permission(inode, 0); 1602 err = inode_permission(inode, MAY_EXEC);
1676 if (err) 1603 if (err)
1677 return ERR_PTR(err); 1604 return ERR_PTR(err);
1678 1605
@@ -1683,8 +1610,34 @@ static struct dentry *__lookup_hash(struct qstr *name,
1683 */ 1610 */
1684 dentry = d_lookup(base, name); 1611 dentry = d_lookup(base, name);
1685 1612
1686 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) 1613 if (dentry && d_need_lookup(dentry)) {
1687 dentry = do_revalidate(dentry, nd); 1614 /*
1615 * __lookup_hash is called with the parent dir's i_mutex already
1616 * held, so we are good to go here.
1617 */
1618 dentry = d_inode_lookup(base, dentry, nd);
1619 if (IS_ERR(dentry))
1620 return dentry;
1621 }
1622
1623 if (dentry && (dentry->d_flags & DCACHE_OP_REVALIDATE)) {
1624 int status = d_revalidate(dentry, nd);
1625 if (unlikely(status <= 0)) {
1626 /*
1627 * The dentry failed validation.
1628 * If d_revalidate returned 0 attempt to invalidate
1629 * the dentry otherwise d_revalidate is asking us
1630 * to return a fail status.
1631 */
1632 if (status < 0) {
1633 dput(dentry);
1634 return ERR_PTR(status);
1635 } else if (!d_invalidate(dentry)) {
1636 dput(dentry);
1637 dentry = NULL;
1638 }
1639 }
1640 }
1688 1641
1689 if (!dentry) 1642 if (!dentry)
1690 dentry = d_alloc_and_lookup(base, name, nd); 1643 dentry = d_alloc_and_lookup(base, name, nd);
@@ -2012,27 +1965,10 @@ static int handle_truncate(struct file *filp)
2012 return error; 1965 return error;
2013} 1966}
2014 1967
2015/*
2016 * Note that while the flag value (low two bits) for sys_open means:
2017 * 00 - read-only
2018 * 01 - write-only
2019 * 10 - read-write
2020 * 11 - special
2021 * it is changed into
2022 * 00 - no permissions needed
2023 * 01 - read-permission
2024 * 10 - write-permission
2025 * 11 - read-write
2026 * for the internal routines (ie open_namei()/follow_link() etc)
2027 * This is more logical, and also allows the 00 "no perm needed"
2028 * to be used for symlinks (where the permissions are checked
2029 * later).
2030 *
2031*/
2032static inline int open_to_namei_flags(int flag) 1968static inline int open_to_namei_flags(int flag)
2033{ 1969{
2034 if ((flag+1) & O_ACCMODE) 1970 if ((flag & O_ACCMODE) == 3)
2035 flag++; 1971 flag--;
2036 return flag; 1972 return flag;
2037} 1973}
2038 1974
@@ -2327,35 +2263,29 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
2327 return file; 2263 return file;
2328} 2264}
2329 2265
2330/** 2266struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, int is_dir)
2331 * lookup_create - lookup a dentry, creating it if it doesn't exist
2332 * @nd: nameidata info
2333 * @is_dir: directory flag
2334 *
2335 * Simple function to lookup and return a dentry and create it
2336 * if it doesn't exist. Is SMP-safe.
2337 *
2338 * Returns with nd->path.dentry->d_inode->i_mutex locked.
2339 */
2340struct dentry *lookup_create(struct nameidata *nd, int is_dir)
2341{ 2267{
2342 struct dentry *dentry = ERR_PTR(-EEXIST); 2268 struct dentry *dentry = ERR_PTR(-EEXIST);
2269 struct nameidata nd;
2270 int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
2271 if (error)
2272 return ERR_PTR(error);
2343 2273
2344 mutex_lock_nested(&nd->path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2345 /* 2274 /*
2346 * Yucky last component or no last component at all? 2275 * Yucky last component or no last component at all?
2347 * (foo/., foo/.., /////) 2276 * (foo/., foo/.., /////)
2348 */ 2277 */
2349 if (nd->last_type != LAST_NORM) 2278 if (nd.last_type != LAST_NORM)
2350 goto fail; 2279 goto out;
2351 nd->flags &= ~LOOKUP_PARENT; 2280 nd.flags &= ~LOOKUP_PARENT;
2352 nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL; 2281 nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
2353 nd->intent.open.flags = O_EXCL; 2282 nd.intent.open.flags = O_EXCL;
2354 2283
2355 /* 2284 /*
2356 * Do the final lookup. 2285 * Do the final lookup.
2357 */ 2286 */
2358 dentry = lookup_hash(nd); 2287 mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
2288 dentry = lookup_hash(&nd);
2359 if (IS_ERR(dentry)) 2289 if (IS_ERR(dentry))
2360 goto fail; 2290 goto fail;
2361 2291
@@ -2367,18 +2297,35 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir)
2367 * all is fine. Let's be bastards - you had / on the end, you've 2297 * all is fine. Let's be bastards - you had / on the end, you've
2368 * been asking for (non-existent) directory. -ENOENT for you. 2298 * been asking for (non-existent) directory. -ENOENT for you.
2369 */ 2299 */
2370 if (unlikely(!is_dir && nd->last.name[nd->last.len])) { 2300 if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
2371 dput(dentry); 2301 dput(dentry);
2372 dentry = ERR_PTR(-ENOENT); 2302 dentry = ERR_PTR(-ENOENT);
2303 goto fail;
2373 } 2304 }
2305 *path = nd.path;
2374 return dentry; 2306 return dentry;
2375eexist: 2307eexist:
2376 dput(dentry); 2308 dput(dentry);
2377 dentry = ERR_PTR(-EEXIST); 2309 dentry = ERR_PTR(-EEXIST);
2378fail: 2310fail:
2311 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
2312out:
2313 path_put(&nd.path);
2379 return dentry; 2314 return dentry;
2380} 2315}
2381EXPORT_SYMBOL_GPL(lookup_create); 2316EXPORT_SYMBOL(kern_path_create);
2317
2318struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
2319{
2320 char *tmp = getname(pathname);
2321 struct dentry *res;
2322 if (IS_ERR(tmp))
2323 return ERR_CAST(tmp);
2324 res = kern_path_create(dfd, tmp, path, is_dir);
2325 putname(tmp);
2326 return res;
2327}
2328EXPORT_SYMBOL(user_path_create);
2382 2329
2383int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) 2330int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
2384{ 2331{
@@ -2428,54 +2375,46 @@ static int may_mknod(mode_t mode)
2428SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode, 2375SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, int, mode,
2429 unsigned, dev) 2376 unsigned, dev)
2430{ 2377{
2431 int error;
2432 char *tmp;
2433 struct dentry *dentry; 2378 struct dentry *dentry;
2434 struct nameidata nd; 2379 struct path path;
2380 int error;
2435 2381
2436 if (S_ISDIR(mode)) 2382 if (S_ISDIR(mode))
2437 return -EPERM; 2383 return -EPERM;
2438 2384
2439 error = user_path_parent(dfd, filename, &nd, &tmp); 2385 dentry = user_path_create(dfd, filename, &path, 0);
2440 if (error) 2386 if (IS_ERR(dentry))
2441 return error; 2387 return PTR_ERR(dentry);
2442 2388
2443 dentry = lookup_create(&nd, 0); 2389 if (!IS_POSIXACL(path.dentry->d_inode))
2444 if (IS_ERR(dentry)) {
2445 error = PTR_ERR(dentry);
2446 goto out_unlock;
2447 }
2448 if (!IS_POSIXACL(nd.path.dentry->d_inode))
2449 mode &= ~current_umask(); 2390 mode &= ~current_umask();
2450 error = may_mknod(mode); 2391 error = may_mknod(mode);
2451 if (error) 2392 if (error)
2452 goto out_dput; 2393 goto out_dput;
2453 error = mnt_want_write(nd.path.mnt); 2394 error = mnt_want_write(path.mnt);
2454 if (error) 2395 if (error)
2455 goto out_dput; 2396 goto out_dput;
2456 error = security_path_mknod(&nd.path, dentry, mode, dev); 2397 error = security_path_mknod(&path, dentry, mode, dev);
2457 if (error) 2398 if (error)
2458 goto out_drop_write; 2399 goto out_drop_write;
2459 switch (mode & S_IFMT) { 2400 switch (mode & S_IFMT) {
2460 case 0: case S_IFREG: 2401 case 0: case S_IFREG:
2461 error = vfs_create(nd.path.dentry->d_inode,dentry,mode,&nd); 2402 error = vfs_create(path.dentry->d_inode,dentry,mode,NULL);
2462 break; 2403 break;
2463 case S_IFCHR: case S_IFBLK: 2404 case S_IFCHR: case S_IFBLK:
2464 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode, 2405 error = vfs_mknod(path.dentry->d_inode,dentry,mode,
2465 new_decode_dev(dev)); 2406 new_decode_dev(dev));
2466 break; 2407 break;
2467 case S_IFIFO: case S_IFSOCK: 2408 case S_IFIFO: case S_IFSOCK:
2468 error = vfs_mknod(nd.path.dentry->d_inode,dentry,mode,0); 2409 error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
2469 break; 2410 break;
2470 } 2411 }
2471out_drop_write: 2412out_drop_write:
2472 mnt_drop_write(nd.path.mnt); 2413 mnt_drop_write(path.mnt);
2473out_dput: 2414out_dput:
2474 dput(dentry); 2415 dput(dentry);
2475out_unlock: 2416 mutex_unlock(&path.dentry->d_inode->i_mutex);
2476 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2417 path_put(&path);
2477 path_put(&nd.path);
2478 putname(tmp);
2479 2418
2480 return error; 2419 return error;
2481} 2420}
@@ -2508,38 +2447,29 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2508 2447
2509SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode) 2448SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, int, mode)
2510{ 2449{
2511 int error = 0;
2512 char * tmp;
2513 struct dentry *dentry; 2450 struct dentry *dentry;
2514 struct nameidata nd; 2451 struct path path;
2515 2452 int error;
2516 error = user_path_parent(dfd, pathname, &nd, &tmp);
2517 if (error)
2518 goto out_err;
2519 2453
2520 dentry = lookup_create(&nd, 1); 2454 dentry = user_path_create(dfd, pathname, &path, 1);
2521 error = PTR_ERR(dentry);
2522 if (IS_ERR(dentry)) 2455 if (IS_ERR(dentry))
2523 goto out_unlock; 2456 return PTR_ERR(dentry);
2524 2457
2525 if (!IS_POSIXACL(nd.path.dentry->d_inode)) 2458 if (!IS_POSIXACL(path.dentry->d_inode))
2526 mode &= ~current_umask(); 2459 mode &= ~current_umask();
2527 error = mnt_want_write(nd.path.mnt); 2460 error = mnt_want_write(path.mnt);
2528 if (error) 2461 if (error)
2529 goto out_dput; 2462 goto out_dput;
2530 error = security_path_mkdir(&nd.path, dentry, mode); 2463 error = security_path_mkdir(&path, dentry, mode);
2531 if (error) 2464 if (error)
2532 goto out_drop_write; 2465 goto out_drop_write;
2533 error = vfs_mkdir(nd.path.dentry->d_inode, dentry, mode); 2466 error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
2534out_drop_write: 2467out_drop_write:
2535 mnt_drop_write(nd.path.mnt); 2468 mnt_drop_write(path.mnt);
2536out_dput: 2469out_dput:
2537 dput(dentry); 2470 dput(dentry);
2538out_unlock: 2471 mutex_unlock(&path.dentry->d_inode->i_mutex);
2539 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2472 path_put(&path);
2540 path_put(&nd.path);
2541 putname(tmp);
2542out_err:
2543 return error; 2473 return error;
2544} 2474}
2545 2475
@@ -2799,38 +2729,31 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
2799{ 2729{
2800 int error; 2730 int error;
2801 char *from; 2731 char *from;
2802 char *to;
2803 struct dentry *dentry; 2732 struct dentry *dentry;
2804 struct nameidata nd; 2733 struct path path;
2805 2734
2806 from = getname(oldname); 2735 from = getname(oldname);
2807 if (IS_ERR(from)) 2736 if (IS_ERR(from))
2808 return PTR_ERR(from); 2737 return PTR_ERR(from);
2809 2738
2810 error = user_path_parent(newdfd, newname, &nd, &to); 2739 dentry = user_path_create(newdfd, newname, &path, 0);
2811 if (error)
2812 goto out_putname;
2813
2814 dentry = lookup_create(&nd, 0);
2815 error = PTR_ERR(dentry); 2740 error = PTR_ERR(dentry);
2816 if (IS_ERR(dentry)) 2741 if (IS_ERR(dentry))
2817 goto out_unlock; 2742 goto out_putname;
2818 2743
2819 error = mnt_want_write(nd.path.mnt); 2744 error = mnt_want_write(path.mnt);
2820 if (error) 2745 if (error)
2821 goto out_dput; 2746 goto out_dput;
2822 error = security_path_symlink(&nd.path, dentry, from); 2747 error = security_path_symlink(&path, dentry, from);
2823 if (error) 2748 if (error)
2824 goto out_drop_write; 2749 goto out_drop_write;
2825 error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); 2750 error = vfs_symlink(path.dentry->d_inode, dentry, from);
2826out_drop_write: 2751out_drop_write:
2827 mnt_drop_write(nd.path.mnt); 2752 mnt_drop_write(path.mnt);
2828out_dput: 2753out_dput:
2829 dput(dentry); 2754 dput(dentry);
2830out_unlock: 2755 mutex_unlock(&path.dentry->d_inode->i_mutex);
2831 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2756 path_put(&path);
2832 path_put(&nd.path);
2833 putname(to);
2834out_putname: 2757out_putname:
2835 putname(from); 2758 putname(from);
2836 return error; 2759 return error;
@@ -2895,11 +2818,9 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
2895 int, newdfd, const char __user *, newname, int, flags) 2818 int, newdfd, const char __user *, newname, int, flags)
2896{ 2819{
2897 struct dentry *new_dentry; 2820 struct dentry *new_dentry;
2898 struct nameidata nd; 2821 struct path old_path, new_path;
2899 struct path old_path;
2900 int how = 0; 2822 int how = 0;
2901 int error; 2823 int error;
2902 char *to;
2903 2824
2904 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) 2825 if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
2905 return -EINVAL; 2826 return -EINVAL;
@@ -2921,32 +2842,27 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
2921 if (error) 2842 if (error)
2922 return error; 2843 return error;
2923 2844
2924 error = user_path_parent(newdfd, newname, &nd, &to); 2845 new_dentry = user_path_create(newdfd, newname, &new_path, 0);
2925 if (error)
2926 goto out;
2927 error = -EXDEV;
2928 if (old_path.mnt != nd.path.mnt)
2929 goto out_release;
2930 new_dentry = lookup_create(&nd, 0);
2931 error = PTR_ERR(new_dentry); 2846 error = PTR_ERR(new_dentry);
2932 if (IS_ERR(new_dentry)) 2847 if (IS_ERR(new_dentry))
2933 goto out_unlock; 2848 goto out;
2934 error = mnt_want_write(nd.path.mnt); 2849
2850 error = -EXDEV;
2851 if (old_path.mnt != new_path.mnt)
2852 goto out_dput;
2853 error = mnt_want_write(new_path.mnt);
2935 if (error) 2854 if (error)
2936 goto out_dput; 2855 goto out_dput;
2937 error = security_path_link(old_path.dentry, &nd.path, new_dentry); 2856 error = security_path_link(old_path.dentry, &new_path, new_dentry);
2938 if (error) 2857 if (error)
2939 goto out_drop_write; 2858 goto out_drop_write;
2940 error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); 2859 error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
2941out_drop_write: 2860out_drop_write:
2942 mnt_drop_write(nd.path.mnt); 2861 mnt_drop_write(new_path.mnt);
2943out_dput: 2862out_dput:
2944 dput(new_dentry); 2863 dput(new_dentry);
2945out_unlock: 2864 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
2946 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 2865 path_put(&new_path);
2947out_release:
2948 path_put(&nd.path);
2949 putname(to);
2950out: 2866out:
2951 path_put(&old_path); 2867 path_put(&old_path);
2952 2868
@@ -3352,11 +3268,9 @@ EXPORT_SYMBOL(page_readlink);
3352EXPORT_SYMBOL(__page_symlink); 3268EXPORT_SYMBOL(__page_symlink);
3353EXPORT_SYMBOL(page_symlink); 3269EXPORT_SYMBOL(page_symlink);
3354EXPORT_SYMBOL(page_symlink_inode_operations); 3270EXPORT_SYMBOL(page_symlink_inode_operations);
3355EXPORT_SYMBOL(kern_path_parent);
3356EXPORT_SYMBOL(kern_path); 3271EXPORT_SYMBOL(kern_path);
3357EXPORT_SYMBOL(vfs_path_lookup); 3272EXPORT_SYMBOL(vfs_path_lookup);
3358EXPORT_SYMBOL(inode_permission); 3273EXPORT_SYMBOL(inode_permission);
3359EXPORT_SYMBOL(file_permission);
3360EXPORT_SYMBOL(unlock_rename); 3274EXPORT_SYMBOL(unlock_rename);
3361EXPORT_SYMBOL(vfs_create); 3275EXPORT_SYMBOL(vfs_create);
3362EXPORT_SYMBOL(vfs_follow_link); 3276EXPORT_SYMBOL(vfs_follow_link);
diff --git a/fs/namespace.c b/fs/namespace.c
index fe59bd145d21..cda50fe9250a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -934,8 +934,8 @@ int mnt_had_events(struct proc_mounts *p)
934 int res = 0; 934 int res = 0;
935 935
936 br_read_lock(vfsmount_lock); 936 br_read_lock(vfsmount_lock);
937 if (p->event != ns->event) { 937 if (p->m.poll_event != ns->event) {
938 p->event = ns->event; 938 p->m.poll_event = ns->event;
939 res = 1; 939 res = 1;
940 } 940 }
941 br_read_unlock(vfsmount_lock); 941 br_read_unlock(vfsmount_lock);
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 0ed65e0c3dfe..64a326418aa2 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -20,9 +20,9 @@
20 20
21#include "ncp_fs.h" 21#include "ncp_fs.h"
22 22
23static int ncp_fsync(struct file *file, int datasync) 23static int ncp_fsync(struct file *file, loff_t start, loff_t end, int datasync)
24{ 24{
25 return 0; 25 return filemap_write_and_wait_range(file->f_mapping, start, end);
26} 26}
27 27
28/* 28/*
diff --git a/fs/nfs/cache_lib.c b/fs/nfs/cache_lib.c
index 84690319e625..c98b439332fc 100644
--- a/fs/nfs/cache_lib.c
+++ b/fs/nfs/cache_lib.c
@@ -113,19 +113,18 @@ int nfs_cache_wait_for_upcall(struct nfs_cache_defer_req *dreq)
113 113
114int nfs_cache_register(struct cache_detail *cd) 114int nfs_cache_register(struct cache_detail *cd)
115{ 115{
116 struct nameidata nd;
117 struct vfsmount *mnt; 116 struct vfsmount *mnt;
117 struct path path;
118 int ret; 118 int ret;
119 119
120 mnt = rpc_get_mount(); 120 mnt = rpc_get_mount();
121 if (IS_ERR(mnt)) 121 if (IS_ERR(mnt))
122 return PTR_ERR(mnt); 122 return PTR_ERR(mnt);
123 ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &nd); 123 ret = vfs_path_lookup(mnt->mnt_root, mnt, "/cache", 0, &path);
124 if (ret) 124 if (ret)
125 goto err; 125 goto err;
126 ret = sunrpc_cache_register_pipefs(nd.path.dentry, 126 ret = sunrpc_cache_register_pipefs(path.dentry, cd->name, 0600, cd);
127 cd->name, 0600, cd); 127 path_put(&path);
128 path_put(&nd.path);
129 if (!ret) 128 if (!ret)
130 return ret; 129 return ret;
131err: 130err:
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ededdbd0db38..57f578e2560a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -56,7 +56,7 @@ static int nfs_link(struct dentry *, struct inode *, struct dentry *);
56static int nfs_mknod(struct inode *, struct dentry *, int, dev_t); 56static int nfs_mknod(struct inode *, struct dentry *, int, dev_t);
57static int nfs_rename(struct inode *, struct dentry *, 57static int nfs_rename(struct inode *, struct dentry *,
58 struct inode *, struct dentry *); 58 struct inode *, struct dentry *);
59static int nfs_fsync_dir(struct file *, int); 59static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
60static loff_t nfs_llseek_dir(struct file *, loff_t, int); 60static loff_t nfs_llseek_dir(struct file *, loff_t, int);
61static void nfs_readdir_clear_array(struct page*); 61static void nfs_readdir_clear_array(struct page*);
62 62
@@ -945,15 +945,19 @@ out:
945 * All directory operations under NFS are synchronous, so fsync() 945 * All directory operations under NFS are synchronous, so fsync()
946 * is a dummy operation. 946 * is a dummy operation.
947 */ 947 */
948static int nfs_fsync_dir(struct file *filp, int datasync) 948static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
949 int datasync)
949{ 950{
950 struct dentry *dentry = filp->f_path.dentry; 951 struct dentry *dentry = filp->f_path.dentry;
952 struct inode *inode = dentry->d_inode;
951 953
952 dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n", 954 dfprintk(FILE, "NFS: fsync dir(%s/%s) datasync %d\n",
953 dentry->d_parent->d_name.name, dentry->d_name.name, 955 dentry->d_parent->d_name.name, dentry->d_name.name,
954 datasync); 956 datasync);
955 957
958 mutex_lock(&inode->i_mutex);
956 nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC); 959 nfs_inc_stats(dentry->d_inode, NFSIOS_VFSFSYNC);
960 mutex_unlock(&inode->i_mutex);
957 return 0; 961 return 0;
958} 962}
959 963
@@ -997,14 +1001,12 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
997 * Return the intent data that applies to this particular path component 1001 * Return the intent data that applies to this particular path component
998 * 1002 *
999 * Note that the current set of intents only apply to the very last 1003 * Note that the current set of intents only apply to the very last
1000 * component of the path. 1004 * component of the path and none of them is set before that last
1001 * We check for this using LOOKUP_CONTINUE and LOOKUP_PARENT. 1005 * component.
1002 */ 1006 */
1003static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, 1007static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd,
1004 unsigned int mask) 1008 unsigned int mask)
1005{ 1009{
1006 if (nd->flags & (LOOKUP_CONTINUE|LOOKUP_PARENT))
1007 return 0;
1008 return nd->flags & mask; 1010 return nd->flags & mask;
1009} 1011}
1010 1012
@@ -1338,25 +1340,31 @@ static int is_atomic_open(struct nameidata *nd)
1338 return 0; 1340 return 0;
1339 /* Are we trying to write to a read only partition? */ 1341 /* Are we trying to write to a read only partition? */
1340 if (__mnt_is_readonly(nd->path.mnt) && 1342 if (__mnt_is_readonly(nd->path.mnt) &&
1341 (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) 1343 (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE)))
1342 return 0; 1344 return 0;
1343 return 1; 1345 return 1;
1344} 1346}
1345 1347
1346static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd) 1348static fmode_t flags_to_mode(int flags)
1349{
1350 fmode_t res = (__force fmode_t)flags & FMODE_EXEC;
1351 if ((flags & O_ACCMODE) != O_WRONLY)
1352 res |= FMODE_READ;
1353 if ((flags & O_ACCMODE) != O_RDONLY)
1354 res |= FMODE_WRITE;
1355 return res;
1356}
1357
1358static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags)
1347{ 1359{
1348 struct path path = {
1349 .mnt = nd->path.mnt,
1350 .dentry = dentry,
1351 };
1352 struct nfs_open_context *ctx; 1360 struct nfs_open_context *ctx;
1353 struct rpc_cred *cred; 1361 struct rpc_cred *cred;
1354 fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); 1362 fmode_t fmode = flags_to_mode(open_flags);
1355 1363
1356 cred = rpc_lookup_cred(); 1364 cred = rpc_lookup_cred();
1357 if (IS_ERR(cred)) 1365 if (IS_ERR(cred))
1358 return ERR_CAST(cred); 1366 return ERR_CAST(cred);
1359 ctx = alloc_nfs_open_context(&path, cred, fmode); 1367 ctx = alloc_nfs_open_context(dentry, cred, fmode);
1360 put_rpccred(cred); 1368 put_rpccred(cred);
1361 if (ctx == NULL) 1369 if (ctx == NULL)
1362 return ERR_PTR(-ENOMEM); 1370 return ERR_PTR(-ENOMEM);
@@ -1376,13 +1384,13 @@ static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ct
1376 1384
1377 /* If the open_intent is for execute, we have an extra check to make */ 1385 /* If the open_intent is for execute, we have an extra check to make */
1378 if (ctx->mode & FMODE_EXEC) { 1386 if (ctx->mode & FMODE_EXEC) {
1379 ret = nfs_may_open(ctx->path.dentry->d_inode, 1387 ret = nfs_may_open(ctx->dentry->d_inode,
1380 ctx->cred, 1388 ctx->cred,
1381 nd->intent.open.flags); 1389 nd->intent.open.flags);
1382 if (ret < 0) 1390 if (ret < 0)
1383 goto out; 1391 goto out;
1384 } 1392 }
1385 filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open); 1393 filp = lookup_instantiate_filp(nd, ctx->dentry, do_open);
1386 if (IS_ERR(filp)) 1394 if (IS_ERR(filp))
1387 ret = PTR_ERR(filp); 1395 ret = PTR_ERR(filp);
1388 else 1396 else
@@ -1420,12 +1428,13 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1420 goto out; 1428 goto out;
1421 } 1429 }
1422 1430
1423 ctx = nameidata_to_nfs_open_context(dentry, nd); 1431 open_flags = nd->intent.open.flags;
1432
1433 ctx = create_nfs_open_context(dentry, open_flags);
1424 res = ERR_CAST(ctx); 1434 res = ERR_CAST(ctx);
1425 if (IS_ERR(ctx)) 1435 if (IS_ERR(ctx))
1426 goto out; 1436 goto out;
1427 1437
1428 open_flags = nd->intent.open.flags;
1429 if (nd->flags & LOOKUP_CREATE) { 1438 if (nd->flags & LOOKUP_CREATE) {
1430 attr.ia_mode = nd->intent.open.create_mode; 1439 attr.ia_mode = nd->intent.open.create_mode;
1431 attr.ia_valid = ATTR_MODE; 1440 attr.ia_valid = ATTR_MODE;
@@ -1463,8 +1472,8 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
1463 res = d_add_unique(dentry, inode); 1472 res = d_add_unique(dentry, inode);
1464 nfs_unblock_sillyrename(dentry->d_parent); 1473 nfs_unblock_sillyrename(dentry->d_parent);
1465 if (res != NULL) { 1474 if (res != NULL) {
1466 dput(ctx->path.dentry); 1475 dput(ctx->dentry);
1467 ctx->path.dentry = dget(res); 1476 ctx->dentry = dget(res);
1468 dentry = res; 1477 dentry = res;
1469 } 1478 }
1470 err = nfs_intent_set_file(nd, ctx); 1479 err = nfs_intent_set_file(nd, ctx);
@@ -1517,7 +1526,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
1517 /* We can't create new files, or truncate existing ones here */ 1526 /* We can't create new files, or truncate existing ones here */
1518 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); 1527 openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
1519 1528
1520 ctx = nameidata_to_nfs_open_context(dentry, nd); 1529 ctx = create_nfs_open_context(dentry, openflags);
1521 ret = PTR_ERR(ctx); 1530 ret = PTR_ERR(ctx);
1522 if (IS_ERR(ctx)) 1531 if (IS_ERR(ctx))
1523 goto out; 1532 goto out;
@@ -1570,7 +1579,7 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
1570 struct nfs_open_context *ctx = NULL; 1579 struct nfs_open_context *ctx = NULL;
1571 struct iattr attr; 1580 struct iattr attr;
1572 int error; 1581 int error;
1573 int open_flags = 0; 1582 int open_flags = O_CREAT|O_EXCL;
1574 1583
1575 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1584 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1576 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1585 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1578,27 +1587,27 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode,
1578 attr.ia_mode = mode; 1587 attr.ia_mode = mode;
1579 attr.ia_valid = ATTR_MODE; 1588 attr.ia_valid = ATTR_MODE;
1580 1589
1581 if ((nd->flags & LOOKUP_CREATE) != 0) { 1590 if (nd)
1582 open_flags = nd->intent.open.flags; 1591 open_flags = nd->intent.open.flags;
1583 1592
1584 ctx = nameidata_to_nfs_open_context(dentry, nd); 1593 ctx = create_nfs_open_context(dentry, open_flags);
1585 error = PTR_ERR(ctx); 1594 error = PTR_ERR(ctx);
1586 if (IS_ERR(ctx)) 1595 if (IS_ERR(ctx))
1587 goto out_err_drop; 1596 goto out_err_drop;
1588 }
1589 1597
1590 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); 1598 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx);
1591 if (error != 0) 1599 if (error != 0)
1592 goto out_put_ctx; 1600 goto out_put_ctx;
1593 if (ctx != NULL) { 1601 if (nd) {
1594 error = nfs_intent_set_file(nd, ctx); 1602 error = nfs_intent_set_file(nd, ctx);
1595 if (error < 0) 1603 if (error < 0)
1596 goto out_err; 1604 goto out_err;
1605 } else {
1606 put_nfs_open_context(ctx);
1597 } 1607 }
1598 return 0; 1608 return 0;
1599out_put_ctx: 1609out_put_ctx:
1600 if (ctx != NULL) 1610 put_nfs_open_context(ctx);
1601 put_nfs_open_context(ctx);
1602out_err_drop: 1611out_err_drop:
1603 d_drop(dentry); 1612 d_drop(dentry);
1604out_err: 1613out_err:
@@ -1660,7 +1669,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1660{ 1669{
1661 struct iattr attr; 1670 struct iattr attr;
1662 int error; 1671 int error;
1663 int open_flags = 0; 1672 int open_flags = O_CREAT|O_EXCL;
1664 1673
1665 dfprintk(VFS, "NFS: create(%s/%ld), %s\n", 1674 dfprintk(VFS, "NFS: create(%s/%ld), %s\n",
1666 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); 1675 dir->i_sb->s_id, dir->i_ino, dentry->d_name.name);
@@ -1668,7 +1677,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
1668 attr.ia_mode = mode; 1677 attr.ia_mode = mode;
1669 attr.ia_valid = ATTR_MODE; 1678 attr.ia_valid = ATTR_MODE;
1670 1679
1671 if ((nd->flags & LOOKUP_CREATE) != 0) 1680 if (nd)
1672 open_flags = nd->intent.open.flags; 1681 open_flags = nd->intent.open.flags;
1673 1682
1674 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); 1683 error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL);
@@ -2259,11 +2268,11 @@ static int nfs_open_permission_mask(int openflags)
2259{ 2268{
2260 int mask = 0; 2269 int mask = 0;
2261 2270
2262 if (openflags & FMODE_READ) 2271 if ((openflags & O_ACCMODE) != O_WRONLY)
2263 mask |= MAY_READ; 2272 mask |= MAY_READ;
2264 if (openflags & FMODE_WRITE) 2273 if ((openflags & O_ACCMODE) != O_RDONLY)
2265 mask |= MAY_WRITE; 2274 mask |= MAY_WRITE;
2266 if (openflags & FMODE_EXEC) 2275 if (openflags & __FMODE_EXEC)
2267 mask |= MAY_EXEC; 2276 mask |= MAY_EXEC;
2268 return mask; 2277 return mask;
2269} 2278}
@@ -2273,12 +2282,12 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
2273 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); 2282 return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
2274} 2283}
2275 2284
2276int nfs_permission(struct inode *inode, int mask, unsigned int flags) 2285int nfs_permission(struct inode *inode, int mask)
2277{ 2286{
2278 struct rpc_cred *cred; 2287 struct rpc_cred *cred;
2279 int res = 0; 2288 int res = 0;
2280 2289
2281 if (flags & IPERM_FLAG_RCU) 2290 if (mask & MAY_NOT_BLOCK)
2282 return -ECHILD; 2291 return -ECHILD;
2283 2292
2284 nfs_inc_stats(inode, NFSIOS_VFSACCESS); 2293 nfs_inc_stats(inode, NFSIOS_VFSACCESS);
@@ -2328,7 +2337,7 @@ out:
2328out_notsup: 2337out_notsup:
2329 res = nfs_revalidate_inode(NFS_SERVER(inode), inode); 2338 res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
2330 if (res == 0) 2339 if (res == 0)
2331 res = generic_permission(inode, mask, flags, NULL); 2340 res = generic_permission(inode, mask);
2332 goto out; 2341 goto out;
2333} 2342}
2334 2343
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 8eea25366717..b35d25b98da6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -284,7 +284,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
284 loff_t pos) 284 loff_t pos)
285{ 285{
286 struct nfs_open_context *ctx = dreq->ctx; 286 struct nfs_open_context *ctx = dreq->ctx;
287 struct inode *inode = ctx->path.dentry->d_inode; 287 struct inode *inode = ctx->dentry->d_inode;
288 unsigned long user_addr = (unsigned long)iov->iov_base; 288 unsigned long user_addr = (unsigned long)iov->iov_base;
289 size_t count = iov->iov_len; 289 size_t count = iov->iov_len;
290 size_t rsize = NFS_SERVER(inode)->rsize; 290 size_t rsize = NFS_SERVER(inode)->rsize;
@@ -715,7 +715,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
715 loff_t pos, int sync) 715 loff_t pos, int sync)
716{ 716{
717 struct nfs_open_context *ctx = dreq->ctx; 717 struct nfs_open_context *ctx = dreq->ctx;
718 struct inode *inode = ctx->path.dentry->d_inode; 718 struct inode *inode = ctx->dentry->d_inode;
719 unsigned long user_addr = (unsigned long)iov->iov_base; 719 unsigned long user_addr = (unsigned long)iov->iov_base;
720 size_t count = iov->iov_len; 720 size_t count = iov->iov_len;
721 struct rpc_task *task; 721 struct rpc_task *task;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2f093ed16980..28b8c3f3cda3 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -55,7 +55,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
55static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, 55static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
56 unsigned long nr_segs, loff_t pos); 56 unsigned long nr_segs, loff_t pos);
57static int nfs_file_flush(struct file *, fl_owner_t id); 57static int nfs_file_flush(struct file *, fl_owner_t id);
58static int nfs_file_fsync(struct file *, int datasync); 58static int nfs_file_fsync(struct file *, loff_t, loff_t, int datasync);
59static int nfs_check_flags(int flags); 59static int nfs_check_flags(int flags);
60static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); 60static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
61static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); 61static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
@@ -187,8 +187,11 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
187 filp->f_path.dentry->d_name.name, 187 filp->f_path.dentry->d_name.name,
188 offset, origin); 188 offset, origin);
189 189
190 /* origin == SEEK_END => we must revalidate the cached file length */ 190 /*
191 if (origin == SEEK_END) { 191 * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
192 * the cached file length
193 */
194 if (origin != SEEK_SET || origin != SEEK_CUR) {
192 struct inode *inode = filp->f_mapping->host; 195 struct inode *inode = filp->f_mapping->host;
193 196
194 int retval = nfs_revalidate_file_size(inode, filp); 197 int retval = nfs_revalidate_file_size(inode, filp);
@@ -305,7 +308,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
305 * fall back to doing a synchronous write. 308 * fall back to doing a synchronous write.
306 */ 309 */
307static int 310static int
308nfs_file_fsync(struct file *file, int datasync) 311nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
309{ 312{
310 struct dentry *dentry = file->f_path.dentry; 313 struct dentry *dentry = file->f_path.dentry;
311 struct nfs_open_context *ctx = nfs_file_open_context(file); 314 struct nfs_open_context *ctx = nfs_file_open_context(file);
@@ -313,11 +316,15 @@ nfs_file_fsync(struct file *file, int datasync)
313 int have_error, status; 316 int have_error, status;
314 int ret = 0; 317 int ret = 0;
315 318
316
317 dprintk("NFS: fsync file(%s/%s) datasync %d\n", 319 dprintk("NFS: fsync file(%s/%s) datasync %d\n",
318 dentry->d_parent->d_name.name, dentry->d_name.name, 320 dentry->d_parent->d_name.name, dentry->d_name.name,
319 datasync); 321 datasync);
320 322
323 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
324 if (ret)
325 return ret;
326 mutex_lock(&inode->i_mutex);
327
321 nfs_inc_stats(inode, NFSIOS_VFSFSYNC); 328 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
322 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 329 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
323 status = nfs_commit_inode(inode, FLUSH_SYNC); 330 status = nfs_commit_inode(inode, FLUSH_SYNC);
@@ -329,6 +336,7 @@ nfs_file_fsync(struct file *file, int datasync)
329 if (!ret && !datasync) 336 if (!ret && !datasync)
330 /* application has asked for meta-data sync */ 337 /* application has asked for meta-data sync */
331 ret = pnfs_layoutcommit_inode(inode, true); 338 ret = pnfs_layoutcommit_inode(inode, true);
339 mutex_unlock(&inode->i_mutex);
332 return ret; 340 return ret;
333} 341}
334 342
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 6f4850deb272..fe1203797b2b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -567,7 +567,7 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
567struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx) 567struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
568{ 568{
569 struct nfs_lock_context *res, *new = NULL; 569 struct nfs_lock_context *res, *new = NULL;
570 struct inode *inode = ctx->path.dentry->d_inode; 570 struct inode *inode = ctx->dentry->d_inode;
571 571
572 spin_lock(&inode->i_lock); 572 spin_lock(&inode->i_lock);
573 res = __nfs_find_lock_context(ctx); 573 res = __nfs_find_lock_context(ctx);
@@ -594,7 +594,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
594void nfs_put_lock_context(struct nfs_lock_context *l_ctx) 594void nfs_put_lock_context(struct nfs_lock_context *l_ctx)
595{ 595{
596 struct nfs_open_context *ctx = l_ctx->open_context; 596 struct nfs_open_context *ctx = l_ctx->open_context;
597 struct inode *inode = ctx->path.dentry->d_inode; 597 struct inode *inode = ctx->dentry->d_inode;
598 598
599 if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock)) 599 if (!atomic_dec_and_lock(&l_ctx->count, &inode->i_lock))
600 return; 600 return;
@@ -620,7 +620,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
620 return; 620 return;
621 if (!is_sync) 621 if (!is_sync)
622 return; 622 return;
623 inode = ctx->path.dentry->d_inode; 623 inode = ctx->dentry->d_inode;
624 if (!list_empty(&NFS_I(inode)->open_files)) 624 if (!list_empty(&NFS_I(inode)->open_files))
625 return; 625 return;
626 server = NFS_SERVER(inode); 626 server = NFS_SERVER(inode);
@@ -629,14 +629,14 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
629 nfs_revalidate_inode(server, inode); 629 nfs_revalidate_inode(server, inode);
630} 630}
631 631
632struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode) 632struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred, fmode_t f_mode)
633{ 633{
634 struct nfs_open_context *ctx; 634 struct nfs_open_context *ctx;
635 635
636 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); 636 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
637 if (ctx != NULL) { 637 if (ctx != NULL) {
638 ctx->path = *path; 638 nfs_sb_active(dentry->d_sb);
639 path_get(&ctx->path); 639 ctx->dentry = dget(dentry);
640 ctx->cred = get_rpccred(cred); 640 ctx->cred = get_rpccred(cred);
641 ctx->state = NULL; 641 ctx->state = NULL;
642 ctx->mode = f_mode; 642 ctx->mode = f_mode;
@@ -658,7 +658,8 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
658 658
659static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) 659static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
660{ 660{
661 struct inode *inode = ctx->path.dentry->d_inode; 661 struct inode *inode = ctx->dentry->d_inode;
662 struct super_block *sb = ctx->dentry->d_sb;
662 663
663 if (!list_empty(&ctx->list)) { 664 if (!list_empty(&ctx->list)) {
664 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) 665 if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
@@ -671,7 +672,8 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
671 NFS_PROTO(inode)->close_context(ctx, is_sync); 672 NFS_PROTO(inode)->close_context(ctx, is_sync);
672 if (ctx->cred != NULL) 673 if (ctx->cred != NULL)
673 put_rpccred(ctx->cred); 674 put_rpccred(ctx->cred);
674 path_put(&ctx->path); 675 dput(ctx->dentry);
676 nfs_sb_deactive(sb);
675 kfree(ctx); 677 kfree(ctx);
676} 678}
677 679
@@ -741,7 +743,7 @@ int nfs_open(struct inode *inode, struct file *filp)
741 cred = rpc_lookup_cred(); 743 cred = rpc_lookup_cred();
742 if (IS_ERR(cred)) 744 if (IS_ERR(cred))
743 return PTR_ERR(cred); 745 return PTR_ERR(cred);
744 ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode); 746 ctx = alloc_nfs_open_context(filp->f_path.dentry, cred, filp->f_mode);
745 put_rpccred(cred); 747 put_rpccred(cred);
746 if (ctx == NULL) 748 if (ctx == NULL)
747 return -ENOMEM; 749 return -ENOMEM;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c4a69833dd0d..b788f2eb1ba0 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -238,7 +238,7 @@ extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
238extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); 238extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
239extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 239extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
240extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 240extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
241extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); 241extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc);
242extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 242extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
243extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, 243extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
244 struct nfs4_fs_locations *fs_locations, struct page *page); 244 struct nfs4_fs_locations *fs_locations, struct page *page);
@@ -341,8 +341,8 @@ extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struc
341extern void nfs4_put_state_owner(struct nfs4_state_owner *); 341extern void nfs4_put_state_owner(struct nfs4_state_owner *);
342extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 342extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
343extern void nfs4_put_open_state(struct nfs4_state *); 343extern void nfs4_put_open_state(struct nfs4_state *);
344extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 344extern void nfs4_close_state(struct nfs4_state *, fmode_t);
345extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 345extern void nfs4_close_sync(struct nfs4_state *, fmode_t);
346extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 346extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
347extern void nfs4_schedule_lease_recovery(struct nfs_client *); 347extern void nfs4_schedule_lease_recovery(struct nfs_client *);
348extern void nfs4_schedule_state_manager(struct nfs_client *); 348extern void nfs4_schedule_state_manager(struct nfs_client *);
@@ -373,8 +373,8 @@ extern struct svc_version nfs4_callback_version4;
373 373
374#else 374#else
375 375
376#define nfs4_close_state(a, b, c) do { } while (0) 376#define nfs4_close_state(a, b) do { } while (0)
377#define nfs4_close_sync(a, b, c) do { } while (0) 377#define nfs4_close_sync(a, b) do { } while (0)
378 378
379#endif /* CONFIG_NFS_V4 */ 379#endif /* CONFIG_NFS_V4 */
380#endif /* __LINUX_FS_NFS_NFS4_FS.H */ 380#endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5879b23e0c99..26bece8f3083 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -763,8 +763,8 @@ struct nfs4_opendata {
763 struct nfs_open_confirmres c_res; 763 struct nfs_open_confirmres c_res;
764 struct nfs_fattr f_attr; 764 struct nfs_fattr f_attr;
765 struct nfs_fattr dir_attr; 765 struct nfs_fattr dir_attr;
766 struct path path;
767 struct dentry *dir; 766 struct dentry *dir;
767 struct dentry *dentry;
768 struct nfs4_state_owner *owner; 768 struct nfs4_state_owner *owner;
769 struct nfs4_state *state; 769 struct nfs4_state *state;
770 struct iattr attrs; 770 struct iattr attrs;
@@ -786,12 +786,12 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
786 nfs_fattr_init(&p->dir_attr); 786 nfs_fattr_init(&p->dir_attr);
787} 787}
788 788
789static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, 789static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
790 struct nfs4_state_owner *sp, fmode_t fmode, int flags, 790 struct nfs4_state_owner *sp, fmode_t fmode, int flags,
791 const struct iattr *attrs, 791 const struct iattr *attrs,
792 gfp_t gfp_mask) 792 gfp_t gfp_mask)
793{ 793{
794 struct dentry *parent = dget_parent(path->dentry); 794 struct dentry *parent = dget_parent(dentry);
795 struct inode *dir = parent->d_inode; 795 struct inode *dir = parent->d_inode;
796 struct nfs_server *server = NFS_SERVER(dir); 796 struct nfs_server *server = NFS_SERVER(dir);
797 struct nfs4_opendata *p; 797 struct nfs4_opendata *p;
@@ -802,8 +802,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
802 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask); 802 p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid, gfp_mask);
803 if (p->o_arg.seqid == NULL) 803 if (p->o_arg.seqid == NULL)
804 goto err_free; 804 goto err_free;
805 path_get(path); 805 nfs_sb_active(dentry->d_sb);
806 p->path = *path; 806 p->dentry = dget(dentry);
807 p->dir = parent; 807 p->dir = parent;
808 p->owner = sp; 808 p->owner = sp;
809 atomic_inc(&sp->so_count); 809 atomic_inc(&sp->so_count);
@@ -812,7 +812,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
812 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); 812 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
813 p->o_arg.clientid = server->nfs_client->cl_clientid; 813 p->o_arg.clientid = server->nfs_client->cl_clientid;
814 p->o_arg.id = sp->so_owner_id.id; 814 p->o_arg.id = sp->so_owner_id.id;
815 p->o_arg.name = &p->path.dentry->d_name; 815 p->o_arg.name = &dentry->d_name;
816 p->o_arg.server = server; 816 p->o_arg.server = server;
817 p->o_arg.bitmask = server->attr_bitmask; 817 p->o_arg.bitmask = server->attr_bitmask;
818 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL; 818 p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
@@ -842,13 +842,15 @@ static void nfs4_opendata_free(struct kref *kref)
842{ 842{
843 struct nfs4_opendata *p = container_of(kref, 843 struct nfs4_opendata *p = container_of(kref,
844 struct nfs4_opendata, kref); 844 struct nfs4_opendata, kref);
845 struct super_block *sb = p->dentry->d_sb;
845 846
846 nfs_free_seqid(p->o_arg.seqid); 847 nfs_free_seqid(p->o_arg.seqid);
847 if (p->state != NULL) 848 if (p->state != NULL)
848 nfs4_put_open_state(p->state); 849 nfs4_put_open_state(p->state);
849 nfs4_put_state_owner(p->owner); 850 nfs4_put_state_owner(p->owner);
850 dput(p->dir); 851 dput(p->dir);
851 path_put(&p->path); 852 dput(p->dentry);
853 nfs_sb_deactive(sb);
852 kfree(p); 854 kfree(p);
853} 855}
854 856
@@ -1130,7 +1132,7 @@ static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context
1130{ 1132{
1131 struct nfs4_opendata *opendata; 1133 struct nfs4_opendata *opendata;
1132 1134
1133 opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, 0, NULL, GFP_NOFS); 1135 opendata = nfs4_opendata_alloc(ctx->dentry, state->owner, 0, 0, NULL, GFP_NOFS);
1134 if (opendata == NULL) 1136 if (opendata == NULL)
1135 return ERR_PTR(-ENOMEM); 1137 return ERR_PTR(-ENOMEM);
1136 opendata->state = state; 1138 opendata->state = state;
@@ -1154,7 +1156,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
1154 newstate = nfs4_opendata_to_nfs4_state(opendata); 1156 newstate = nfs4_opendata_to_nfs4_state(opendata);
1155 if (IS_ERR(newstate)) 1157 if (IS_ERR(newstate))
1156 return PTR_ERR(newstate); 1158 return PTR_ERR(newstate);
1157 nfs4_close_state(&opendata->path, newstate, fmode); 1159 nfs4_close_state(newstate, fmode);
1158 *res = newstate; 1160 *res = newstate;
1159 return 0; 1161 return 0;
1160} 1162}
@@ -1352,7 +1354,7 @@ static void nfs4_open_confirm_release(void *calldata)
1352 goto out_free; 1354 goto out_free;
1353 state = nfs4_opendata_to_nfs4_state(data); 1355 state = nfs4_opendata_to_nfs4_state(data);
1354 if (!IS_ERR(state)) 1356 if (!IS_ERR(state))
1355 nfs4_close_state(&data->path, state, data->o_arg.fmode); 1357 nfs4_close_state(state, data->o_arg.fmode);
1356out_free: 1358out_free:
1357 nfs4_opendata_put(data); 1359 nfs4_opendata_put(data);
1358} 1360}
@@ -1497,7 +1499,7 @@ static void nfs4_open_release(void *calldata)
1497 goto out_free; 1499 goto out_free;
1498 state = nfs4_opendata_to_nfs4_state(data); 1500 state = nfs4_opendata_to_nfs4_state(data);
1499 if (!IS_ERR(state)) 1501 if (!IS_ERR(state))
1500 nfs4_close_state(&data->path, state, data->o_arg.fmode); 1502 nfs4_close_state(state, data->o_arg.fmode);
1501out_free: 1503out_free:
1502 nfs4_opendata_put(data); 1504 nfs4_opendata_put(data);
1503} 1505}
@@ -1648,7 +1650,7 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
1648 return PTR_ERR(opendata); 1650 return PTR_ERR(opendata);
1649 ret = nfs4_open_recover(opendata, state); 1651 ret = nfs4_open_recover(opendata, state);
1650 if (ret == -ESTALE) 1652 if (ret == -ESTALE)
1651 d_drop(ctx->path.dentry); 1653 d_drop(ctx->dentry);
1652 nfs4_opendata_put(opendata); 1654 nfs4_opendata_put(opendata);
1653 return ret; 1655 return ret;
1654} 1656}
@@ -1706,7 +1708,7 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct
1706/* 1708/*
1707 * Returns a referenced nfs4_state 1709 * Returns a referenced nfs4_state
1708 */ 1710 */
1709static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) 1711static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
1710{ 1712{
1711 struct nfs4_state_owner *sp; 1713 struct nfs4_state_owner *sp;
1712 struct nfs4_state *state = NULL; 1714 struct nfs4_state *state = NULL;
@@ -1723,15 +1725,15 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
1723 status = nfs4_recover_expired_lease(server); 1725 status = nfs4_recover_expired_lease(server);
1724 if (status != 0) 1726 if (status != 0)
1725 goto err_put_state_owner; 1727 goto err_put_state_owner;
1726 if (path->dentry->d_inode != NULL) 1728 if (dentry->d_inode != NULL)
1727 nfs4_return_incompatible_delegation(path->dentry->d_inode, fmode); 1729 nfs4_return_incompatible_delegation(dentry->d_inode, fmode);
1728 status = -ENOMEM; 1730 status = -ENOMEM;
1729 opendata = nfs4_opendata_alloc(path, sp, fmode, flags, sattr, GFP_KERNEL); 1731 opendata = nfs4_opendata_alloc(dentry, sp, fmode, flags, sattr, GFP_KERNEL);
1730 if (opendata == NULL) 1732 if (opendata == NULL)
1731 goto err_put_state_owner; 1733 goto err_put_state_owner;
1732 1734
1733 if (path->dentry->d_inode != NULL) 1735 if (dentry->d_inode != NULL)
1734 opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp); 1736 opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
1735 1737
1736 status = _nfs4_proc_open(opendata); 1738 status = _nfs4_proc_open(opendata);
1737 if (status != 0) 1739 if (status != 0)
@@ -1769,14 +1771,14 @@ out_err:
1769} 1771}
1770 1772
1771 1773
1772static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred) 1774static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, fmode_t fmode, int flags, struct iattr *sattr, struct rpc_cred *cred)
1773{ 1775{
1774 struct nfs4_exception exception = { }; 1776 struct nfs4_exception exception = { };
1775 struct nfs4_state *res; 1777 struct nfs4_state *res;
1776 int status; 1778 int status;
1777 1779
1778 do { 1780 do {
1779 status = _nfs4_do_open(dir, path, fmode, flags, sattr, cred, &res); 1781 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, &res);
1780 if (status == 0) 1782 if (status == 0)
1781 break; 1783 break;
1782 /* NOTE: BAD_SEQID means the server and client disagree about the 1784 /* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1873,7 +1875,6 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1873} 1875}
1874 1876
1875struct nfs4_closedata { 1877struct nfs4_closedata {
1876 struct path path;
1877 struct inode *inode; 1878 struct inode *inode;
1878 struct nfs4_state *state; 1879 struct nfs4_state *state;
1879 struct nfs_closeargs arg; 1880 struct nfs_closeargs arg;
@@ -1888,13 +1889,14 @@ static void nfs4_free_closedata(void *data)
1888{ 1889{
1889 struct nfs4_closedata *calldata = data; 1890 struct nfs4_closedata *calldata = data;
1890 struct nfs4_state_owner *sp = calldata->state->owner; 1891 struct nfs4_state_owner *sp = calldata->state->owner;
1892 struct super_block *sb = calldata->state->inode->i_sb;
1891 1893
1892 if (calldata->roc) 1894 if (calldata->roc)
1893 pnfs_roc_release(calldata->state->inode); 1895 pnfs_roc_release(calldata->state->inode);
1894 nfs4_put_open_state(calldata->state); 1896 nfs4_put_open_state(calldata->state);
1895 nfs_free_seqid(calldata->arg.seqid); 1897 nfs_free_seqid(calldata->arg.seqid);
1896 nfs4_put_state_owner(sp); 1898 nfs4_put_state_owner(sp);
1897 path_put(&calldata->path); 1899 nfs_sb_deactive(sb);
1898 kfree(calldata); 1900 kfree(calldata);
1899} 1901}
1900 1902
@@ -2014,7 +2016,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
2014 * 2016 *
2015 * NOTE: Caller must be holding the sp->so_owner semaphore! 2017 * NOTE: Caller must be holding the sp->so_owner semaphore!
2016 */ 2018 */
2017int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) 2019int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
2018{ 2020{
2019 struct nfs_server *server = NFS_SERVER(state->inode); 2021 struct nfs_server *server = NFS_SERVER(state->inode);
2020 struct nfs4_closedata *calldata; 2022 struct nfs4_closedata *calldata;
@@ -2050,8 +2052,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i
2050 calldata->res.seqid = calldata->arg.seqid; 2052 calldata->res.seqid = calldata->arg.seqid;
2051 calldata->res.server = server; 2053 calldata->res.server = server;
2052 calldata->roc = roc; 2054 calldata->roc = roc;
2053 path_get(path); 2055 nfs_sb_active(calldata->inode->i_sb);
2054 calldata->path = *path;
2055 2056
2056 msg.rpc_argp = &calldata->arg; 2057 msg.rpc_argp = &calldata->arg;
2057 msg.rpc_resp = &calldata->res; 2058 msg.rpc_resp = &calldata->res;
@@ -2080,7 +2081,7 @@ nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags
2080 struct nfs4_state *state; 2081 struct nfs4_state *state;
2081 2082
2082 /* Protect against concurrent sillydeletes */ 2083 /* Protect against concurrent sillydeletes */
2083 state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred); 2084 state = nfs4_do_open(dir, ctx->dentry, ctx->mode, open_flags, attr, ctx->cred);
2084 if (IS_ERR(state)) 2085 if (IS_ERR(state))
2085 return ERR_CAST(state); 2086 return ERR_CAST(state);
2086 ctx->state = state; 2087 ctx->state = state;
@@ -2092,9 +2093,9 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
2092 if (ctx->state == NULL) 2093 if (ctx->state == NULL)
2093 return; 2094 return;
2094 if (is_sync) 2095 if (is_sync)
2095 nfs4_close_sync(&ctx->path, ctx->state, ctx->mode); 2096 nfs4_close_sync(ctx->state, ctx->mode);
2096 else 2097 else
2097 nfs4_close_state(&ctx->path, ctx->state, ctx->mode); 2098 nfs4_close_state(ctx->state, ctx->mode);
2098} 2099}
2099 2100
2100static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) 2101static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
@@ -2616,10 +2617,7 @@ static int
2616nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, 2617nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2617 int flags, struct nfs_open_context *ctx) 2618 int flags, struct nfs_open_context *ctx)
2618{ 2619{
2619 struct path my_path = { 2620 struct dentry *de = dentry;
2620 .dentry = dentry,
2621 };
2622 struct path *path = &my_path;
2623 struct nfs4_state *state; 2621 struct nfs4_state *state;
2624 struct rpc_cred *cred = NULL; 2622 struct rpc_cred *cred = NULL;
2625 fmode_t fmode = 0; 2623 fmode_t fmode = 0;
@@ -2627,11 +2625,11 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2627 2625
2628 if (ctx != NULL) { 2626 if (ctx != NULL) {
2629 cred = ctx->cred; 2627 cred = ctx->cred;
2630 path = &ctx->path; 2628 de = ctx->dentry;
2631 fmode = ctx->mode; 2629 fmode = ctx->mode;
2632 } 2630 }
2633 sattr->ia_mode &= ~current_umask(); 2631 sattr->ia_mode &= ~current_umask();
2634 state = nfs4_do_open(dir, path, fmode, flags, sattr, cred); 2632 state = nfs4_do_open(dir, de, fmode, flags, sattr, cred);
2635 d_drop(dentry); 2633 d_drop(dentry);
2636 if (IS_ERR(state)) { 2634 if (IS_ERR(state)) {
2637 status = PTR_ERR(state); 2635 status = PTR_ERR(state);
@@ -2642,7 +2640,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
2642 if (ctx != NULL) 2640 if (ctx != NULL)
2643 ctx->state = state; 2641 ctx->state = state;
2644 else 2642 else
2645 nfs4_close_sync(path, state, fmode); 2643 nfs4_close_sync(state, fmode);
2646out: 2644out:
2647 return status; 2645 return status;
2648} 2646}
@@ -4294,7 +4292,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
4294 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data, 4292 memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
4295 sizeof(data->lsp->ls_stateid.data)); 4293 sizeof(data->lsp->ls_stateid.data));
4296 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; 4294 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
4297 renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp); 4295 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
4298 } 4296 }
4299out: 4297out:
4300 dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status); 4298 dprintk("%s: done, ret = %d!\n", __func__, data->rpc_status);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e97dd219f84f..7acfe8843626 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -641,7 +641,7 @@ void nfs4_put_open_state(struct nfs4_state *state)
641/* 641/*
642 * Close the current file. 642 * Close the current file.
643 */ 643 */
644static void __nfs4_close(struct path *path, struct nfs4_state *state, 644static void __nfs4_close(struct nfs4_state *state,
645 fmode_t fmode, gfp_t gfp_mask, int wait) 645 fmode_t fmode, gfp_t gfp_mask, int wait)
646{ 646{
647 struct nfs4_state_owner *owner = state->owner; 647 struct nfs4_state_owner *owner = state->owner;
@@ -685,18 +685,18 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state,
685 } else { 685 } else {
686 bool roc = pnfs_roc(state->inode); 686 bool roc = pnfs_roc(state->inode);
687 687
688 nfs4_do_close(path, state, gfp_mask, wait, roc); 688 nfs4_do_close(state, gfp_mask, wait, roc);
689 } 689 }
690} 690}
691 691
692void nfs4_close_state(struct path *path, struct nfs4_state *state, fmode_t fmode) 692void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
693{ 693{
694 __nfs4_close(path, state, fmode, GFP_NOFS, 0); 694 __nfs4_close(state, fmode, GFP_NOFS, 0);
695} 695}
696 696
697void nfs4_close_sync(struct path *path, struct nfs4_state *state, fmode_t fmode) 697void nfs4_close_sync(struct nfs4_state *state, fmode_t fmode)
698{ 698{
699 __nfs4_close(path, state, fmode, GFP_KERNEL, 1); 699 __nfs4_close(state, fmode, GFP_KERNEL, 1);
700} 700}
701 701
702/* 702/*
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 009855716286..18449f43c568 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -114,7 +114,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
114 if (!nfs_lock_request_dontget(req)) 114 if (!nfs_lock_request_dontget(req))
115 return 0; 115 return 0;
116 if (test_bit(PG_MAPPED, &req->wb_flags)) 116 if (test_bit(PG_MAPPED, &req->wb_flags))
117 radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); 117 radix_tree_tag_set(&NFS_I(req->wb_context->dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
118 return 1; 118 return 1;
119} 119}
120 120
@@ -124,7 +124,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
124void nfs_clear_page_tag_locked(struct nfs_page *req) 124void nfs_clear_page_tag_locked(struct nfs_page *req)
125{ 125{
126 if (test_bit(PG_MAPPED, &req->wb_flags)) { 126 if (test_bit(PG_MAPPED, &req->wb_flags)) {
127 struct inode *inode = req->wb_context->path.dentry->d_inode; 127 struct inode *inode = req->wb_context->dentry->d_inode;
128 struct nfs_inode *nfsi = NFS_I(inode); 128 struct nfs_inode *nfsi = NFS_I(inode);
129 129
130 spin_lock(&inode->i_lock); 130 spin_lock(&inode->i_lock);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 20a7f952e244..a68679f538fc 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -144,7 +144,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
144 144
145static void nfs_readpage_release(struct nfs_page *req) 145static void nfs_readpage_release(struct nfs_page *req)
146{ 146{
147 struct inode *d_inode = req->wb_context->path.dentry->d_inode; 147 struct inode *d_inode = req->wb_context->dentry->d_inode;
148 148
149 if (PageUptodate(req->wb_page)) 149 if (PageUptodate(req->wb_page))
150 nfs_readpage_to_fscache(d_inode, req->wb_page, 0); 150 nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
@@ -152,8 +152,8 @@ static void nfs_readpage_release(struct nfs_page *req)
152 unlock_page(req->wb_page); 152 unlock_page(req->wb_page);
153 153
154 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 154 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
155 req->wb_context->path.dentry->d_inode->i_sb->s_id, 155 req->wb_context->dentry->d_inode->i_sb->s_id,
156 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 156 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
157 req->wb_bytes, 157 req->wb_bytes,
158 (long long)req_offset(req)); 158 (long long)req_offset(req));
159 nfs_release_request(req); 159 nfs_release_request(req);
@@ -207,7 +207,7 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
207 unsigned int count, unsigned int offset, 207 unsigned int count, unsigned int offset,
208 struct pnfs_layout_segment *lseg) 208 struct pnfs_layout_segment *lseg)
209{ 209{
210 struct inode *inode = req->wb_context->path.dentry->d_inode; 210 struct inode *inode = req->wb_context->dentry->d_inode;
211 211
212 data->req = req; 212 data->req = req;
213 data->inode = inode; 213 data->inode = inode;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce40e5c568ba..b961ceac66b4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2773,16 +2773,12 @@ static void nfs_referral_loop_unprotect(void)
2773static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt, 2773static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2774 const char *export_path) 2774 const char *export_path)
2775{ 2775{
2776 struct nameidata *nd = NULL;
2777 struct mnt_namespace *ns_private; 2776 struct mnt_namespace *ns_private;
2778 struct super_block *s; 2777 struct super_block *s;
2779 struct dentry *dentry; 2778 struct dentry *dentry;
2779 struct path path;
2780 int ret; 2780 int ret;
2781 2781
2782 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
2783 if (nd == NULL)
2784 return ERR_PTR(-ENOMEM);
2785
2786 ns_private = create_mnt_ns(root_mnt); 2782 ns_private = create_mnt_ns(root_mnt);
2787 ret = PTR_ERR(ns_private); 2783 ret = PTR_ERR(ns_private);
2788 if (IS_ERR(ns_private)) 2784 if (IS_ERR(ns_private))
@@ -2793,7 +2789,7 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2793 goto out_put_mnt_ns; 2789 goto out_put_mnt_ns;
2794 2790
2795 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt, 2791 ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
2796 export_path, LOOKUP_FOLLOW, nd); 2792 export_path, LOOKUP_FOLLOW, &path);
2797 2793
2798 nfs_referral_loop_unprotect(); 2794 nfs_referral_loop_unprotect();
2799 put_mnt_ns(ns_private); 2795 put_mnt_ns(ns_private);
@@ -2801,12 +2797,11 @@ static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2801 if (ret != 0) 2797 if (ret != 0)
2802 goto out_err; 2798 goto out_err;
2803 2799
2804 s = nd->path.mnt->mnt_sb; 2800 s = path.mnt->mnt_sb;
2805 atomic_inc(&s->s_active); 2801 atomic_inc(&s->s_active);
2806 dentry = dget(nd->path.dentry); 2802 dentry = dget(path.dentry);
2807 2803
2808 path_put(&nd->path); 2804 path_put(&path);
2809 kfree(nd);
2810 down_write(&s->s_umount); 2805 down_write(&s->s_umount);
2811 return dentry; 2806 return dentry;
2812out_put_mnt_ns: 2807out_put_mnt_ns:
@@ -2814,7 +2809,6 @@ out_put_mnt_ns:
2814out_mntput: 2809out_mntput:
2815 mntput(root_mnt); 2810 mntput(root_mnt);
2816out_err: 2811out_err:
2817 kfree(nd);
2818 return ERR_PTR(ret); 2812 return ERR_PTR(ret);
2819} 2813}
2820 2814
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 727168059684..08579312c57b 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -409,7 +409,7 @@ out:
409 */ 409 */
410static void nfs_inode_remove_request(struct nfs_page *req) 410static void nfs_inode_remove_request(struct nfs_page *req)
411{ 411{
412 struct inode *inode = req->wb_context->path.dentry->d_inode; 412 struct inode *inode = req->wb_context->dentry->d_inode;
413 struct nfs_inode *nfsi = NFS_I(inode); 413 struct nfs_inode *nfsi = NFS_I(inode);
414 414
415 BUG_ON (!NFS_WBACK_BUSY(req)); 415 BUG_ON (!NFS_WBACK_BUSY(req));
@@ -438,7 +438,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
438static void 438static void
439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg) 439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
440{ 440{
441 struct inode *inode = req->wb_context->path.dentry->d_inode; 441 struct inode *inode = req->wb_context->dentry->d_inode;
442 struct nfs_inode *nfsi = NFS_I(inode); 442 struct nfs_inode *nfsi = NFS_I(inode);
443 443
444 spin_lock(&inode->i_lock); 444 spin_lock(&inode->i_lock);
@@ -852,13 +852,13 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
852 struct pnfs_layout_segment *lseg, 852 struct pnfs_layout_segment *lseg,
853 int how) 853 int how)
854{ 854{
855 struct inode *inode = req->wb_context->path.dentry->d_inode; 855 struct inode *inode = req->wb_context->dentry->d_inode;
856 856
857 /* Set up the RPC argument and reply structs 857 /* Set up the RPC argument and reply structs
858 * NB: take care not to mess about with data->commit et al. */ 858 * NB: take care not to mess about with data->commit et al. */
859 859
860 data->req = req; 860 data->req = req;
861 data->inode = inode = req->wb_context->path.dentry->d_inode; 861 data->inode = inode = req->wb_context->dentry->d_inode;
862 data->cred = req->wb_context->cred; 862 data->cred = req->wb_context->cred;
863 data->lseg = get_lseg(lseg); 863 data->lseg = get_lseg(lseg);
864 864
@@ -1053,9 +1053,9 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
1053 1053
1054 dprintk("NFS: %5u write(%s/%lld %d@%lld)", 1054 dprintk("NFS: %5u write(%s/%lld %d@%lld)",
1055 task->tk_pid, 1055 task->tk_pid,
1056 data->req->wb_context->path.dentry->d_inode->i_sb->s_id, 1056 data->req->wb_context->dentry->d_inode->i_sb->s_id,
1057 (long long) 1057 (long long)
1058 NFS_FILEID(data->req->wb_context->path.dentry->d_inode), 1058 NFS_FILEID(data->req->wb_context->dentry->d_inode),
1059 data->req->wb_bytes, (long long)req_offset(data->req)); 1059 data->req->wb_bytes, (long long)req_offset(data->req));
1060 1060
1061 nfs_writeback_done(task, data); 1061 nfs_writeback_done(task, data);
@@ -1148,8 +1148,8 @@ static void nfs_writeback_release_full(void *calldata)
1148 1148
1149 dprintk("NFS: %5u write (%s/%lld %d@%lld)", 1149 dprintk("NFS: %5u write (%s/%lld %d@%lld)",
1150 data->task.tk_pid, 1150 data->task.tk_pid,
1151 req->wb_context->path.dentry->d_inode->i_sb->s_id, 1151 req->wb_context->dentry->d_inode->i_sb->s_id,
1152 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 1152 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1153 req->wb_bytes, 1153 req->wb_bytes,
1154 (long long)req_offset(req)); 1154 (long long)req_offset(req));
1155 1155
@@ -1347,7 +1347,7 @@ void nfs_init_commit(struct nfs_write_data *data,
1347 struct pnfs_layout_segment *lseg) 1347 struct pnfs_layout_segment *lseg)
1348{ 1348{
1349 struct nfs_page *first = nfs_list_entry(head->next); 1349 struct nfs_page *first = nfs_list_entry(head->next);
1350 struct inode *inode = first->wb_context->path.dentry->d_inode; 1350 struct inode *inode = first->wb_context->dentry->d_inode;
1351 1351
1352 /* Set up the RPC argument and reply structs 1352 /* Set up the RPC argument and reply structs
1353 * NB: take care not to mess about with data->commit et al. */ 1353 * NB: take care not to mess about with data->commit et al. */
@@ -1435,8 +1435,8 @@ void nfs_commit_release_pages(struct nfs_write_data *data)
1435 nfs_clear_request_commit(req); 1435 nfs_clear_request_commit(req);
1436 1436
1437 dprintk("NFS: commit (%s/%lld %d@%lld)", 1437 dprintk("NFS: commit (%s/%lld %d@%lld)",
1438 req->wb_context->path.dentry->d_inode->i_sb->s_id, 1438 req->wb_context->dentry->d_sb->s_id,
1439 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 1439 (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
1440 req->wb_bytes, 1440 req->wb_bytes,
1441 (long long)req_offset(req)); 1441 (long long)req_offset(req));
1442 if (status < 0) { 1442 if (status < 0) {
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index ffb59ef6f82f..29d77f60585b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -191,52 +191,42 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen,
191} 191}
192 192
193static int 193static int
194nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) 194nfsd4_list_rec_dir(recdir_func *f)
195{ 195{
196 const struct cred *original_cred; 196 const struct cred *original_cred;
197 struct file *filp; 197 struct dentry *dir = rec_file->f_path.dentry;
198 LIST_HEAD(names); 198 LIST_HEAD(names);
199 struct name_list *entry;
200 struct dentry *dentry;
201 int status; 199 int status;
202 200
203 if (!rec_file)
204 return 0;
205
206 status = nfs4_save_creds(&original_cred); 201 status = nfs4_save_creds(&original_cred);
207 if (status < 0) 202 if (status < 0)
208 return status; 203 return status;
209 204
210 filp = dentry_open(dget(dir), mntget(rec_file->f_path.mnt), O_RDONLY, 205 status = vfs_llseek(rec_file, 0, SEEK_SET);
211 current_cred()); 206 if (status < 0) {
212 status = PTR_ERR(filp); 207 nfs4_reset_creds(original_cred);
213 if (IS_ERR(filp)) 208 return status;
214 goto out; 209 }
215 status = vfs_readdir(filp, nfsd4_build_namelist, &names); 210
216 fput(filp); 211 status = vfs_readdir(rec_file, nfsd4_build_namelist, &names);
217 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); 212 mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
218 while (!list_empty(&names)) { 213 while (!list_empty(&names)) {
214 struct name_list *entry;
219 entry = list_entry(names.next, struct name_list, list); 215 entry = list_entry(names.next, struct name_list, list);
220 216 if (!status) {
221 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); 217 struct dentry *dentry;
222 if (IS_ERR(dentry)) { 218 dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1);
223 status = PTR_ERR(dentry); 219 if (IS_ERR(dentry)) {
224 break; 220 status = PTR_ERR(dentry);
221 break;
222 }
223 status = f(dir, dentry);
224 dput(dentry);
225 } 225 }
226 status = f(dir, dentry);
227 dput(dentry);
228 if (status)
229 break;
230 list_del(&entry->list); 226 list_del(&entry->list);
231 kfree(entry); 227 kfree(entry);
232 } 228 }
233 mutex_unlock(&dir->d_inode->i_mutex); 229 mutex_unlock(&dir->d_inode->i_mutex);
234out:
235 while (!list_empty(&names)) {
236 entry = list_entry(names.next, struct name_list, list);
237 list_del(&entry->list);
238 kfree(entry);
239 }
240 nfs4_reset_creds(original_cred); 230 nfs4_reset_creds(original_cred);
241 return status; 231 return status;
242} 232}
@@ -322,7 +312,7 @@ nfsd4_recdir_purge_old(void) {
322 status = mnt_want_write(rec_file->f_path.mnt); 312 status = mnt_want_write(rec_file->f_path.mnt);
323 if (status) 313 if (status)
324 goto out; 314 goto out;
325 status = nfsd4_list_rec_dir(rec_file->f_path.dentry, purge_old); 315 status = nfsd4_list_rec_dir(purge_old);
326 if (status == 0) 316 if (status == 0)
327 vfs_fsync(rec_file, 0); 317 vfs_fsync(rec_file, 0);
328 mnt_drop_write(rec_file->f_path.mnt); 318 mnt_drop_write(rec_file->f_path.mnt);
@@ -352,7 +342,7 @@ nfsd4_recdir_load(void) {
352 if (!rec_file) 342 if (!rec_file)
353 return 0; 343 return 0;
354 344
355 status = nfsd4_list_rec_dir(rec_file->f_path.dentry, load_recdir); 345 status = nfsd4_list_rec_dir(load_recdir);
356 if (status) 346 if (status)
357 printk("nfsd4: failed loading clients from recovery" 347 printk("nfsd4: failed loading clients from recovery"
358 " directory %s\n", rec_file->f_path.dentry->d_name.name); 348 " directory %s\n", rec_file->f_path.dentry->d_name.name);
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index d7eeca62febd..26601529dc17 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -27,7 +27,7 @@
27#include "nilfs.h" 27#include "nilfs.h"
28#include "segment.h" 28#include "segment.h"
29 29
30int nilfs_sync_file(struct file *file, int datasync) 30int nilfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
31{ 31{
32 /* 32 /*
33 * Called from fsync() system call 33 * Called from fsync() system call
@@ -40,8 +40,15 @@ int nilfs_sync_file(struct file *file, int datasync)
40 struct inode *inode = file->f_mapping->host; 40 struct inode *inode = file->f_mapping->host;
41 int err; 41 int err;
42 42
43 if (!nilfs_inode_dirty(inode)) 43 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
44 if (err)
45 return err;
46 mutex_lock(&inode->i_mutex);
47
48 if (!nilfs_inode_dirty(inode)) {
49 mutex_unlock(&inode->i_mutex);
44 return 0; 50 return 0;
51 }
45 52
46 if (datasync) 53 if (datasync)
47 err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0, 54 err = nilfs_construct_dsync_segment(inode->i_sb, inode, 0,
@@ -49,6 +56,7 @@ int nilfs_sync_file(struct file *file, int datasync)
49 else 56 else
50 err = nilfs_construct_segment(inode->i_sb); 57 err = nilfs_construct_segment(inode->i_sb);
51 58
59 mutex_unlock(&inode->i_mutex);
52 return err; 60 return err;
53} 61}
54 62
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index b9b45fc2903e..666628b395f1 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -259,8 +259,8 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
259 return 0; 259 return 0;
260 260
261 /* Needs synchronization with the cleaner */ 261 /* Needs synchronization with the cleaner */
262 size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 262 size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
263 offset, nr_segs, nilfs_get_block, NULL); 263 nilfs_get_block);
264 264
265 /* 265 /*
266 * In case of error extending write may have instantiated a few 266 * In case of error extending write may have instantiated a few
@@ -778,6 +778,8 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
778 778
779 if ((iattr->ia_valid & ATTR_SIZE) && 779 if ((iattr->ia_valid & ATTR_SIZE) &&
780 iattr->ia_size != i_size_read(inode)) { 780 iattr->ia_size != i_size_read(inode)) {
781 inode_dio_wait(inode);
782
781 err = vmtruncate(inode, iattr->ia_size); 783 err = vmtruncate(inode, iattr->ia_size);
782 if (unlikely(err)) 784 if (unlikely(err))
783 goto out_err; 785 goto out_err;
@@ -799,14 +801,14 @@ out_err:
799 return err; 801 return err;
800} 802}
801 803
802int nilfs_permission(struct inode *inode, int mask, unsigned int flags) 804int nilfs_permission(struct inode *inode, int mask)
803{ 805{
804 struct nilfs_root *root = NILFS_I(inode)->i_root; 806 struct nilfs_root *root = NILFS_I(inode)->i_root;
805 if ((mask & MAY_WRITE) && root && 807 if ((mask & MAY_WRITE) && root &&
806 root->cno != NILFS_CPTREE_CURRENT_CNO) 808 root->cno != NILFS_CPTREE_CURRENT_CNO)
807 return -EROFS; /* snapshot is not writable */ 809 return -EROFS; /* snapshot is not writable */
808 810
809 return generic_permission(inode, mask, flags, NULL); 811 return generic_permission(inode, mask);
810} 812}
811 813
812int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh) 814int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 546849b3e88f..a3141990061e 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -72,12 +72,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
72 return ERR_PTR(-ENAMETOOLONG); 72 return ERR_PTR(-ENAMETOOLONG);
73 73
74 ino = nilfs_inode_by_name(dir, &dentry->d_name); 74 ino = nilfs_inode_by_name(dir, &dentry->d_name);
75 inode = NULL; 75 inode = ino ? nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino) : NULL;
76 if (ino) {
77 inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino);
78 if (IS_ERR(inode))
79 return ERR_CAST(inode);
80 }
81 return d_splice_alias(inode, dentry); 76 return d_splice_alias(inode, dentry);
82} 77}
83 78
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index f02b9ad43a21..255d5e1c03b7 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -235,7 +235,7 @@ extern void nilfs_set_link(struct inode *, struct nilfs_dir_entry *,
235 struct page *, struct inode *); 235 struct page *, struct inode *);
236 236
237/* file.c */ 237/* file.c */
238extern int nilfs_sync_file(struct file *, int); 238extern int nilfs_sync_file(struct file *, loff_t, loff_t, int);
239 239
240/* ioctl.c */ 240/* ioctl.c */
241long nilfs_ioctl(struct file *, unsigned int, unsigned long); 241long nilfs_ioctl(struct file *, unsigned int, unsigned long);
@@ -264,7 +264,7 @@ extern void nilfs_update_inode(struct inode *, struct buffer_head *);
264extern void nilfs_truncate(struct inode *); 264extern void nilfs_truncate(struct inode *);
265extern void nilfs_evict_inode(struct inode *); 265extern void nilfs_evict_inode(struct inode *);
266extern int nilfs_setattr(struct dentry *, struct iattr *); 266extern int nilfs_setattr(struct dentry *, struct iattr *);
267int nilfs_permission(struct inode *inode, int mask, unsigned int flags); 267int nilfs_permission(struct inode *inode, int mask);
268int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh); 268int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
269extern int nilfs_inode_dirty(struct inode *); 269extern int nilfs_inode_dirty(struct inode *);
270int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty); 270int nilfs_set_file_dirty(struct inode *inode, unsigned nr_dirty);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 0f48e7c5d9e1..99e36107ff60 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1527,13 +1527,20 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
1527 * this problem for now. We do write the $BITMAP attribute if it is present 1527 * this problem for now. We do write the $BITMAP attribute if it is present
1528 * which is the important one for a directory so things are not too bad. 1528 * which is the important one for a directory so things are not too bad.
1529 */ 1529 */
1530static int ntfs_dir_fsync(struct file *filp, int datasync) 1530static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
1531 int datasync)
1531{ 1532{
1532 struct inode *bmp_vi, *vi = filp->f_mapping->host; 1533 struct inode *bmp_vi, *vi = filp->f_mapping->host;
1533 int err, ret; 1534 int err, ret;
1534 ntfs_attr na; 1535 ntfs_attr na;
1535 1536
1536 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); 1537 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
1538
1539 err = filemap_write_and_wait_range(vi->i_mapping, start, end);
1540 if (err)
1541 return err;
1542 mutex_lock(&vi->i_mutex);
1543
1537 BUG_ON(!S_ISDIR(vi->i_mode)); 1544 BUG_ON(!S_ISDIR(vi->i_mode));
1538 /* If the bitmap attribute inode is in memory sync it, too. */ 1545 /* If the bitmap attribute inode is in memory sync it, too. */
1539 na.mft_no = vi->i_ino; 1546 na.mft_no = vi->i_ino;
@@ -1555,6 +1562,7 @@ static int ntfs_dir_fsync(struct file *filp, int datasync)
1555 else 1562 else
1556 ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " 1563 ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
1557 "%u.", datasync ? "data" : "", vi->i_ino, -ret); 1564 "%u.", datasync ? "data" : "", vi->i_ino, -ret);
1565 mutex_unlock(&vi->i_mutex);
1558 return ret; 1566 return ret;
1559} 1567}
1560 1568
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index f4b1057abdd2..c587e2d27183 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1832,9 +1832,8 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
1832 * fails again. 1832 * fails again.
1833 */ 1833 */
1834 if (unlikely(NInoTruncateFailed(ni))) { 1834 if (unlikely(NInoTruncateFailed(ni))) {
1835 down_write(&vi->i_alloc_sem); 1835 inode_dio_wait(vi);
1836 err = ntfs_truncate(vi); 1836 err = ntfs_truncate(vi);
1837 up_write(&vi->i_alloc_sem);
1838 if (err || NInoTruncateFailed(ni)) { 1837 if (err || NInoTruncateFailed(ni)) {
1839 if (!err) 1838 if (!err)
1840 err = -EIO; 1839 err = -EIO;
@@ -2153,12 +2152,19 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
2153 * with this inode but since we have no simple way of getting to them we ignore 2152 * with this inode but since we have no simple way of getting to them we ignore
2154 * this problem for now. 2153 * this problem for now.
2155 */ 2154 */
2156static int ntfs_file_fsync(struct file *filp, int datasync) 2155static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
2156 int datasync)
2157{ 2157{
2158 struct inode *vi = filp->f_mapping->host; 2158 struct inode *vi = filp->f_mapping->host;
2159 int err, ret = 0; 2159 int err, ret = 0;
2160 2160
2161 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino); 2161 ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
2162
2163 err = filemap_write_and_wait_range(vi->i_mapping, start, end);
2164 if (err)
2165 return err;
2166 mutex_lock(&vi->i_mutex);
2167
2162 BUG_ON(S_ISDIR(vi->i_mode)); 2168 BUG_ON(S_ISDIR(vi->i_mode));
2163 if (!datasync || !NInoNonResident(NTFS_I(vi))) 2169 if (!datasync || !NInoNonResident(NTFS_I(vi)))
2164 ret = __ntfs_write_inode(vi, 1); 2170 ret = __ntfs_write_inode(vi, 1);
@@ -2176,6 +2182,7 @@ static int ntfs_file_fsync(struct file *filp, int datasync)
2176 else 2182 else
2177 ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error " 2183 ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
2178 "%u.", datasync ? "data" : "", vi->i_ino, -ret); 2184 "%u.", datasync ? "data" : "", vi->i_ino, -ret);
2185 mutex_unlock(&vi->i_mutex);
2179 return ret; 2186 return ret;
2180} 2187}
2181 2188
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index c05d6dcf77a4..1371487da955 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -2357,12 +2357,7 @@ static const char *es = " Leaving inconsistent metadata. Unmount and run "
2357 * 2357 *
2358 * Returns 0 on success or -errno on error. 2358 * Returns 0 on success or -errno on error.
2359 * 2359 *
2360 * Called with ->i_mutex held. In all but one case ->i_alloc_sem is held for 2360 * Called with ->i_mutex held.
2361 * writing. The only case in the kernel where ->i_alloc_sem is not held is
2362 * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
2363 * with the current i_size as the offset. The analogous place in NTFS is in
2364 * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
2365 * without holding ->i_alloc_sem.
2366 */ 2361 */
2367int ntfs_truncate(struct inode *vi) 2362int ntfs_truncate(struct inode *vi)
2368{ 2363{
@@ -2887,8 +2882,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
2887 * We also abort all changes of user, group, and mode as we do not implement 2882 * We also abort all changes of user, group, and mode as we do not implement
2888 * the NTFS ACLs yet. 2883 * the NTFS ACLs yet.
2889 * 2884 *
2890 * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also 2885 * Called with ->i_mutex held.
2891 * called with ->i_alloc_sem held for writing.
2892 */ 2886 */
2893int ntfs_setattr(struct dentry *dentry, struct iattr *attr) 2887int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
2894{ 2888{
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index e913ad130fdd..1cee970eb55a 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -290,14 +290,14 @@ static int ocfs2_set_acl(handle_t *handle,
290 return ret; 290 return ret;
291} 291}
292 292
293int ocfs2_check_acl(struct inode *inode, int mask, unsigned int flags) 293int ocfs2_check_acl(struct inode *inode, int mask)
294{ 294{
295 struct ocfs2_super *osb; 295 struct ocfs2_super *osb;
296 struct buffer_head *di_bh = NULL; 296 struct buffer_head *di_bh = NULL;
297 struct posix_acl *acl; 297 struct posix_acl *acl;
298 int ret = -EAGAIN; 298 int ret = -EAGAIN;
299 299
300 if (flags & IPERM_FLAG_RCU) 300 if (mask & MAY_NOT_BLOCK)
301 return -ECHILD; 301 return -ECHILD;
302 302
303 osb = OCFS2_SB(inode->i_sb); 303 osb = OCFS2_SB(inode->i_sb);
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 4fe7c9cf4bfb..5c5d31f05853 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,7 +26,7 @@ struct ocfs2_acl_entry {
26 __le32 e_id; 26 __le32 e_id;
27}; 27};
28 28
29extern int ocfs2_check_acl(struct inode *, int, unsigned int); 29extern int ocfs2_check_acl(struct inode *, int);
30extern int ocfs2_acl_chmod(struct inode *); 30extern int ocfs2_acl_chmod(struct inode *);
31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, 31extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
32 struct buffer_head *, struct buffer_head *, 32 struct buffer_head *, struct buffer_head *,
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index ac97bca282d2..c1efe939c774 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -551,9 +551,8 @@ bail:
551 551
552/* 552/*
553 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're 553 * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
554 * particularly interested in the aio/dio case. Like the core uses 554 * particularly interested in the aio/dio case. We use the rw_lock DLM lock
555 * i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from 555 * to protect io on one node from truncation on another.
556 * truncation on another.
557 */ 556 */
558static void ocfs2_dio_end_io(struct kiocb *iocb, 557static void ocfs2_dio_end_io(struct kiocb *iocb,
559 loff_t offset, 558 loff_t offset,
@@ -568,10 +567,8 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
568 /* this io's submitter should not have unlocked this before we could */ 567 /* this io's submitter should not have unlocked this before we could */
569 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); 568 BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
570 569
571 if (ocfs2_iocb_is_sem_locked(iocb)) { 570 if (ocfs2_iocb_is_sem_locked(iocb))
572 up_read(&inode->i_alloc_sem);
573 ocfs2_iocb_clear_sem_locked(iocb); 571 ocfs2_iocb_clear_sem_locked(iocb);
574 }
575 572
576 ocfs2_iocb_clear_rw_locked(iocb); 573 ocfs2_iocb_clear_rw_locked(iocb);
577 574
@@ -580,6 +577,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
580 577
581 if (is_async) 578 if (is_async)
582 aio_complete(iocb, ret, 0); 579 aio_complete(iocb, ret, 0);
580 inode_dio_done(inode);
583} 581}
584 582
585/* 583/*
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index b1e35a392ca5..0fc2bd34039d 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -171,7 +171,8 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file)
171 return 0; 171 return 0;
172} 172}
173 173
174static int ocfs2_sync_file(struct file *file, int datasync) 174static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
175 int datasync)
175{ 176{
176 int err = 0; 177 int err = 0;
177 journal_t *journal; 178 journal_t *journal;
@@ -184,6 +185,16 @@ static int ocfs2_sync_file(struct file *file, int datasync)
184 file->f_path.dentry->d_name.name, 185 file->f_path.dentry->d_name.name,
185 (unsigned long long)datasync); 186 (unsigned long long)datasync);
186 187
188 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
189 if (err)
190 return err;
191
192 /*
193 * Probably don't need the i_mutex at all in here, just putting it here
194 * to be consistent with how fsync used to be called, someone more
195 * familiar with the fs could possibly remove it.
196 */
197 mutex_lock(&inode->i_mutex);
187 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { 198 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) {
188 /* 199 /*
189 * We still have to flush drive's caches to get data to the 200 * We still have to flush drive's caches to get data to the
@@ -200,6 +211,7 @@ static int ocfs2_sync_file(struct file *file, int datasync)
200bail: 211bail:
201 if (err) 212 if (err)
202 mlog_errno(err); 213 mlog_errno(err);
214 mutex_unlock(&inode->i_mutex);
203 215
204 return (err < 0) ? -EIO : 0; 216 return (err < 0) ? -EIO : 0;
205} 217}
@@ -1142,6 +1154,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1142 if (status) 1154 if (status)
1143 goto bail_unlock; 1155 goto bail_unlock;
1144 1156
1157 inode_dio_wait(inode);
1158
1145 if (i_size_read(inode) > attr->ia_size) { 1159 if (i_size_read(inode) > attr->ia_size) {
1146 if (ocfs2_should_order_data(inode)) { 1160 if (ocfs2_should_order_data(inode)) {
1147 status = ocfs2_begin_ordered_truncate(inode, 1161 status = ocfs2_begin_ordered_truncate(inode,
@@ -1279,11 +1293,11 @@ bail:
1279 return err; 1293 return err;
1280} 1294}
1281 1295
1282int ocfs2_permission(struct inode *inode, int mask, unsigned int flags) 1296int ocfs2_permission(struct inode *inode, int mask)
1283{ 1297{
1284 int ret; 1298 int ret;
1285 1299
1286 if (flags & IPERM_FLAG_RCU) 1300 if (mask & MAY_NOT_BLOCK)
1287 return -ECHILD; 1301 return -ECHILD;
1288 1302
1289 ret = ocfs2_inode_lock(inode, NULL, 0); 1303 ret = ocfs2_inode_lock(inode, NULL, 0);
@@ -1293,7 +1307,7 @@ int ocfs2_permission(struct inode *inode, int mask, unsigned int flags)
1293 goto out; 1307 goto out;
1294 } 1308 }
1295 1309
1296 ret = generic_permission(inode, mask, flags, ocfs2_check_acl); 1310 ret = generic_permission(inode, mask);
1297 1311
1298 ocfs2_inode_unlock(inode, 0); 1312 ocfs2_inode_unlock(inode, 0);
1299out: 1313out:
@@ -2236,9 +2250,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
2236 ocfs2_iocb_clear_sem_locked(iocb); 2250 ocfs2_iocb_clear_sem_locked(iocb);
2237 2251
2238relock: 2252relock:
2239 /* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */ 2253 /* to match setattr's i_mutex -> rw_lock ordering */
2240 if (direct_io) { 2254 if (direct_io) {
2241 down_read(&inode->i_alloc_sem);
2242 have_alloc_sem = 1; 2255 have_alloc_sem = 1;
2243 /* communicate with ocfs2_dio_end_io */ 2256 /* communicate with ocfs2_dio_end_io */
2244 ocfs2_iocb_set_sem_locked(iocb); 2257 ocfs2_iocb_set_sem_locked(iocb);
@@ -2290,7 +2303,6 @@ relock:
2290 */ 2303 */
2291 if (direct_io && !can_do_direct) { 2304 if (direct_io && !can_do_direct) {
2292 ocfs2_rw_unlock(inode, rw_level); 2305 ocfs2_rw_unlock(inode, rw_level);
2293 up_read(&inode->i_alloc_sem);
2294 2306
2295 have_alloc_sem = 0; 2307 have_alloc_sem = 0;
2296 rw_level = -1; 2308 rw_level = -1;
@@ -2361,8 +2373,7 @@ out_dio:
2361 /* 2373 /*
2362 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io 2374 * deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
2363 * function pointer which is called when o_direct io completes so that 2375 * function pointer which is called when o_direct io completes so that
2364 * it can unlock our rw lock. (it's the clustered equivalent of 2376 * it can unlock our rw lock.
2365 * i_alloc_sem; protects truncate from racing with pending ios).
2366 * Unfortunately there are error cases which call end_io and others 2377 * Unfortunately there are error cases which call end_io and others
2367 * that don't. so we don't have to unlock the rw_lock if either an 2378 * that don't. so we don't have to unlock the rw_lock if either an
2368 * async dio is going to do it in the future or an end_io after an 2379 * async dio is going to do it in the future or an end_io after an
@@ -2378,10 +2389,8 @@ out:
2378 ocfs2_rw_unlock(inode, rw_level); 2389 ocfs2_rw_unlock(inode, rw_level);
2379 2390
2380out_sems: 2391out_sems:
2381 if (have_alloc_sem) { 2392 if (have_alloc_sem)
2382 up_read(&inode->i_alloc_sem);
2383 ocfs2_iocb_clear_sem_locked(iocb); 2393 ocfs2_iocb_clear_sem_locked(iocb);
2384 }
2385 2394
2386 mutex_unlock(&inode->i_mutex); 2395 mutex_unlock(&inode->i_mutex);
2387 2396
@@ -2531,7 +2540,6 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2531 * need locks to protect pending reads from racing with truncate. 2540 * need locks to protect pending reads from racing with truncate.
2532 */ 2541 */
2533 if (filp->f_flags & O_DIRECT) { 2542 if (filp->f_flags & O_DIRECT) {
2534 down_read(&inode->i_alloc_sem);
2535 have_alloc_sem = 1; 2543 have_alloc_sem = 1;
2536 ocfs2_iocb_set_sem_locked(iocb); 2544 ocfs2_iocb_set_sem_locked(iocb);
2537 2545
@@ -2574,10 +2582,9 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
2574 } 2582 }
2575 2583
2576bail: 2584bail:
2577 if (have_alloc_sem) { 2585 if (have_alloc_sem)
2578 up_read(&inode->i_alloc_sem);
2579 ocfs2_iocb_clear_sem_locked(iocb); 2586 ocfs2_iocb_clear_sem_locked(iocb);
2580 } 2587
2581 if (rw_level != -1) 2588 if (rw_level != -1)
2582 ocfs2_rw_unlock(inode, rw_level); 2589 ocfs2_rw_unlock(inode, rw_level);
2583 2590
@@ -2593,12 +2600,14 @@ const struct inode_operations ocfs2_file_iops = {
2593 .listxattr = ocfs2_listxattr, 2600 .listxattr = ocfs2_listxattr,
2594 .removexattr = generic_removexattr, 2601 .removexattr = generic_removexattr,
2595 .fiemap = ocfs2_fiemap, 2602 .fiemap = ocfs2_fiemap,
2603 .check_acl = ocfs2_check_acl,
2596}; 2604};
2597 2605
2598const struct inode_operations ocfs2_special_file_iops = { 2606const struct inode_operations ocfs2_special_file_iops = {
2599 .setattr = ocfs2_setattr, 2607 .setattr = ocfs2_setattr,
2600 .getattr = ocfs2_getattr, 2608 .getattr = ocfs2_getattr,
2601 .permission = ocfs2_permission, 2609 .permission = ocfs2_permission,
2610 .check_acl = ocfs2_check_acl,
2602}; 2611};
2603 2612
2604/* 2613/*
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index f5afbbef6703..97bf761c9e7c 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -61,7 +61,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 61int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 62int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
63 struct kstat *stat); 63 struct kstat *stat);
64int ocfs2_permission(struct inode *inode, int mask, unsigned int flags); 64int ocfs2_permission(struct inode *inode, int mask);
65 65
66int ocfs2_should_update_atime(struct inode *inode, 66int ocfs2_should_update_atime(struct inode *inode,
67 struct vfsmount *vfsmnt); 67 struct vfsmount *vfsmnt);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index e5d738cd9cc0..33889dc52dd7 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2498,4 +2498,5 @@ const struct inode_operations ocfs2_dir_iops = {
2498 .listxattr = ocfs2_listxattr, 2498 .listxattr = ocfs2_listxattr,
2499 .removexattr = generic_removexattr, 2499 .removexattr = generic_removexattr,
2500 .fiemap = ocfs2_fiemap, 2500 .fiemap = ocfs2_fiemap,
2501 .check_acl = ocfs2_check_acl,
2501}; 2502};
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ebfd3825f12a..cf7823382664 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4368,25 +4368,6 @@ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
4368 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 4368 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
4369} 4369}
4370 4370
4371/* copied from user_path_parent. */
4372static int ocfs2_user_path_parent(const char __user *path,
4373 struct nameidata *nd, char **name)
4374{
4375 char *s = getname(path);
4376 int error;
4377
4378 if (IS_ERR(s))
4379 return PTR_ERR(s);
4380
4381 error = kern_path_parent(s, nd);
4382 if (error)
4383 putname(s);
4384 else
4385 *name = s;
4386
4387 return error;
4388}
4389
4390/** 4371/**
4391 * ocfs2_vfs_reflink - Create a reference-counted link 4372 * ocfs2_vfs_reflink - Create a reference-counted link
4392 * 4373 *
@@ -4460,10 +4441,8 @@ int ocfs2_reflink_ioctl(struct inode *inode,
4460 bool preserve) 4441 bool preserve)
4461{ 4442{
4462 struct dentry *new_dentry; 4443 struct dentry *new_dentry;
4463 struct nameidata nd; 4444 struct path old_path, new_path;
4464 struct path old_path;
4465 int error; 4445 int error;
4466 char *to = NULL;
4467 4446
4468 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) 4447 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
4469 return -EOPNOTSUPP; 4448 return -EOPNOTSUPP;
@@ -4474,39 +4453,33 @@ int ocfs2_reflink_ioctl(struct inode *inode,
4474 return error; 4453 return error;
4475 } 4454 }
4476 4455
4477 error = ocfs2_user_path_parent(newname, &nd, &to); 4456 new_dentry = user_path_create(AT_FDCWD, newname, &new_path, 0);
4478 if (error) { 4457 error = PTR_ERR(new_dentry);
4458 if (IS_ERR(new_dentry)) {
4479 mlog_errno(error); 4459 mlog_errno(error);
4480 goto out; 4460 goto out;
4481 } 4461 }
4482 4462
4483 error = -EXDEV; 4463 error = -EXDEV;
4484 if (old_path.mnt != nd.path.mnt) 4464 if (old_path.mnt != new_path.mnt) {
4485 goto out_release;
4486 new_dentry = lookup_create(&nd, 0);
4487 error = PTR_ERR(new_dentry);
4488 if (IS_ERR(new_dentry)) {
4489 mlog_errno(error); 4465 mlog_errno(error);
4490 goto out_unlock; 4466 goto out_dput;
4491 } 4467 }
4492 4468
4493 error = mnt_want_write(nd.path.mnt); 4469 error = mnt_want_write(new_path.mnt);
4494 if (error) { 4470 if (error) {
4495 mlog_errno(error); 4471 mlog_errno(error);
4496 goto out_dput; 4472 goto out_dput;
4497 } 4473 }
4498 4474
4499 error = ocfs2_vfs_reflink(old_path.dentry, 4475 error = ocfs2_vfs_reflink(old_path.dentry,
4500 nd.path.dentry->d_inode, 4476 new_path.dentry->d_inode,
4501 new_dentry, preserve); 4477 new_dentry, preserve);
4502 mnt_drop_write(nd.path.mnt); 4478 mnt_drop_write(new_path.mnt);
4503out_dput: 4479out_dput:
4504 dput(new_dentry); 4480 dput(new_dentry);
4505out_unlock: 4481 mutex_unlock(&new_path.dentry->d_inode->i_mutex);
4506 mutex_unlock(&nd.path.dentry->d_inode->i_mutex); 4482 path_put(&new_path);
4507out_release:
4508 path_put(&nd.path);
4509 putname(to);
4510out: 4483out:
4511 path_put(&old_path); 4484 path_put(&old_path);
4512 4485
diff --git a/fs/open.c b/fs/open.c
index b52cf013ffa1..739b751aa73e 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -793,7 +793,7 @@ out:
793 return nd->intent.open.file; 793 return nd->intent.open.file;
794out_err: 794out_err:
795 release_open_intent(nd); 795 release_open_intent(nd);
796 nd->intent.open.file = (struct file *)dentry; 796 nd->intent.open.file = ERR_CAST(dentry);
797 goto out; 797 goto out;
798} 798}
799EXPORT_SYMBOL_GPL(lookup_instantiate_filp); 799EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 9b45ee84fbcc..3a1dafd228d1 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -172,7 +172,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
172 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; 172 task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0;
173 tpid = 0; 173 tpid = 0;
174 if (pid_alive(p)) { 174 if (pid_alive(p)) {
175 struct task_struct *tracer = tracehook_tracer_task(p); 175 struct task_struct *tracer = ptrace_parent(p);
176 if (tracer) 176 if (tracer)
177 tpid = task_pid_nr_ns(tracer, ns); 177 tpid = task_pid_nr_ns(tracer, ns);
178 } 178 }
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fc5bc2767692..91fb655a5cbf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -216,7 +216,7 @@ static struct mm_struct *__check_mem_permission(struct task_struct *task)
216 if (task_is_stopped_or_traced(task)) { 216 if (task_is_stopped_or_traced(task)) {
217 int match; 217 int match;
218 rcu_read_lock(); 218 rcu_read_lock();
219 match = (tracehook_tracer_task(task) == current); 219 match = (ptrace_parent(task) == current);
220 rcu_read_unlock(); 220 rcu_read_unlock();
221 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) 221 if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH))
222 return mm; 222 return mm;
@@ -673,7 +673,7 @@ static int mounts_open_common(struct inode *inode, struct file *file,
673 p->m.private = p; 673 p->m.private = p;
674 p->ns = ns; 674 p->ns = ns;
675 p->root = root; 675 p->root = root;
676 p->event = ns->event; 676 p->m.poll_event = ns->event;
677 677
678 return 0; 678 return 0;
679 679
@@ -2167,9 +2167,9 @@ static const struct file_operations proc_fd_operations = {
2167 * /proc/pid/fd needs a special permission handler so that a process can still 2167 * /proc/pid/fd needs a special permission handler so that a process can still
2168 * access /proc/self/fd after it has executed a setuid(). 2168 * access /proc/self/fd after it has executed a setuid().
2169 */ 2169 */
2170static int proc_fd_permission(struct inode *inode, int mask, unsigned int flags) 2170static int proc_fd_permission(struct inode *inode, int mask)
2171{ 2171{
2172 int rv = generic_permission(inode, mask, flags, NULL); 2172 int rv = generic_permission(inode, mask);
2173 if (rv == 0) 2173 if (rv == 0)
2174 return 0; 2174 return 0;
2175 if (task_pid(current) == proc_pid(inode)) 2175 if (task_pid(current) == proc_pid(inode))
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index d167de365a8d..1a77dbef226f 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -294,7 +294,7 @@ out:
294 return ret; 294 return ret;
295} 295}
296 296
297static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags) 297static int proc_sys_permission(struct inode *inode, int mask)
298{ 298{
299 /* 299 /*
300 * sysctl entries that are not writeable, 300 * sysctl entries that are not writeable,
@@ -316,7 +316,7 @@ static int proc_sys_permission(struct inode *inode, int mask,unsigned int flags)
316 if (!table) /* global root - r-xr-xr-x */ 316 if (!table) /* global root - r-xr-xr-x */
317 error = mask & MAY_WRITE ? -EACCES : 0; 317 error = mask & MAY_WRITE ? -EACCES : 0;
318 else /* Use the permissions on the sysctl table entry */ 318 else /* Use the permissions on the sysctl table entry */
319 error = sysctl_perm(head->root, table, mask); 319 error = sysctl_perm(head->root, table, mask & ~MAY_NOT_BLOCK);
320 320
321 sysctl_head_finish(head); 321 sysctl_head_finish(head);
322 return error; 322 return error;
diff --git a/fs/read_write.c b/fs/read_write.c
index 5520f8ad5504..5907b49e4d7e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -64,6 +64,23 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
64 return file->f_pos; 64 return file->f_pos;
65 offset += file->f_pos; 65 offset += file->f_pos;
66 break; 66 break;
67 case SEEK_DATA:
68 /*
69 * In the generic case the entire file is data, so as long as
70 * offset isn't at the end of the file then the offset is data.
71 */
72 if (offset >= inode->i_size)
73 return -ENXIO;
74 break;
75 case SEEK_HOLE:
76 /*
77 * There is a virtual hole at the end of the file, so as long as
78 * offset isn't i_size or larger, return i_size.
79 */
80 if (offset >= inode->i_size)
81 return -ENXIO;
82 offset = inode->i_size;
83 break;
67 } 84 }
68 85
69 if (offset < 0 && !unsigned_offsets(file)) 86 if (offset < 0 && !unsigned_offsets(file))
@@ -128,12 +145,13 @@ EXPORT_SYMBOL(no_llseek);
128 145
129loff_t default_llseek(struct file *file, loff_t offset, int origin) 146loff_t default_llseek(struct file *file, loff_t offset, int origin)
130{ 147{
148 struct inode *inode = file->f_path.dentry->d_inode;
131 loff_t retval; 149 loff_t retval;
132 150
133 mutex_lock(&file->f_dentry->d_inode->i_mutex); 151 mutex_lock(&inode->i_mutex);
134 switch (origin) { 152 switch (origin) {
135 case SEEK_END: 153 case SEEK_END:
136 offset += i_size_read(file->f_path.dentry->d_inode); 154 offset += i_size_read(inode);
137 break; 155 break;
138 case SEEK_CUR: 156 case SEEK_CUR:
139 if (offset == 0) { 157 if (offset == 0) {
@@ -141,6 +159,26 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
141 goto out; 159 goto out;
142 } 160 }
143 offset += file->f_pos; 161 offset += file->f_pos;
162 break;
163 case SEEK_DATA:
164 /*
165 * In the generic case the entire file is data, so as
166 * long as offset isn't at the end of the file then the
167 * offset is data.
168 */
169 if (offset >= inode->i_size)
170 return -ENXIO;
171 break;
172 case SEEK_HOLE:
173 /*
174 * There is a virtual hole at the end of the file, so
175 * as long as offset isn't i_size or larger, return
176 * i_size.
177 */
178 if (offset >= inode->i_size)
179 return -ENXIO;
180 offset = inode->i_size;
181 break;
144 } 182 }
145 retval = -EINVAL; 183 retval = -EINVAL;
146 if (offset >= 0 || unsigned_offsets(file)) { 184 if (offset >= 0 || unsigned_offsets(file)) {
@@ -151,7 +189,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
151 retval = offset; 189 retval = offset;
152 } 190 }
153out: 191out:
154 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 192 mutex_unlock(&inode->i_mutex);
155 return retval; 193 return retval;
156} 194}
157EXPORT_SYMBOL(default_llseek); 195EXPORT_SYMBOL(default_llseek);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 198dabf1b2bb..133e9355dc6f 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -14,7 +14,8 @@
14extern const struct reiserfs_key MIN_KEY; 14extern const struct reiserfs_key MIN_KEY;
15 15
16static int reiserfs_readdir(struct file *, void *, filldir_t); 16static int reiserfs_readdir(struct file *, void *, filldir_t);
17static int reiserfs_dir_fsync(struct file *filp, int datasync); 17static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
18 int datasync);
18 19
19const struct file_operations reiserfs_dir_operations = { 20const struct file_operations reiserfs_dir_operations = {
20 .llseek = generic_file_llseek, 21 .llseek = generic_file_llseek,
@@ -27,13 +28,21 @@ const struct file_operations reiserfs_dir_operations = {
27#endif 28#endif
28}; 29};
29 30
30static int reiserfs_dir_fsync(struct file *filp, int datasync) 31static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
32 int datasync)
31{ 33{
32 struct inode *inode = filp->f_mapping->host; 34 struct inode *inode = filp->f_mapping->host;
33 int err; 35 int err;
36
37 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
38 if (err)
39 return err;
40
41 mutex_lock(&inode->i_mutex);
34 reiserfs_write_lock(inode->i_sb); 42 reiserfs_write_lock(inode->i_sb);
35 err = reiserfs_commit_for_inode(inode); 43 err = reiserfs_commit_for_inode(inode);
36 reiserfs_write_unlock(inode->i_sb); 44 reiserfs_write_unlock(inode->i_sb);
45 mutex_unlock(&inode->i_mutex);
37 if (err < 0) 46 if (err < 0)
38 return err; 47 return err;
39 return 0; 48 return 0;
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 91f080cc76c8..c7156dc39ce7 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -140,12 +140,18 @@ static void reiserfs_vfs_truncate_file(struct inode *inode)
140 * be removed... 140 * be removed...
141 */ 141 */
142 142
143static int reiserfs_sync_file(struct file *filp, int datasync) 143static int reiserfs_sync_file(struct file *filp, loff_t start, loff_t end,
144 int datasync)
144{ 145{
145 struct inode *inode = filp->f_mapping->host; 146 struct inode *inode = filp->f_mapping->host;
146 int err; 147 int err;
147 int barrier_done; 148 int barrier_done;
148 149
150 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
151 if (err)
152 return err;
153
154 mutex_lock(&inode->i_mutex);
149 BUG_ON(!S_ISREG(inode->i_mode)); 155 BUG_ON(!S_ISREG(inode->i_mode));
150 err = sync_mapping_buffers(inode->i_mapping); 156 err = sync_mapping_buffers(inode->i_mapping);
151 reiserfs_write_lock(inode->i_sb); 157 reiserfs_write_lock(inode->i_sb);
@@ -153,6 +159,7 @@ static int reiserfs_sync_file(struct file *filp, int datasync)
153 reiserfs_write_unlock(inode->i_sb); 159 reiserfs_write_unlock(inode->i_sb);
154 if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) 160 if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb))
155 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); 161 blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
162 mutex_unlock(&inode->i_mutex);
156 if (barrier_done < 0) 163 if (barrier_done < 0)
157 return barrier_done; 164 return barrier_done;
158 return (err < 0) ? -EIO : 0; 165 return (err < 0) ? -EIO : 0;
@@ -312,4 +319,5 @@ const struct inode_operations reiserfs_file_inode_operations = {
312 .listxattr = reiserfs_listxattr, 319 .listxattr = reiserfs_listxattr,
313 .removexattr = reiserfs_removexattr, 320 .removexattr = reiserfs_removexattr,
314 .permission = reiserfs_permission, 321 .permission = reiserfs_permission,
322 .check_acl = reiserfs_check_acl,
315}; 323};
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 4fd5bb33dbb5..2922b90ceac1 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3068,9 +3068,8 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
3068 struct inode *inode = file->f_mapping->host; 3068 struct inode *inode = file->f_mapping->host;
3069 ssize_t ret; 3069 ssize_t ret;
3070 3070
3071 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 3071 ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
3072 offset, nr_segs, 3072 reiserfs_get_blocks_direct_io);
3073 reiserfs_get_blocks_direct_io, NULL);
3074 3073
3075 /* 3074 /*
3076 * In case of error extending write may have instantiated a few 3075 * In case of error extending write may have instantiated a few
@@ -3114,6 +3113,9 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
3114 error = -EFBIG; 3113 error = -EFBIG;
3115 goto out; 3114 goto out;
3116 } 3115 }
3116
3117 inode_dio_wait(inode);
3118
3117 /* fill in hole pointers in the expanding truncate case. */ 3119 /* fill in hole pointers in the expanding truncate case. */
3118 if (attr->ia_size > inode->i_size) { 3120 if (attr->ia_size > inode->i_size) {
3119 error = generic_cont_expand_simple(inode, attr->ia_size); 3121 error = generic_cont_expand_simple(inode, attr->ia_size);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 118662690cdf..551f1b79dbc4 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1529,6 +1529,7 @@ const struct inode_operations reiserfs_dir_inode_operations = {
1529 .listxattr = reiserfs_listxattr, 1529 .listxattr = reiserfs_listxattr,
1530 .removexattr = reiserfs_removexattr, 1530 .removexattr = reiserfs_removexattr,
1531 .permission = reiserfs_permission, 1531 .permission = reiserfs_permission,
1532 .check_acl = reiserfs_check_acl,
1532}; 1533};
1533 1534
1534/* 1535/*
@@ -1545,6 +1546,7 @@ const struct inode_operations reiserfs_symlink_inode_operations = {
1545 .listxattr = reiserfs_listxattr, 1546 .listxattr = reiserfs_listxattr,
1546 .removexattr = reiserfs_removexattr, 1547 .removexattr = reiserfs_removexattr,
1547 .permission = reiserfs_permission, 1548 .permission = reiserfs_permission,
1549 .check_acl = reiserfs_check_acl,
1548 1550
1549}; 1551};
1550 1552
@@ -1558,5 +1560,5 @@ const struct inode_operations reiserfs_special_inode_operations = {
1558 .listxattr = reiserfs_listxattr, 1560 .listxattr = reiserfs_listxattr,
1559 .removexattr = reiserfs_removexattr, 1561 .removexattr = reiserfs_removexattr,
1560 .permission = reiserfs_permission, 1562 .permission = reiserfs_permission,
1561 1563 .check_acl = reiserfs_check_acl,
1562}; 1564};
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index aa91089162cb..14363b96b6af 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -1643,6 +1643,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent)
1643 /* Set default values for options: non-aggressive tails, RO on errors */ 1643 /* Set default values for options: non-aggressive tails, RO on errors */
1644 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); 1644 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL);
1645 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); 1645 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO);
1646 REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH);
1646 /* no preallocation minimum, be smart in 1647 /* no preallocation minimum, be smart in
1647 reiserfs_file_write instead */ 1648 reiserfs_file_write instead */
1648 REISERFS_SB(s)->s_alloc_options.preallocmin = 0; 1649 REISERFS_SB(s)->s_alloc_options.preallocmin = 0;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index d78089690965..6938d8c68d6e 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -555,11 +555,10 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
555 555
556 reiserfs_write_unlock(inode->i_sb); 556 reiserfs_write_unlock(inode->i_sb);
557 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR); 557 mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
558 down_write(&dentry->d_inode->i_alloc_sem); 558 inode_dio_wait(dentry->d_inode);
559 reiserfs_write_lock(inode->i_sb); 559 reiserfs_write_lock(inode->i_sb);
560 560
561 err = reiserfs_setattr(dentry, &newattrs); 561 err = reiserfs_setattr(dentry, &newattrs);
562 up_write(&dentry->d_inode->i_alloc_sem);
563 mutex_unlock(&dentry->d_inode->i_mutex); 562 mutex_unlock(&dentry->d_inode->i_mutex);
564 } else 563 } else
565 update_ctime(inode); 564 update_ctime(inode);
@@ -868,12 +867,18 @@ out:
868 return err; 867 return err;
869} 868}
870 869
871static int reiserfs_check_acl(struct inode *inode, int mask, unsigned int flags) 870int reiserfs_check_acl(struct inode *inode, int mask)
872{ 871{
873 struct posix_acl *acl; 872 struct posix_acl *acl;
874 int error = -EAGAIN; /* do regular unix permission checks by default */ 873 int error = -EAGAIN; /* do regular unix permission checks by default */
875 874
876 if (flags & IPERM_FLAG_RCU) 875 /*
876 * Stat data v1 doesn't support ACLs.
877 */
878 if (get_inode_sd_version(inode) == STAT_DATA_V1)
879 return -EAGAIN;
880
881 if (mask & MAY_NOT_BLOCK)
877 return -ECHILD; 882 return -ECHILD;
878 883
879 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); 884 acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
@@ -952,7 +957,7 @@ static int xattr_mount_check(struct super_block *s)
952 return 0; 957 return 0;
953} 958}
954 959
955int reiserfs_permission(struct inode *inode, int mask, unsigned int flags) 960int reiserfs_permission(struct inode *inode, int mask)
956{ 961{
957 /* 962 /*
958 * We don't do permission checks on the internal objects. 963 * We don't do permission checks on the internal objects.
@@ -961,15 +966,7 @@ int reiserfs_permission(struct inode *inode, int mask, unsigned int flags)
961 if (IS_PRIVATE(inode)) 966 if (IS_PRIVATE(inode))
962 return 0; 967 return 0;
963 968
964#ifdef CONFIG_REISERFS_FS_XATTR 969 return generic_permission(inode, mask);
965 /*
966 * Stat data v1 doesn't support ACLs.
967 */
968 if (get_inode_sd_version(inode) != STAT_DATA_V1)
969 return generic_permission(inode, mask, flags,
970 reiserfs_check_acl);
971#endif
972 return generic_permission(inode, mask, flags, NULL);
973} 970}
974 971
975static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) 972static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 4bc63ac64bc0..0682b38d7e31 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -220,11 +220,6 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
220 blk, off, ino_num); 220 blk, off, ino_num);
221 221
222 inode = squashfs_iget(dir->i_sb, ino, ino_num); 222 inode = squashfs_iget(dir->i_sb, ino, ino_num);
223 if (IS_ERR(inode)) {
224 err = PTR_ERR(inode);
225 goto failed;
226 }
227
228 goto exit_lookup; 223 goto exit_lookup;
229 } 224 }
230 } 225 }
@@ -232,10 +227,7 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
232 227
233exit_lookup: 228exit_lookup:
234 kfree(dire); 229 kfree(dire);
235 if (inode) 230 return d_splice_alias(inode, dentry);
236 return d_splice_alias(inode, dentry);
237 d_add(dentry, inode);
238 return ERR_PTR(0);
239 231
240data_error: 232data_error:
241 err = -EIO; 233 err = -EIO;
diff --git a/fs/super.c b/fs/super.c
index ab3d672db0de..7943f04cb3a9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,6 +38,69 @@
38LIST_HEAD(super_blocks); 38LIST_HEAD(super_blocks);
39DEFINE_SPINLOCK(sb_lock); 39DEFINE_SPINLOCK(sb_lock);
40 40
41/*
42 * One thing we have to be careful of with a per-sb shrinker is that we don't
43 * drop the last active reference to the superblock from within the shrinker.
44 * If that happens we could trigger unregistering the shrinker from within the
45 * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
46 * take a passive reference to the superblock to avoid this from occurring.
47 */
48static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
49{
50 struct super_block *sb;
51 int fs_objects = 0;
52 int total_objects;
53
54 sb = container_of(shrink, struct super_block, s_shrink);
55
56 /*
57 * Deadlock avoidance. We may hold various FS locks, and we don't want
58 * to recurse into the FS that called us in clear_inode() and friends..
59 */
60 if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
61 return -1;
62
63 if (!grab_super_passive(sb))
64 return -1;
65
66 if (sb->s_op && sb->s_op->nr_cached_objects)
67 fs_objects = sb->s_op->nr_cached_objects(sb);
68
69 total_objects = sb->s_nr_dentry_unused +
70 sb->s_nr_inodes_unused + fs_objects + 1;
71
72 if (sc->nr_to_scan) {
73 int dentries;
74 int inodes;
75
76 /* proportion the scan between the caches */
77 dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) /
78 total_objects;
79 inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) /
80 total_objects;
81 if (fs_objects)
82 fs_objects = (sc->nr_to_scan * fs_objects) /
83 total_objects;
84 /*
85 * prune the dcache first as the icache is pinned by it, then
86 * prune the icache, followed by the filesystem specific caches
87 */
88 prune_dcache_sb(sb, dentries);
89 prune_icache_sb(sb, inodes);
90
91 if (fs_objects && sb->s_op->free_cached_objects) {
92 sb->s_op->free_cached_objects(sb, fs_objects);
93 fs_objects = sb->s_op->nr_cached_objects(sb);
94 }
95 total_objects = sb->s_nr_dentry_unused +
96 sb->s_nr_inodes_unused + fs_objects;
97 }
98
99 total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure;
100 drop_super(sb);
101 return total_objects;
102}
103
41/** 104/**
42 * alloc_super - create new superblock 105 * alloc_super - create new superblock
43 * @type: filesystem type superblock should belong to 106 * @type: filesystem type superblock should belong to
@@ -77,6 +140,8 @@ static struct super_block *alloc_super(struct file_system_type *type)
77 INIT_HLIST_BL_HEAD(&s->s_anon); 140 INIT_HLIST_BL_HEAD(&s->s_anon);
78 INIT_LIST_HEAD(&s->s_inodes); 141 INIT_LIST_HEAD(&s->s_inodes);
79 INIT_LIST_HEAD(&s->s_dentry_lru); 142 INIT_LIST_HEAD(&s->s_dentry_lru);
143 INIT_LIST_HEAD(&s->s_inode_lru);
144 spin_lock_init(&s->s_inode_lru_lock);
80 init_rwsem(&s->s_umount); 145 init_rwsem(&s->s_umount);
81 mutex_init(&s->s_lock); 146 mutex_init(&s->s_lock);
82 lockdep_set_class(&s->s_umount, &type->s_umount_key); 147 lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -114,6 +179,10 @@ static struct super_block *alloc_super(struct file_system_type *type)
114 s->s_op = &default_op; 179 s->s_op = &default_op;
115 s->s_time_gran = 1000000000; 180 s->s_time_gran = 1000000000;
116 s->cleancache_poolid = -1; 181 s->cleancache_poolid = -1;
182
183 s->s_shrink.seeks = DEFAULT_SEEKS;
184 s->s_shrink.shrink = prune_super;
185 s->s_shrink.batch = 1024;
117 } 186 }
118out: 187out:
119 return s; 188 return s;
@@ -181,6 +250,10 @@ void deactivate_locked_super(struct super_block *s)
181 if (atomic_dec_and_test(&s->s_active)) { 250 if (atomic_dec_and_test(&s->s_active)) {
182 cleancache_flush_fs(s); 251 cleancache_flush_fs(s);
183 fs->kill_sb(s); 252 fs->kill_sb(s);
253
254 /* caches are now gone, we can safely kill the shrinker now */
255 unregister_shrinker(&s->s_shrink);
256
184 /* 257 /*
185 * We need to call rcu_barrier so all the delayed rcu free 258 * We need to call rcu_barrier so all the delayed rcu free
186 * inodes are flushed before we release the fs module. 259 * inodes are flushed before we release the fs module.
@@ -241,6 +314,39 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
241} 314}
242 315
243/* 316/*
317 * grab_super_passive - acquire a passive reference
318 * @s: reference we are trying to grab
319 *
320 * Tries to acquire a passive reference. This is used in places where we
321 * cannot take an active reference but we need to ensure that the
322 * superblock does not go away while we are working on it. It returns
323 * false if a reference was not gained, and returns true with the s_umount
324 * lock held in read mode if a reference is gained. On successful return,
325 * the caller must drop the s_umount lock and the passive reference when
326 * done.
327 */
328bool grab_super_passive(struct super_block *sb)
329{
330 spin_lock(&sb_lock);
331 if (list_empty(&sb->s_instances)) {
332 spin_unlock(&sb_lock);
333 return false;
334 }
335
336 sb->s_count++;
337 spin_unlock(&sb_lock);
338
339 if (down_read_trylock(&sb->s_umount)) {
340 if (sb->s_root)
341 return true;
342 up_read(&sb->s_umount);
343 }
344
345 put_super(sb);
346 return false;
347}
348
349/*
244 * Superblock locking. We really ought to get rid of these two. 350 * Superblock locking. We really ought to get rid of these two.
245 */ 351 */
246void lock_super(struct super_block * sb) 352void lock_super(struct super_block * sb)
@@ -276,7 +382,6 @@ void generic_shutdown_super(struct super_block *sb)
276{ 382{
277 const struct super_operations *sop = sb->s_op; 383 const struct super_operations *sop = sb->s_op;
278 384
279
280 if (sb->s_root) { 385 if (sb->s_root) {
281 shrink_dcache_for_umount(sb); 386 shrink_dcache_for_umount(sb);
282 sync_filesystem(sb); 387 sync_filesystem(sb);
@@ -364,6 +469,7 @@ retry:
364 list_add(&s->s_instances, &type->fs_supers); 469 list_add(&s->s_instances, &type->fs_supers);
365 spin_unlock(&sb_lock); 470 spin_unlock(&sb_lock);
366 get_filesystem(type); 471 get_filesystem(type);
472 register_shrinker(&s->s_shrink);
367 return s; 473 return s;
368} 474}
369 475
@@ -452,6 +558,42 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
452} 558}
453 559
454/** 560/**
561 * iterate_supers_type - call function for superblocks of given type
562 * @type: fs type
563 * @f: function to call
564 * @arg: argument to pass to it
565 *
566 * Scans the superblock list and calls the given function, passing it
567 * the locked superblock and the given argument.
568 */
569void iterate_supers_type(struct file_system_type *type,
570 void (*f)(struct super_block *, void *), void *arg)
571{
572 struct super_block *sb, *p = NULL;
573
574 spin_lock(&sb_lock);
575 list_for_each_entry(sb, &type->fs_supers, s_instances) {
576 sb->s_count++;
577 spin_unlock(&sb_lock);
578
579 down_read(&sb->s_umount);
580 if (sb->s_root)
581 f(sb, arg);
582 up_read(&sb->s_umount);
583
584 spin_lock(&sb_lock);
585 if (p)
586 __put_super(p);
587 p = sb;
588 }
589 if (p)
590 __put_super(p);
591 spin_unlock(&sb_lock);
592}
593
594EXPORT_SYMBOL(iterate_supers_type);
595
596/**
455 * get_super - get the superblock of a device 597 * get_super - get the superblock of a device
456 * @bdev: device to get the superblock for 598 * @bdev: device to get the superblock for
457 * 599 *
@@ -657,7 +799,7 @@ static DEFINE_IDA(unnamed_dev_ida);
657static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ 799static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
658static int unnamed_dev_start = 0; /* don't bother trying below it */ 800static int unnamed_dev_start = 0; /* don't bother trying below it */
659 801
660int set_anon_super(struct super_block *s, void *data) 802int get_anon_bdev(dev_t *p)
661{ 803{
662 int dev; 804 int dev;
663 int error; 805 int error;
@@ -684,24 +826,38 @@ int set_anon_super(struct super_block *s, void *data)
684 spin_unlock(&unnamed_dev_lock); 826 spin_unlock(&unnamed_dev_lock);
685 return -EMFILE; 827 return -EMFILE;
686 } 828 }
687 s->s_dev = MKDEV(0, dev & MINORMASK); 829 *p = MKDEV(0, dev & MINORMASK);
688 s->s_bdi = &noop_backing_dev_info;
689 return 0; 830 return 0;
690} 831}
832EXPORT_SYMBOL(get_anon_bdev);
691 833
692EXPORT_SYMBOL(set_anon_super); 834void free_anon_bdev(dev_t dev)
693
694void kill_anon_super(struct super_block *sb)
695{ 835{
696 int slot = MINOR(sb->s_dev); 836 int slot = MINOR(dev);
697
698 generic_shutdown_super(sb);
699 spin_lock(&unnamed_dev_lock); 837 spin_lock(&unnamed_dev_lock);
700 ida_remove(&unnamed_dev_ida, slot); 838 ida_remove(&unnamed_dev_ida, slot);
701 if (slot < unnamed_dev_start) 839 if (slot < unnamed_dev_start)
702 unnamed_dev_start = slot; 840 unnamed_dev_start = slot;
703 spin_unlock(&unnamed_dev_lock); 841 spin_unlock(&unnamed_dev_lock);
704} 842}
843EXPORT_SYMBOL(free_anon_bdev);
844
845int set_anon_super(struct super_block *s, void *data)
846{
847 int error = get_anon_bdev(&s->s_dev);
848 if (!error)
849 s->s_bdi = &noop_backing_dev_info;
850 return error;
851}
852
853EXPORT_SYMBOL(set_anon_super);
854
855void kill_anon_super(struct super_block *sb)
856{
857 dev_t dev = sb->s_dev;
858 generic_shutdown_super(sb);
859 free_anon_bdev(dev);
860}
705 861
706EXPORT_SYMBOL(kill_anon_super); 862EXPORT_SYMBOL(kill_anon_super);
707 863
diff --git a/fs/sync.c b/fs/sync.c
index c38ec163da6c..c98a7477edfd 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -165,28 +165,9 @@ SYSCALL_DEFINE1(syncfs, int, fd)
165 */ 165 */
166int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) 166int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
167{ 167{
168 struct address_space *mapping = file->f_mapping; 168 if (!file->f_op || !file->f_op->fsync)
169 int err, ret; 169 return -EINVAL;
170 170 return file->f_op->fsync(file, start, end, datasync);
171 if (!file->f_op || !file->f_op->fsync) {
172 ret = -EINVAL;
173 goto out;
174 }
175
176 ret = filemap_write_and_wait_range(mapping, start, end);
177
178 /*
179 * We need to protect against concurrent writers, which could cause
180 * livelocks in fsync_buffers_list().
181 */
182 mutex_lock(&mapping->host->i_mutex);
183 err = file->f_op->fsync(file, datasync);
184 if (!ret)
185 ret = err;
186 mutex_unlock(&mapping->host->i_mutex);
187
188out:
189 return ret;
190} 171}
191EXPORT_SYMBOL(vfs_fsync_range); 172EXPORT_SYMBOL(vfs_fsync_range);
192 173
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 0a12eb89cd32..e3f091a81c72 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -349,11 +349,11 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const cha
349 return -ENOENT; 349 return -ENOENT;
350} 350}
351 351
352int sysfs_permission(struct inode *inode, int mask, unsigned int flags) 352int sysfs_permission(struct inode *inode, int mask)
353{ 353{
354 struct sysfs_dirent *sd; 354 struct sysfs_dirent *sd;
355 355
356 if (flags & IPERM_FLAG_RCU) 356 if (mask & MAY_NOT_BLOCK)
357 return -ECHILD; 357 return -ECHILD;
358 358
359 sd = inode->i_private; 359 sd = inode->i_private;
@@ -362,5 +362,5 @@ int sysfs_permission(struct inode *inode, int mask, unsigned int flags)
362 sysfs_refresh_inode(sd, inode); 362 sysfs_refresh_inode(sd, inode);
363 mutex_unlock(&sysfs_mutex); 363 mutex_unlock(&sysfs_mutex);
364 364
365 return generic_permission(inode, mask, flags, NULL); 365 return generic_permission(inode, mask);
366} 366}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 2ed2404f3113..845ab3ad229d 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -201,7 +201,7 @@ static inline void __sysfs_put(struct sysfs_dirent *sd)
201struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd); 201struct inode *sysfs_get_inode(struct super_block *sb, struct sysfs_dirent *sd);
202void sysfs_evict_inode(struct inode *inode); 202void sysfs_evict_inode(struct inode *inode);
203int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr); 203int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr *iattr);
204int sysfs_permission(struct inode *inode, int mask, unsigned int flags); 204int sysfs_permission(struct inode *inode, int mask);
205int sysfs_setattr(struct dentry *dentry, struct iattr *iattr); 205int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
206int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); 206int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
207int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, 207int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 87cd0ead8633..fb3b5c813a30 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -78,7 +78,7 @@ static int nothing_to_commit(struct ubifs_info *c)
78 * If the root TNC node is dirty, we definitely have something to 78 * If the root TNC node is dirty, we definitely have something to
79 * commit. 79 * commit.
80 */ 80 */
81 if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags)) 81 if (c->zroot.znode && ubifs_zn_dirty(c->zroot.znode))
82 return 0; 82 return 0;
83 83
84 /* 84 /*
@@ -418,7 +418,7 @@ int ubifs_run_commit(struct ubifs_info *c)
418 418
419 spin_lock(&c->cs_lock); 419 spin_lock(&c->cs_lock);
420 if (c->cmt_state == COMMIT_BROKEN) { 420 if (c->cmt_state == COMMIT_BROKEN) {
421 err = -EINVAL; 421 err = -EROFS;
422 goto out; 422 goto out;
423 } 423 }
424 424
@@ -444,7 +444,7 @@ int ubifs_run_commit(struct ubifs_info *c)
444 * re-check it. 444 * re-check it.
445 */ 445 */
446 if (c->cmt_state == COMMIT_BROKEN) { 446 if (c->cmt_state == COMMIT_BROKEN) {
447 err = -EINVAL; 447 err = -EROFS;
448 goto out_cmt_unlock; 448 goto out_cmt_unlock;
449 } 449 }
450 450
@@ -576,7 +576,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
576 struct idx_node *i; 576 struct idx_node *i;
577 size_t sz; 577 size_t sz;
578 578
579 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) 579 if (!dbg_is_chk_index(c))
580 return 0; 580 return 0;
581 581
582 INIT_LIST_HEAD(&list); 582 INIT_LIST_HEAD(&list);
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 0bb2bcef0de9..eef109a1a927 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -27,13 +27,12 @@
27 * various local functions of those subsystems. 27 * various local functions of those subsystems.
28 */ 28 */
29 29
30#define UBIFS_DBG_PRESERVE_UBI
31
32#include "ubifs.h"
33#include <linux/module.h> 30#include <linux/module.h>
34#include <linux/moduleparam.h>
35#include <linux/debugfs.h> 31#include <linux/debugfs.h>
36#include <linux/math64.h> 32#include <linux/math64.h>
33#include <linux/uaccess.h>
34#include <linux/random.h>
35#include "ubifs.h"
37 36
38#ifdef CONFIG_UBIFS_FS_DEBUG 37#ifdef CONFIG_UBIFS_FS_DEBUG
39 38
@@ -42,15 +41,6 @@ DEFINE_SPINLOCK(dbg_lock);
42static char dbg_key_buf0[128]; 41static char dbg_key_buf0[128];
43static char dbg_key_buf1[128]; 42static char dbg_key_buf1[128];
44 43
45unsigned int ubifs_chk_flags;
46unsigned int ubifs_tst_flags;
47
48module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
49module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
50
51MODULE_PARM_DESC(debug_chks, "Debug check flags");
52MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
53
54static const char *get_key_fmt(int fmt) 44static const char *get_key_fmt(int fmt)
55{ 45{
56 switch (fmt) { 46 switch (fmt) {
@@ -91,6 +81,28 @@ static const char *get_key_type(int type)
91 } 81 }
92} 82}
93 83
84static const char *get_dent_type(int type)
85{
86 switch (type) {
87 case UBIFS_ITYPE_REG:
88 return "file";
89 case UBIFS_ITYPE_DIR:
90 return "dir";
91 case UBIFS_ITYPE_LNK:
92 return "symlink";
93 case UBIFS_ITYPE_BLK:
94 return "blkdev";
95 case UBIFS_ITYPE_CHR:
96 return "char dev";
97 case UBIFS_ITYPE_FIFO:
98 return "fifo";
99 case UBIFS_ITYPE_SOCK:
100 return "socket";
101 default:
102 return "unknown/invalid type";
103 }
104}
105
94static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key, 106static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
95 char *buffer) 107 char *buffer)
96{ 108{
@@ -234,9 +246,13 @@ static void dump_ch(const struct ubifs_ch *ch)
234 printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len)); 246 printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len));
235} 247}
236 248
237void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) 249void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode)
238{ 250{
239 const struct ubifs_inode *ui = ubifs_inode(inode); 251 const struct ubifs_inode *ui = ubifs_inode(inode);
252 struct qstr nm = { .name = NULL };
253 union ubifs_key key;
254 struct ubifs_dent_node *dent, *pdent = NULL;
255 int count = 2;
240 256
241 printk(KERN_DEBUG "Dump in-memory inode:"); 257 printk(KERN_DEBUG "Dump in-memory inode:");
242 printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); 258 printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino);
@@ -270,6 +286,32 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
270 printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); 286 printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read);
271 printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); 287 printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row);
272 printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); 288 printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len);
289
290 if (!S_ISDIR(inode->i_mode))
291 return;
292
293 printk(KERN_DEBUG "List of directory entries:\n");
294 ubifs_assert(!mutex_is_locked(&c->tnc_mutex));
295
296 lowest_dent_key(c, &key, inode->i_ino);
297 while (1) {
298 dent = ubifs_tnc_next_ent(c, &key, &nm);
299 if (IS_ERR(dent)) {
300 if (PTR_ERR(dent) != -ENOENT)
301 printk(KERN_DEBUG "error %ld\n", PTR_ERR(dent));
302 break;
303 }
304
305 printk(KERN_DEBUG "\t%d: %s (%s)\n",
306 count++, dent->name, get_dent_type(dent->type));
307
308 nm.name = dent->name;
309 nm.len = le16_to_cpu(dent->nlen);
310 kfree(pdent);
311 pdent = dent;
312 key_read(c, &dent->key, &key);
313 }
314 kfree(pdent);
273} 315}
274 316
275void dbg_dump_node(const struct ubifs_info *c, const void *node) 317void dbg_dump_node(const struct ubifs_info *c, const void *node)
@@ -278,7 +320,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
278 union ubifs_key key; 320 union ubifs_key key;
279 const struct ubifs_ch *ch = node; 321 const struct ubifs_ch *ch = node;
280 322
281 if (dbg_failure_mode) 323 if (dbg_is_tst_rcvry(c))
282 return; 324 return;
283 325
284 /* If the magic is incorrect, just hexdump the first bytes */ 326 /* If the magic is incorrect, just hexdump the first bytes */
@@ -834,7 +876,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
834 struct ubifs_scan_node *snod; 876 struct ubifs_scan_node *snod;
835 void *buf; 877 void *buf;
836 878
837 if (dbg_failure_mode) 879 if (dbg_is_tst_rcvry(c))
838 return; 880 return;
839 881
840 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 882 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
@@ -1080,6 +1122,7 @@ out:
1080 1122
1081/** 1123/**
1082 * dbg_check_synced_i_size - check synchronized inode size. 1124 * dbg_check_synced_i_size - check synchronized inode size.
1125 * @c: UBIFS file-system description object
1083 * @inode: inode to check 1126 * @inode: inode to check
1084 * 1127 *
1085 * If inode is clean, synchronized inode size has to be equivalent to current 1128 * If inode is clean, synchronized inode size has to be equivalent to current
@@ -1087,12 +1130,12 @@ out:
1087 * has to be locked). Returns %0 if synchronized inode size is correct, and 1130 * has to be locked). Returns %0 if synchronized inode size is correct, and
1088 * %-EINVAL if not. 1131 * %-EINVAL if not.
1089 */ 1132 */
1090int dbg_check_synced_i_size(struct inode *inode) 1133int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode)
1091{ 1134{
1092 int err = 0; 1135 int err = 0;
1093 struct ubifs_inode *ui = ubifs_inode(inode); 1136 struct ubifs_inode *ui = ubifs_inode(inode);
1094 1137
1095 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 1138 if (!dbg_is_chk_gen(c))
1096 return 0; 1139 return 0;
1097 if (!S_ISREG(inode->i_mode)) 1140 if (!S_ISREG(inode->i_mode))
1098 return 0; 1141 return 0;
@@ -1125,7 +1168,7 @@ int dbg_check_synced_i_size(struct inode *inode)
1125 * Note, it is a good idea to make sure the @dir->i_mutex is locked before 1168 * Note, it is a good idea to make sure the @dir->i_mutex is locked before
1126 * calling this function. 1169 * calling this function.
1127 */ 1170 */
1128int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir) 1171int dbg_check_dir(struct ubifs_info *c, const struct inode *dir)
1129{ 1172{
1130 unsigned int nlink = 2; 1173 unsigned int nlink = 2;
1131 union ubifs_key key; 1174 union ubifs_key key;
@@ -1133,7 +1176,7 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
1133 struct qstr nm = { .name = NULL }; 1176 struct qstr nm = { .name = NULL };
1134 loff_t size = UBIFS_INO_NODE_SZ; 1177 loff_t size = UBIFS_INO_NODE_SZ;
1135 1178
1136 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 1179 if (!dbg_is_chk_gen(c))
1137 return 0; 1180 return 0;
1138 1181
1139 if (!S_ISDIR(dir->i_mode)) 1182 if (!S_ISDIR(dir->i_mode))
@@ -1167,12 +1210,14 @@ int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
1167 "but calculated size is %llu", dir->i_ino, 1210 "but calculated size is %llu", dir->i_ino,
1168 (unsigned long long)i_size_read(dir), 1211 (unsigned long long)i_size_read(dir),
1169 (unsigned long long)size); 1212 (unsigned long long)size);
1213 dbg_dump_inode(c, dir);
1170 dump_stack(); 1214 dump_stack();
1171 return -EINVAL; 1215 return -EINVAL;
1172 } 1216 }
1173 if (dir->i_nlink != nlink) { 1217 if (dir->i_nlink != nlink) {
1174 ubifs_err("directory inode %lu has nlink %u, but calculated " 1218 ubifs_err("directory inode %lu has nlink %u, but calculated "
1175 "nlink is %u", dir->i_ino, dir->i_nlink, nlink); 1219 "nlink is %u", dir->i_ino, dir->i_nlink, nlink);
1220 dbg_dump_inode(c, dir);
1176 dump_stack(); 1221 dump_stack();
1177 return -EINVAL; 1222 return -EINVAL;
1178 } 1223 }
@@ -1489,7 +1534,7 @@ int dbg_check_tnc(struct ubifs_info *c, int extra)
1489 long clean_cnt = 0, dirty_cnt = 0; 1534 long clean_cnt = 0, dirty_cnt = 0;
1490 int err, last; 1535 int err, last;
1491 1536
1492 if (!(ubifs_chk_flags & UBIFS_CHK_TNC)) 1537 if (!dbg_is_chk_index(c))
1493 return 0; 1538 return 0;
1494 1539
1495 ubifs_assert(mutex_is_locked(&c->tnc_mutex)); 1540 ubifs_assert(mutex_is_locked(&c->tnc_mutex));
@@ -1736,7 +1781,7 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
1736 int err; 1781 int err;
1737 long long calc = 0; 1782 long long calc = 0;
1738 1783
1739 if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ)) 1784 if (!dbg_is_chk_index(c))
1740 return 0; 1785 return 0;
1741 1786
1742 err = dbg_walk_index(c, NULL, add_size, &calc); 1787 err = dbg_walk_index(c, NULL, add_size, &calc);
@@ -2312,7 +2357,7 @@ int dbg_check_filesystem(struct ubifs_info *c)
2312 int err; 2357 int err;
2313 struct fsck_data fsckd; 2358 struct fsck_data fsckd;
2314 2359
2315 if (!(ubifs_chk_flags & UBIFS_CHK_FS)) 2360 if (!dbg_is_chk_fs(c))
2316 return 0; 2361 return 0;
2317 2362
2318 fsckd.inodes = RB_ROOT; 2363 fsckd.inodes = RB_ROOT;
@@ -2347,7 +2392,7 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
2347 struct list_head *cur; 2392 struct list_head *cur;
2348 struct ubifs_scan_node *sa, *sb; 2393 struct ubifs_scan_node *sa, *sb;
2349 2394
2350 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2395 if (!dbg_is_chk_gen(c))
2351 return 0; 2396 return 0;
2352 2397
2353 for (cur = head->next; cur->next != head; cur = cur->next) { 2398 for (cur = head->next; cur->next != head; cur = cur->next) {
@@ -2414,7 +2459,7 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
2414 struct list_head *cur; 2459 struct list_head *cur;
2415 struct ubifs_scan_node *sa, *sb; 2460 struct ubifs_scan_node *sa, *sb;
2416 2461
2417 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2462 if (!dbg_is_chk_gen(c))
2418 return 0; 2463 return 0;
2419 2464
2420 for (cur = head->next; cur->next != head; cur = cur->next) { 2465 for (cur = head->next; cur->next != head; cur = cur->next) {
@@ -2491,214 +2536,141 @@ error_dump:
2491 return 0; 2536 return 0;
2492} 2537}
2493 2538
2494int dbg_force_in_the_gaps(void) 2539static inline int chance(unsigned int n, unsigned int out_of)
2495{ 2540{
2496 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2541 return !!((random32() % out_of) + 1 <= n);
2497 return 0;
2498 2542
2499 return !(random32() & 7);
2500} 2543}
2501 2544
2502/* Failure mode for recovery testing */ 2545static int power_cut_emulated(struct ubifs_info *c, int lnum, int write)
2503
2504#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d))
2505
2506struct failure_mode_info {
2507 struct list_head list;
2508 struct ubifs_info *c;
2509};
2510
2511static LIST_HEAD(fmi_list);
2512static DEFINE_SPINLOCK(fmi_lock);
2513
2514static unsigned int next;
2515
2516static int simple_rand(void)
2517{
2518 if (next == 0)
2519 next = current->pid;
2520 next = next * 1103515245 + 12345;
2521 return (next >> 16) & 32767;
2522}
2523
2524static void failure_mode_init(struct ubifs_info *c)
2525{
2526 struct failure_mode_info *fmi;
2527
2528 fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
2529 if (!fmi) {
2530 ubifs_err("Failed to register failure mode - no memory");
2531 return;
2532 }
2533 fmi->c = c;
2534 spin_lock(&fmi_lock);
2535 list_add_tail(&fmi->list, &fmi_list);
2536 spin_unlock(&fmi_lock);
2537}
2538
2539static void failure_mode_exit(struct ubifs_info *c)
2540{ 2546{
2541 struct failure_mode_info *fmi, *tmp; 2547 struct ubifs_debug_info *d = c->dbg;
2542
2543 spin_lock(&fmi_lock);
2544 list_for_each_entry_safe(fmi, tmp, &fmi_list, list)
2545 if (fmi->c == c) {
2546 list_del(&fmi->list);
2547 kfree(fmi);
2548 }
2549 spin_unlock(&fmi_lock);
2550}
2551
2552static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc)
2553{
2554 struct failure_mode_info *fmi;
2555
2556 spin_lock(&fmi_lock);
2557 list_for_each_entry(fmi, &fmi_list, list)
2558 if (fmi->c->ubi == desc) {
2559 struct ubifs_info *c = fmi->c;
2560
2561 spin_unlock(&fmi_lock);
2562 return c;
2563 }
2564 spin_unlock(&fmi_lock);
2565 return NULL;
2566}
2567
2568static int in_failure_mode(struct ubi_volume_desc *desc)
2569{
2570 struct ubifs_info *c = dbg_find_info(desc);
2571
2572 if (c && dbg_failure_mode)
2573 return c->dbg->failure_mode;
2574 return 0;
2575}
2576 2548
2577static int do_fail(struct ubi_volume_desc *desc, int lnum, int write) 2549 ubifs_assert(dbg_is_tst_rcvry(c));
2578{
2579 struct ubifs_info *c = dbg_find_info(desc);
2580 struct ubifs_debug_info *d;
2581 2550
2582 if (!c || !dbg_failure_mode) 2551 if (!d->pc_cnt) {
2583 return 0; 2552 /* First call - decide delay to the power cut */
2584 d = c->dbg;
2585 if (d->failure_mode)
2586 return 1;
2587 if (!d->fail_cnt) {
2588 /* First call - decide delay to failure */
2589 if (chance(1, 2)) { 2553 if (chance(1, 2)) {
2590 unsigned int delay = 1 << (simple_rand() >> 11); 2554 unsigned long delay;
2591 2555
2592 if (chance(1, 2)) { 2556 if (chance(1, 2)) {
2593 d->fail_delay = 1; 2557 d->pc_delay = 1;
2594 d->fail_timeout = jiffies + 2558 /* Fail withing 1 minute */
2594 msecs_to_jiffies(delay); 2558 /* Fail within 1 minute */
2596 dbg_rcvry("failing after %ums", delay); 2560 d->pc_timeout = jiffies;
2561 d->pc_timeout += msecs_to_jiffies(delay);
2562 ubifs_warn("failing after %lums", delay);
2597 } else { 2563 } else {
2598 d->fail_delay = 2; 2564 d->pc_delay = 2;
2599 d->fail_cnt_max = delay; 2565 delay = random32() % 10000;
2600 dbg_rcvry("failing after %u calls", delay); 2566 /* Fail within 10000 operations */
2567 d->pc_cnt_max = delay;
2568 ubifs_warn("failing after %lu calls", delay);
2601 } 2569 }
2602 } 2570 }
2603 d->fail_cnt += 1; 2571
2572 d->pc_cnt += 1;
2604 } 2573 }
2574
2605 /* Determine if failure delay has expired */ 2575 /* Determine if failure delay has expired */
2606 if (d->fail_delay == 1) { 2576 if (d->pc_delay == 1 && time_before(jiffies, d->pc_timeout))
2607 if (time_before(jiffies, d->fail_timeout))
2608 return 0; 2577 return 0;
2609 } else if (d->fail_delay == 2) 2578 if (d->pc_delay == 2 && d->pc_cnt++ < d->pc_cnt_max)
2610 if (d->fail_cnt++ < d->fail_cnt_max)
2611 return 0; 2579 return 0;
2580
2612 if (lnum == UBIFS_SB_LNUM) { 2581 if (lnum == UBIFS_SB_LNUM) {
2613 if (write) { 2582 if (write && chance(1, 2))
2614 if (chance(1, 2)) 2583 return 0;
2615 return 0; 2584 if (chance(19, 20))
2616 } else if (chance(19, 20))
2617 return 0; 2585 return 0;
2618 dbg_rcvry("failing in super block LEB %d", lnum); 2586 ubifs_warn("failing in super block LEB %d", lnum);
2619 } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) { 2587 } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
2620 if (chance(19, 20)) 2588 if (chance(19, 20))
2621 return 0; 2589 return 0;
2622 dbg_rcvry("failing in master LEB %d", lnum); 2590 ubifs_warn("failing in master LEB %d", lnum);
2623 } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) { 2591 } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
2624 if (write) { 2592 if (write && chance(99, 100))
2625 if (chance(99, 100))
2626 return 0;
2627 } else if (chance(399, 400))
2628 return 0; 2593 return 0;
2629 dbg_rcvry("failing in log LEB %d", lnum); 2594 if (chance(399, 400))
2595 return 0;
2596 ubifs_warn("failing in log LEB %d", lnum);
2630 } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) { 2597 } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
2631 if (write) { 2598 if (write && chance(7, 8))
2632 if (chance(7, 8))
2633 return 0;
2634 } else if (chance(19, 20))
2635 return 0; 2599 return 0;
2636 dbg_rcvry("failing in LPT LEB %d", lnum); 2600 if (chance(19, 20))
2601 return 0;
2602 ubifs_warn("failing in LPT LEB %d", lnum);
2637 } else if (lnum >= c->orph_first && lnum <= c->orph_last) { 2603 } else if (lnum >= c->orph_first && lnum <= c->orph_last) {
2638 if (write) { 2604 if (write && chance(1, 2))
2639 if (chance(1, 2)) 2605 return 0;
2640 return 0; 2606 if (chance(9, 10))
2641 } else if (chance(9, 10))
2642 return 0; 2607 return 0;
2643 dbg_rcvry("failing in orphan LEB %d", lnum); 2608 ubifs_warn("failing in orphan LEB %d", lnum);
2644 } else if (lnum == c->ihead_lnum) { 2609 } else if (lnum == c->ihead_lnum) {
2645 if (chance(99, 100)) 2610 if (chance(99, 100))
2646 return 0; 2611 return 0;
2647 dbg_rcvry("failing in index head LEB %d", lnum); 2612 ubifs_warn("failing in index head LEB %d", lnum);
2648 } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) { 2613 } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
2649 if (chance(9, 10)) 2614 if (chance(9, 10))
2650 return 0; 2615 return 0;
2651 dbg_rcvry("failing in GC head LEB %d", lnum); 2616 ubifs_warn("failing in GC head LEB %d", lnum);
2652 } else if (write && !RB_EMPTY_ROOT(&c->buds) && 2617 } else if (write && !RB_EMPTY_ROOT(&c->buds) &&
2653 !ubifs_search_bud(c, lnum)) { 2618 !ubifs_search_bud(c, lnum)) {
2654 if (chance(19, 20)) 2619 if (chance(19, 20))
2655 return 0; 2620 return 0;
2656 dbg_rcvry("failing in non-bud LEB %d", lnum); 2621 ubifs_warn("failing in non-bud LEB %d", lnum);
2657 } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND || 2622 } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
2658 c->cmt_state == COMMIT_RUNNING_REQUIRED) { 2623 c->cmt_state == COMMIT_RUNNING_REQUIRED) {
2659 if (chance(999, 1000)) 2624 if (chance(999, 1000))
2660 return 0; 2625 return 0;
2661 dbg_rcvry("failing in bud LEB %d commit running", lnum); 2626 ubifs_warn("failing in bud LEB %d commit running", lnum);
2662 } else { 2627 } else {
2663 if (chance(9999, 10000)) 2628 if (chance(9999, 10000))
2664 return 0; 2629 return 0;
2665 dbg_rcvry("failing in bud LEB %d commit not running", lnum); 2630 ubifs_warn("failing in bud LEB %d commit not running", lnum);
2666 } 2631 }
2667 ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum); 2632
2668 d->failure_mode = 1; 2633 d->pc_happened = 1;
2634 ubifs_warn("========== Power cut emulated ==========");
2669 dump_stack(); 2635 dump_stack();
2670 return 1; 2636 return 1;
2671} 2637}
2672 2638
2673static void cut_data(const void *buf, int len) 2639static void cut_data(const void *buf, unsigned int len)
2674{ 2640{
2675 int flen, i; 2641 unsigned int from, to, i, ffs = chance(1, 2);
2676 unsigned char *p = (void *)buf; 2642 unsigned char *p = (void *)buf;
2677 2643
2678 flen = (len * (long long)simple_rand()) >> 15; 2644 from = random32() % (len + 1);
2679 for (i = flen; i < len; i++) 2645 if (chance(1, 2))
2680 p[i] = 0xff; 2646 to = random32() % (len - from + 1);
2681} 2647 else
2648 to = len;
2682 2649
2683int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, 2650 if (from < to)
2684 int len, int check) 2651 ubifs_warn("filled bytes %u-%u with %s", from, to - 1,
2685{ 2652 ffs ? "0xFFs" : "random data");
2686 if (in_failure_mode(desc)) 2653
2687 return -EROFS; 2654 if (ffs)
2688 return ubi_leb_read(desc, lnum, buf, offset, len, check); 2655 for (i = from; i < to; i++)
2656 p[i] = 0xFF;
2657 else
2658 for (i = from; i < to; i++)
2659 p[i] = random32() % 0x100;
2689} 2660}
2690 2661
2691int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, 2662int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
2692 int offset, int len, int dtype) 2663 int offs, int len, int dtype)
2693{ 2664{
2694 int err, failing; 2665 int err, failing;
2695 2666
2696 if (in_failure_mode(desc)) 2667 if (c->dbg->pc_happened)
2697 return -EROFS; 2668 return -EROFS;
2698 failing = do_fail(desc, lnum, 1); 2669
2670 failing = power_cut_emulated(c, lnum, 1);
2699 if (failing) 2671 if (failing)
2700 cut_data(buf, len); 2672 cut_data(buf, len);
2701 err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); 2673 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
2702 if (err) 2674 if (err)
2703 return err; 2675 return err;
2704 if (failing) 2676 if (failing)
@@ -2706,162 +2678,207 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2706 return 0; 2678 return 0;
2707} 2679}
2708 2680
2709int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, 2681int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf,
2710 int len, int dtype) 2682 int len, int dtype)
2711{ 2683{
2712 int err; 2684 int err;
2713 2685
2714 if (do_fail(desc, lnum, 1)) 2686 if (c->dbg->pc_happened)
2715 return -EROFS; 2687 return -EROFS;
2716 err = ubi_leb_change(desc, lnum, buf, len, dtype); 2688 if (power_cut_emulated(c, lnum, 1))
2689 return -EROFS;
2690 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
2717 if (err) 2691 if (err)
2718 return err; 2692 return err;
2719 if (do_fail(desc, lnum, 1)) 2693 if (power_cut_emulated(c, lnum, 1))
2720 return -EROFS; 2694 return -EROFS;
2721 return 0; 2695 return 0;
2722} 2696}
2723 2697
2724int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) 2698int dbg_leb_unmap(struct ubifs_info *c, int lnum)
2725{ 2699{
2726 int err; 2700 int err;
2727 2701
2728 if (do_fail(desc, lnum, 0)) 2702 if (c->dbg->pc_happened)
2703 return -EROFS;
2704 if (power_cut_emulated(c, lnum, 0))
2729 return -EROFS; 2705 return -EROFS;
2730 err = ubi_leb_erase(desc, lnum); 2706 err = ubi_leb_unmap(c->ubi, lnum);
2731 if (err) 2707 if (err)
2732 return err; 2708 return err;
2733 if (do_fail(desc, lnum, 0)) 2709 if (power_cut_emulated(c, lnum, 0))
2734 return -EROFS; 2710 return -EROFS;
2735 return 0; 2711 return 0;
2736} 2712}
2737 2713
2738int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) 2714int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype)
2739{ 2715{
2740 int err; 2716 int err;
2741 2717
2742 if (do_fail(desc, lnum, 0)) 2718 if (c->dbg->pc_happened)
2743 return -EROFS; 2719 return -EROFS;
2744 err = ubi_leb_unmap(desc, lnum); 2720 if (power_cut_emulated(c, lnum, 0))
2721 return -EROFS;
2722 err = ubi_leb_map(c->ubi, lnum, dtype);
2745 if (err) 2723 if (err)
2746 return err; 2724 return err;
2747 if (do_fail(desc, lnum, 0)) 2725 if (power_cut_emulated(c, lnum, 0))
2748 return -EROFS; 2726 return -EROFS;
2749 return 0; 2727 return 0;
2750} 2728}
2751 2729
2752int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) 2730/*
2753{ 2731 * Root directory for UBIFS stuff in debugfs. Contains sub-directories which
2754 if (in_failure_mode(desc)) 2732 * contain the stuff specific to particular file-system mounts.
2755 return -EROFS; 2733 */
2756 return ubi_is_mapped(desc, lnum); 2734static struct dentry *dfs_rootdir;
2757}
2758 2735
2759int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) 2736static int dfs_file_open(struct inode *inode, struct file *file)
2760{ 2737{
2761 int err; 2738 file->private_data = inode->i_private;
2762 2739 return nonseekable_open(inode, file);
2763 if (do_fail(desc, lnum, 0))
2764 return -EROFS;
2765 err = ubi_leb_map(desc, lnum, dtype);
2766 if (err)
2767 return err;
2768 if (do_fail(desc, lnum, 0))
2769 return -EROFS;
2770 return 0;
2771} 2740}
2772 2741
2773/** 2742/**
2774 * ubifs_debugging_init - initialize UBIFS debugging. 2743 * provide_user_output - provide output to the user reading a debugfs file.
2775 * @c: UBIFS file-system description object 2744 * @val: boolean value for the answer
2745 * @u: the buffer to store the answer at
2746 * @count: size of the buffer
2747 * @ppos: position in the @u output buffer
2776 * 2748 *
2777 * This function initializes debugging-related data for the file system. 2749 * This is a simple helper function which stores @val boolean value in the user
2778 * Returns zero in case of success and a negative error code in case of 2750 * buffer when the user reads one of UBIFS debugfs files. Returns amount of
2751 * bytes written to @u in case of success and a negative error code in case of
2779 * failure. 2752 * failure.
2780 */ 2753 */
2781int ubifs_debugging_init(struct ubifs_info *c) 2754static int provide_user_output(int val, char __user *u, size_t count,
2755 loff_t *ppos)
2782{ 2756{
2783 c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL); 2757 char buf[3];
2784 if (!c->dbg)
2785 return -ENOMEM;
2786 2758
2787 failure_mode_init(c); 2759 if (val)
2788 return 0; 2760 buf[0] = '1';
2761 else
2762 buf[0] = '0';
2763 buf[1] = '\n';
2764 buf[2] = 0x00;
2765
2766 return simple_read_from_buffer(u, count, ppos, buf, 2);
2789} 2767}
2790 2768
2791/** 2769static ssize_t dfs_file_read(struct file *file, char __user *u, size_t count,
2792 * ubifs_debugging_exit - free debugging data. 2770 loff_t *ppos)
2793 * @c: UBIFS file-system description object
2794 */
2795void ubifs_debugging_exit(struct ubifs_info *c)
2796{ 2771{
2797 failure_mode_exit(c); 2772 struct dentry *dent = file->f_path.dentry;
2798 kfree(c->dbg); 2773 struct ubifs_info *c = file->private_data;
2799} 2774 struct ubifs_debug_info *d = c->dbg;
2775 int val;
2776
2777 if (dent == d->dfs_chk_gen)
2778 val = d->chk_gen;
2779 else if (dent == d->dfs_chk_index)
2780 val = d->chk_index;
2781 else if (dent == d->dfs_chk_orph)
2782 val = d->chk_orph;
2783 else if (dent == d->dfs_chk_lprops)
2784 val = d->chk_lprops;
2785 else if (dent == d->dfs_chk_fs)
2786 val = d->chk_fs;
2787 else if (dent == d->dfs_tst_rcvry)
2788 val = d->tst_rcvry;
2789 else
2790 return -EINVAL;
2800 2791
2801/* 2792 return provide_user_output(val, u, count, ppos);
2802 * Root directory for UBIFS stuff in debugfs. Contains sub-directories which 2793}
2803 * contain the stuff specific to particular file-system mounts.
2804 */
2805static struct dentry *dfs_rootdir;
2806 2794
2807/** 2795/**
2808 * dbg_debugfs_init - initialize debugfs file-system. 2796 * interpret_user_input - interpret user debugfs file input.
2797 * @u: user-provided buffer with the input
2798 * @count: buffer size
2809 * 2799 *
2810 * UBIFS uses debugfs file-system to expose various debugging knobs to 2800 * This is a helper function which interpret user input to a boolean UBIFS
2811 * user-space. This function creates "ubifs" directory in the debugfs 2801 * debugfs file. Returns %0 or %1 in case of success and a negative error code
2812 * file-system. Returns zero in case of success and a negative error code in 2802 * in case of failure.
2813 * case of failure.
2814 */ 2803 */
2815int dbg_debugfs_init(void) 2804static int interpret_user_input(const char __user *u, size_t count)
2816{ 2805{
2817 dfs_rootdir = debugfs_create_dir("ubifs", NULL); 2806 size_t buf_size;
2818 if (IS_ERR(dfs_rootdir)) { 2807 char buf[8];
2819 int err = PTR_ERR(dfs_rootdir);
2820 ubifs_err("cannot create \"ubifs\" debugfs directory, "
2821 "error %d\n", err);
2822 return err;
2823 }
2824 2808
2825 return 0; 2809 buf_size = min_t(size_t, count, (sizeof(buf) - 1));
2826} 2810 if (copy_from_user(buf, u, buf_size))
2811 return -EFAULT;
2827 2812
2828/** 2813 if (buf[0] == '1')
2829 * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system. 2814 return 1;
2830 */ 2815 else if (buf[0] == '0')
2831void dbg_debugfs_exit(void) 2816 return 0;
2832{
2833 debugfs_remove(dfs_rootdir);
2834}
2835 2817
2836static int open_debugfs_file(struct inode *inode, struct file *file) 2818 return -EINVAL;
2837{
2838 file->private_data = inode->i_private;
2839 return nonseekable_open(inode, file);
2840} 2819}
2841 2820
2842static ssize_t write_debugfs_file(struct file *file, const char __user *buf, 2821static ssize_t dfs_file_write(struct file *file, const char __user *u,
2843 size_t count, loff_t *ppos) 2822 size_t count, loff_t *ppos)
2844{ 2823{
2845 struct ubifs_info *c = file->private_data; 2824 struct ubifs_info *c = file->private_data;
2846 struct ubifs_debug_info *d = c->dbg; 2825 struct ubifs_debug_info *d = c->dbg;
2826 struct dentry *dent = file->f_path.dentry;
2827 int val;
2847 2828
2848 if (file->f_path.dentry == d->dfs_dump_lprops) 2829 /*
2830 * TODO: this is racy - the file-system might have already been
2831 * unmounted and we'd oops in this case. The plan is to fix it with
2832 * help of 'iterate_supers_type()' which we should have in v3.0: when
2833 * a debugfs opened, we rember FS's UUID in file->private_data. Then
2834 * whenever we access the FS via a debugfs file, we iterate all UBIFS
2835 * superblocks and fine the one with the same UUID, and take the
2836 * locking right.
2837 *
2838 * The other way to go suggested by Al Viro is to create a separate
2839 * 'ubifs-debug' file-system instead.
2840 */
2841 if (file->f_path.dentry == d->dfs_dump_lprops) {
2849 dbg_dump_lprops(c); 2842 dbg_dump_lprops(c);
2850 else if (file->f_path.dentry == d->dfs_dump_budg) 2843 return count;
2844 }
2845 if (file->f_path.dentry == d->dfs_dump_budg) {
2851 dbg_dump_budg(c, &c->bi); 2846 dbg_dump_budg(c, &c->bi);
2852 else if (file->f_path.dentry == d->dfs_dump_tnc) { 2847 return count;
2848 }
2849 if (file->f_path.dentry == d->dfs_dump_tnc) {
2853 mutex_lock(&c->tnc_mutex); 2850 mutex_lock(&c->tnc_mutex);
2854 dbg_dump_tnc(c); 2851 dbg_dump_tnc(c);
2855 mutex_unlock(&c->tnc_mutex); 2852 mutex_unlock(&c->tnc_mutex);
2856 } else 2853 return count;
2854 }
2855
2856 val = interpret_user_input(u, count);
2857 if (val < 0)
2858 return val;
2859
2860 if (dent == d->dfs_chk_gen)
2861 d->chk_gen = val;
2862 else if (dent == d->dfs_chk_index)
2863 d->chk_index = val;
2864 else if (dent == d->dfs_chk_orph)
2865 d->chk_orph = val;
2866 else if (dent == d->dfs_chk_lprops)
2867 d->chk_lprops = val;
2868 else if (dent == d->dfs_chk_fs)
2869 d->chk_fs = val;
2870 else if (dent == d->dfs_tst_rcvry)
2871 d->tst_rcvry = val;
2872 else
2857 return -EINVAL; 2873 return -EINVAL;
2858 2874
2859 return count; 2875 return count;
2860} 2876}
2861 2877
2862static const struct file_operations dfs_fops = { 2878static const struct file_operations dfs_fops = {
2863 .open = open_debugfs_file, 2879 .open = dfs_file_open,
2864 .write = write_debugfs_file, 2880 .read = dfs_file_read,
2881 .write = dfs_file_write,
2865 .owner = THIS_MODULE, 2882 .owner = THIS_MODULE,
2866 .llseek = no_llseek, 2883 .llseek = no_llseek,
2867}; 2884};
@@ -2880,12 +2897,20 @@ static const struct file_operations dfs_fops = {
2880 */ 2897 */
2881int dbg_debugfs_init_fs(struct ubifs_info *c) 2898int dbg_debugfs_init_fs(struct ubifs_info *c)
2882{ 2899{
2883 int err; 2900 int err, n;
2884 const char *fname; 2901 const char *fname;
2885 struct dentry *dent; 2902 struct dentry *dent;
2886 struct ubifs_debug_info *d = c->dbg; 2903 struct ubifs_debug_info *d = c->dbg;
2887 2904
2888 sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); 2905 n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME,
2906 c->vi.ubi_num, c->vi.vol_id);
2907 if (n == UBIFS_DFS_DIR_LEN) {
2908 /* The array size is too small */
2909 fname = UBIFS_DFS_DIR_NAME;
2910 dent = ERR_PTR(-EINVAL);
2911 goto out;
2912 }
2913
2889 fname = d->dfs_dir_name; 2914 fname = d->dfs_dir_name;
2890 dent = debugfs_create_dir(fname, dfs_rootdir); 2915 dent = debugfs_create_dir(fname, dfs_rootdir);
2891 if (IS_ERR_OR_NULL(dent)) 2916 if (IS_ERR_OR_NULL(dent))
@@ -2910,13 +2935,55 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
2910 goto out_remove; 2935 goto out_remove;
2911 d->dfs_dump_tnc = dent; 2936 d->dfs_dump_tnc = dent;
2912 2937
2938 fname = "chk_general";
2939 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2940 &dfs_fops);
2941 if (IS_ERR_OR_NULL(dent))
2942 goto out_remove;
2943 d->dfs_chk_gen = dent;
2944
2945 fname = "chk_index";
2946 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2947 &dfs_fops);
2948 if (IS_ERR_OR_NULL(dent))
2949 goto out_remove;
2950 d->dfs_chk_index = dent;
2951
2952 fname = "chk_orphans";
2953 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2954 &dfs_fops);
2955 if (IS_ERR_OR_NULL(dent))
2956 goto out_remove;
2957 d->dfs_chk_orph = dent;
2958
2959 fname = "chk_lprops";
2960 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2961 &dfs_fops);
2962 if (IS_ERR_OR_NULL(dent))
2963 goto out_remove;
2964 d->dfs_chk_lprops = dent;
2965
2966 fname = "chk_fs";
2967 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2968 &dfs_fops);
2969 if (IS_ERR_OR_NULL(dent))
2970 goto out_remove;
2971 d->dfs_chk_fs = dent;
2972
2973 fname = "tst_recovery";
2974 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, d->dfs_dir, c,
2975 &dfs_fops);
2976 if (IS_ERR_OR_NULL(dent))
2977 goto out_remove;
2978 d->dfs_tst_rcvry = dent;
2979
2913 return 0; 2980 return 0;
2914 2981
2915out_remove: 2982out_remove:
2916 debugfs_remove_recursive(d->dfs_dir); 2983 debugfs_remove_recursive(d->dfs_dir);
2917out: 2984out:
2918 err = dent ? PTR_ERR(dent) : -ENODEV; 2985 err = dent ? PTR_ERR(dent) : -ENODEV;
2919 ubifs_err("cannot create \"%s\" debugfs directory, error %d\n", 2986 ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
2920 fname, err); 2987 fname, err);
2921 return err; 2988 return err;
2922} 2989}
@@ -2930,4 +2997,179 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c)
2930 debugfs_remove_recursive(c->dbg->dfs_dir); 2997 debugfs_remove_recursive(c->dbg->dfs_dir);
2931} 2998}
2932 2999
3000struct ubifs_global_debug_info ubifs_dbg;
3001
3002static struct dentry *dfs_chk_gen;
3003static struct dentry *dfs_chk_index;
3004static struct dentry *dfs_chk_orph;
3005static struct dentry *dfs_chk_lprops;
3006static struct dentry *dfs_chk_fs;
3007static struct dentry *dfs_tst_rcvry;
3008
3009static ssize_t dfs_global_file_read(struct file *file, char __user *u,
3010 size_t count, loff_t *ppos)
3011{
3012 struct dentry *dent = file->f_path.dentry;
3013 int val;
3014
3015 if (dent == dfs_chk_gen)
3016 val = ubifs_dbg.chk_gen;
3017 else if (dent == dfs_chk_index)
3018 val = ubifs_dbg.chk_index;
3019 else if (dent == dfs_chk_orph)
3020 val = ubifs_dbg.chk_orph;
3021 else if (dent == dfs_chk_lprops)
3022 val = ubifs_dbg.chk_lprops;
3023 else if (dent == dfs_chk_fs)
3024 val = ubifs_dbg.chk_fs;
3025 else if (dent == dfs_tst_rcvry)
3026 val = ubifs_dbg.tst_rcvry;
3027 else
3028 return -EINVAL;
3029
3030 return provide_user_output(val, u, count, ppos);
3031}
3032
3033static ssize_t dfs_global_file_write(struct file *file, const char __user *u,
3034 size_t count, loff_t *ppos)
3035{
3036 struct dentry *dent = file->f_path.dentry;
3037 int val;
3038
3039 val = interpret_user_input(u, count);
3040 if (val < 0)
3041 return val;
3042
3043 if (dent == dfs_chk_gen)
3044 ubifs_dbg.chk_gen = val;
3045 else if (dent == dfs_chk_index)
3046 ubifs_dbg.chk_index = val;
3047 else if (dent == dfs_chk_orph)
3048 ubifs_dbg.chk_orph = val;
3049 else if (dent == dfs_chk_lprops)
3050 ubifs_dbg.chk_lprops = val;
3051 else if (dent == dfs_chk_fs)
3052 ubifs_dbg.chk_fs = val;
3053 else if (dent == dfs_tst_rcvry)
3054 ubifs_dbg.tst_rcvry = val;
3055 else
3056 return -EINVAL;
3057
3058 return count;
3059}
3060
3061static const struct file_operations dfs_global_fops = {
3062 .read = dfs_global_file_read,
3063 .write = dfs_global_file_write,
3064 .owner = THIS_MODULE,
3065 .llseek = no_llseek,
3066};
3067
3068/**
3069 * dbg_debugfs_init - initialize debugfs file-system.
3070 *
3071 * UBIFS uses debugfs file-system to expose various debugging knobs to
3072 * user-space. This function creates "ubifs" directory in the debugfs
3073 * file-system. Returns zero in case of success and a negative error code in
3074 * case of failure.
3075 */
3076int dbg_debugfs_init(void)
3077{
3078 int err;
3079 const char *fname;
3080 struct dentry *dent;
3081
3082 fname = "ubifs";
3083 dent = debugfs_create_dir(fname, NULL);
3084 if (IS_ERR_OR_NULL(dent))
3085 goto out;
3086 dfs_rootdir = dent;
3087
3088 fname = "chk_general";
3089 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3090 &dfs_global_fops);
3091 if (IS_ERR_OR_NULL(dent))
3092 goto out_remove;
3093 dfs_chk_gen = dent;
3094
3095 fname = "chk_index";
3096 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3097 &dfs_global_fops);
3098 if (IS_ERR_OR_NULL(dent))
3099 goto out_remove;
3100 dfs_chk_index = dent;
3101
3102 fname = "chk_orphans";
3103 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3104 &dfs_global_fops);
3105 if (IS_ERR_OR_NULL(dent))
3106 goto out_remove;
3107 dfs_chk_orph = dent;
3108
3109 fname = "chk_lprops";
3110 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3111 &dfs_global_fops);
3112 if (IS_ERR_OR_NULL(dent))
3113 goto out_remove;
3114 dfs_chk_lprops = dent;
3115
3116 fname = "chk_fs";
3117 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3118 &dfs_global_fops);
3119 if (IS_ERR_OR_NULL(dent))
3120 goto out_remove;
3121 dfs_chk_fs = dent;
3122
3123 fname = "tst_recovery";
3124 dent = debugfs_create_file(fname, S_IRUSR | S_IWUSR, dfs_rootdir, NULL,
3125 &dfs_global_fops);
3126 if (IS_ERR_OR_NULL(dent))
3127 goto out_remove;
3128 dfs_tst_rcvry = dent;
3129
3130 return 0;
3131
3132out_remove:
3133 debugfs_remove_recursive(dfs_rootdir);
3134out:
3135 err = dent ? PTR_ERR(dent) : -ENODEV;
3136 ubifs_err("cannot create \"%s\" debugfs file or directory, error %d\n",
3137 fname, err);
3138 return err;
3139}
3140
3141/**
3142 * dbg_debugfs_exit - remove the "ubifs" directory from debugfs file-system.
3143 */
3144void dbg_debugfs_exit(void)
3145{
3146 debugfs_remove_recursive(dfs_rootdir);
3147}
3148
3149/**
3150 * ubifs_debugging_init - initialize UBIFS debugging.
3151 * @c: UBIFS file-system description object
3152 *
3153 * This function initializes debugging-related data for the file system.
3154 * Returns zero in case of success and a negative error code in case of
3155 * failure.
3156 */
3157int ubifs_debugging_init(struct ubifs_info *c)
3158{
3159 c->dbg = kzalloc(sizeof(struct ubifs_debug_info), GFP_KERNEL);
3160 if (!c->dbg)
3161 return -ENOMEM;
3162
3163 return 0;
3164}
3165
3166/**
3167 * ubifs_debugging_exit - free debugging data.
3168 * @c: UBIFS file-system description object
3169 */
3170void ubifs_debugging_exit(struct ubifs_info *c)
3171{
3172 kfree(c->dbg);
3173}
3174
2933#endif /* CONFIG_UBIFS_FS_DEBUG */ 3175#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index a811ac4a26bb..45174b534377 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -31,18 +31,25 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
31 31
32#ifdef CONFIG_UBIFS_FS_DEBUG 32#ifdef CONFIG_UBIFS_FS_DEBUG
33 33
34#include <linux/random.h> 34/*
35 * The UBIFS debugfs directory name pattern and maximum name length (3 for "ubi"
36 * + 1 for "_" and plus 2x2 for 2 UBI numbers and 1 for the trailing zero byte.
37 */
38#define UBIFS_DFS_DIR_NAME "ubi%d_%d"
39#define UBIFS_DFS_DIR_LEN (3 + 1 + 2*2 + 1)
35 40
36/** 41/**
37 * ubifs_debug_info - per-FS debugging information. 42 * ubifs_debug_info - per-FS debugging information.
38 * @old_zroot: old index root - used by 'dbg_check_old_index()' 43 * @old_zroot: old index root - used by 'dbg_check_old_index()'
39 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' 44 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
40 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' 45 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
41 * @failure_mode: failure mode for recovery testing 46 *
42 * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls 47 * @pc_happened: non-zero if an emulated power cut happened
43 * @fail_timeout: time in jiffies when delay of failure mode expires 48 * @pc_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
44 * @fail_cnt: current number of calls to failure mode I/O functions 49 * @pc_timeout: time in jiffies when delay of failure mode expires
45 * @fail_cnt_max: number of calls by which to delay failure mode 50 * @pc_cnt: current number of calls to failure mode I/O functions
51 * @pc_cnt_max: number of calls by which to delay failure mode
52 *
46 * @chk_lpt_sz: used by LPT tree size checker 53 * @chk_lpt_sz: used by LPT tree size checker
47 * @chk_lpt_sz2: used by LPT tree size checker 54 * @chk_lpt_sz2: used by LPT tree size checker
48 * @chk_lpt_wastage: used by LPT tree size checker 55 * @chk_lpt_wastage: used by LPT tree size checker
@@ -56,21 +63,36 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c,
56 * @saved_free: saved amount of free space 63 * @saved_free: saved amount of free space
57 * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt 64 * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
58 * 65 *
66 * @chk_gen: if general extra checks are enabled
67 * @chk_index: if index xtra checks are enabled
68 * @chk_orph: if orphans extra checks are enabled
69 * @chk_lprops: if lprops extra checks are enabled
70 * @chk_fs: if UBIFS contents extra checks are enabled
71 * @tst_rcvry: if UBIFS recovery testing mode enabled
72 *
59 * @dfs_dir_name: name of debugfs directory containing this file-system's files 73 * @dfs_dir_name: name of debugfs directory containing this file-system's files
60 * @dfs_dir: direntry object of the file-system debugfs directory 74 * @dfs_dir: direntry object of the file-system debugfs directory
61 * @dfs_dump_lprops: "dump lprops" debugfs knob 75 * @dfs_dump_lprops: "dump lprops" debugfs knob
62 * @dfs_dump_budg: "dump budgeting information" debugfs knob 76 * @dfs_dump_budg: "dump budgeting information" debugfs knob
63 * @dfs_dump_tnc: "dump TNC" debugfs knob 77 * @dfs_dump_tnc: "dump TNC" debugfs knob
78 * @dfs_chk_gen: debugfs knob to enable UBIFS general extra checks
79 * @dfs_chk_index: debugfs knob to enable UBIFS index extra checks
80 * @dfs_chk_orph: debugfs knob to enable UBIFS orphans extra checks
81 * @dfs_chk_lprops: debugfs knob to enable UBIFS LEP properties extra checks
82 * @dfs_chk_fs: debugfs knob to enable UBIFS contents extra checks
83 * @dfs_tst_rcvry: debugfs knob to enable UBIFS recovery testing
64 */ 84 */
65struct ubifs_debug_info { 85struct ubifs_debug_info {
66 struct ubifs_zbranch old_zroot; 86 struct ubifs_zbranch old_zroot;
67 int old_zroot_level; 87 int old_zroot_level;
68 unsigned long long old_zroot_sqnum; 88 unsigned long long old_zroot_sqnum;
69 int failure_mode; 89
70 int fail_delay; 90 int pc_happened;
71 unsigned long fail_timeout; 91 int pc_delay;
72 unsigned int fail_cnt; 92 unsigned long pc_timeout;
73 unsigned int fail_cnt_max; 93 unsigned int pc_cnt;
94 unsigned int pc_cnt_max;
95
74 long long chk_lpt_sz; 96 long long chk_lpt_sz;
75 long long chk_lpt_sz2; 97 long long chk_lpt_sz2;
76 long long chk_lpt_wastage; 98 long long chk_lpt_wastage;
@@ -84,11 +106,43 @@ struct ubifs_debug_info {
84 long long saved_free; 106 long long saved_free;
85 int saved_idx_gc_cnt; 107 int saved_idx_gc_cnt;
86 108
87 char dfs_dir_name[100]; 109 unsigned int chk_gen:1;
110 unsigned int chk_index:1;
111 unsigned int chk_orph:1;
112 unsigned int chk_lprops:1;
113 unsigned int chk_fs:1;
114 unsigned int tst_rcvry:1;
115
116 char dfs_dir_name[UBIFS_DFS_DIR_LEN + 1];
88 struct dentry *dfs_dir; 117 struct dentry *dfs_dir;
89 struct dentry *dfs_dump_lprops; 118 struct dentry *dfs_dump_lprops;
90 struct dentry *dfs_dump_budg; 119 struct dentry *dfs_dump_budg;
91 struct dentry *dfs_dump_tnc; 120 struct dentry *dfs_dump_tnc;
121 struct dentry *dfs_chk_gen;
122 struct dentry *dfs_chk_index;
123 struct dentry *dfs_chk_orph;
124 struct dentry *dfs_chk_lprops;
125 struct dentry *dfs_chk_fs;
126 struct dentry *dfs_tst_rcvry;
127};
128
129/**
130 * ubifs_global_debug_info - global (not per-FS) UBIFS debugging information.
131 *
132 * @chk_gen: if general extra checks are enabled
133 * @chk_index: if index xtra checks are enabled
134 * @chk_orph: if orphans extra checks are enabled
135 * @chk_lprops: if lprops extra checks are enabled
136 * @chk_fs: if UBIFS contents extra checks are enabled
137 * @tst_rcvry: if UBIFS recovery testing mode enabled
138 */
139struct ubifs_global_debug_info {
140 unsigned int chk_gen:1;
141 unsigned int chk_index:1;
142 unsigned int chk_orph:1;
143 unsigned int chk_lprops:1;
144 unsigned int chk_fs:1;
145 unsigned int tst_rcvry:1;
92}; 146};
93 147
94#define ubifs_assert(expr) do { \ 148#define ubifs_assert(expr) do { \
@@ -127,6 +181,8 @@ const char *dbg_key_str1(const struct ubifs_info *c,
127#define DBGKEY(key) dbg_key_str0(c, (key)) 181#define DBGKEY(key) dbg_key_str0(c, (key))
128#define DBGKEY1(key) dbg_key_str1(c, (key)) 182#define DBGKEY1(key) dbg_key_str1(c, (key))
129 183
184extern spinlock_t dbg_lock;
185
130#define ubifs_dbg_msg(type, fmt, ...) do { \ 186#define ubifs_dbg_msg(type, fmt, ...) do { \
131 spin_lock(&dbg_lock); \ 187 spin_lock(&dbg_lock); \
132 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ 188 pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
@@ -162,41 +218,36 @@ const char *dbg_key_str1(const struct ubifs_info *c,
162/* Additional recovery messages */ 218/* Additional recovery messages */
163#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) 219#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
164 220
165/* 221extern struct ubifs_global_debug_info ubifs_dbg;
166 * Debugging check flags.
167 *
168 * UBIFS_CHK_GEN: general checks
169 * UBIFS_CHK_TNC: check TNC
170 * UBIFS_CHK_IDX_SZ: check index size
171 * UBIFS_CHK_ORPH: check orphans
172 * UBIFS_CHK_OLD_IDX: check the old index
173 * UBIFS_CHK_LPROPS: check lprops
174 * UBIFS_CHK_FS: check the file-system
175 */
176enum {
177 UBIFS_CHK_GEN = 0x1,
178 UBIFS_CHK_TNC = 0x2,
179 UBIFS_CHK_IDX_SZ = 0x4,
180 UBIFS_CHK_ORPH = 0x8,
181 UBIFS_CHK_OLD_IDX = 0x10,
182 UBIFS_CHK_LPROPS = 0x20,
183 UBIFS_CHK_FS = 0x40,
184};
185
186/*
187 * Special testing flags.
188 *
189 * UBIFS_TST_RCVRY: failure mode for recovery testing
190 */
191enum {
192 UBIFS_TST_RCVRY = 0x4,
193};
194
195extern spinlock_t dbg_lock;
196 222
197extern unsigned int ubifs_msg_flags; 223static inline int dbg_is_chk_gen(const struct ubifs_info *c)
198extern unsigned int ubifs_chk_flags; 224{
199extern unsigned int ubifs_tst_flags; 225 return !!(ubifs_dbg.chk_gen || c->dbg->chk_gen);
226}
227static inline int dbg_is_chk_index(const struct ubifs_info *c)
228{
229 return !!(ubifs_dbg.chk_index || c->dbg->chk_index);
230}
231static inline int dbg_is_chk_orph(const struct ubifs_info *c)
232{
233 return !!(ubifs_dbg.chk_orph || c->dbg->chk_orph);
234}
235static inline int dbg_is_chk_lprops(const struct ubifs_info *c)
236{
237 return !!(ubifs_dbg.chk_lprops || c->dbg->chk_lprops);
238}
239static inline int dbg_is_chk_fs(const struct ubifs_info *c)
240{
241 return !!(ubifs_dbg.chk_fs || c->dbg->chk_fs);
242}
243static inline int dbg_is_tst_rcvry(const struct ubifs_info *c)
244{
245 return !!(ubifs_dbg.tst_rcvry || c->dbg->tst_rcvry);
246}
247static inline int dbg_is_power_cut(const struct ubifs_info *c)
248{
249 return !!c->dbg->pc_happened;
250}
200 251
201int ubifs_debugging_init(struct ubifs_info *c); 252int ubifs_debugging_init(struct ubifs_info *c);
202void ubifs_debugging_exit(struct ubifs_info *c); 253void ubifs_debugging_exit(struct ubifs_info *c);
@@ -207,7 +258,7 @@ const char *dbg_cstate(int cmt_state);
207const char *dbg_jhead(int jhead); 258const char *dbg_jhead(int jhead);
208const char *dbg_get_key_dump(const struct ubifs_info *c, 259const char *dbg_get_key_dump(const struct ubifs_info *c,
209 const union ubifs_key *key); 260 const union ubifs_key *key);
210void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); 261void dbg_dump_inode(struct ubifs_info *c, const struct inode *inode);
211void dbg_dump_node(const struct ubifs_info *c, const void *node); 262void dbg_dump_node(const struct ubifs_info *c, const void *node);
212void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, 263void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
213 int offs); 264 int offs);
@@ -240,8 +291,8 @@ int dbg_check_cats(struct ubifs_info *c);
240int dbg_check_ltab(struct ubifs_info *c); 291int dbg_check_ltab(struct ubifs_info *c);
241int dbg_chk_lpt_free_spc(struct ubifs_info *c); 292int dbg_chk_lpt_free_spc(struct ubifs_info *c);
242int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); 293int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len);
243int dbg_check_synced_i_size(struct inode *inode); 294int dbg_check_synced_i_size(const struct ubifs_info *c, struct inode *inode);
244int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); 295int dbg_check_dir(struct ubifs_info *c, const struct inode *dir);
245int dbg_check_tnc(struct ubifs_info *c, int extra); 296int dbg_check_tnc(struct ubifs_info *c, int extra);
246int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); 297int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
247int dbg_check_filesystem(struct ubifs_info *c); 298int dbg_check_filesystem(struct ubifs_info *c);
@@ -254,54 +305,12 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
254int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); 305int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
255int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); 306int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
256 307
257/* Force the use of in-the-gaps method for testing */ 308int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
258static inline int dbg_force_in_the_gaps_enabled(void) 309 int len, int dtype);
259{ 310int dbg_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
260 return ubifs_chk_flags & UBIFS_CHK_GEN; 311 int dtype);
261} 312int dbg_leb_unmap(struct ubifs_info *c, int lnum);
262int dbg_force_in_the_gaps(void); 313int dbg_leb_map(struct ubifs_info *c, int lnum, int dtype);
263
264/* Failure mode for recovery testing */
265#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
266
267#ifndef UBIFS_DBG_PRESERVE_UBI
268#define ubi_leb_read dbg_leb_read
269#define ubi_leb_write dbg_leb_write
270#define ubi_leb_change dbg_leb_change
271#define ubi_leb_erase dbg_leb_erase
272#define ubi_leb_unmap dbg_leb_unmap
273#define ubi_is_mapped dbg_is_mapped
274#define ubi_leb_map dbg_leb_map
275#endif
276
277int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
278 int len, int check);
279int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
280 int offset, int len, int dtype);
281int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
282 int len, int dtype);
283int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum);
284int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum);
285int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum);
286int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
287
288static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf,
289 int offset, int len)
290{
291 return dbg_leb_read(desc, lnum, buf, offset, len, 0);
292}
293
294static inline int dbg_write(struct ubi_volume_desc *desc, int lnum,
295 const void *buf, int offset, int len)
296{
297 return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN);
298}
299
300static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
301 const void *buf, int len)
302{
303 return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
304}
305 314
306/* Debugfs-related stuff */ 315/* Debugfs-related stuff */
307int dbg_debugfs_init(void); 316int dbg_debugfs_init(void);
@@ -313,7 +322,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
313 322
314/* Use "if (0)" to make compiler check arguments even if debugging is off */ 323/* Use "if (0)" to make compiler check arguments even if debugging is off */
315#define ubifs_assert(expr) do { \ 324#define ubifs_assert(expr) do { \
316 if (0 && (expr)) \ 325 if (0) \
317 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ 326 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
318 __func__, __LINE__, current->pid); \ 327 __func__, __LINE__, current->pid); \
319} while (0) 328} while (0)
@@ -323,6 +332,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
323 ubifs_err(fmt, ##__VA_ARGS__); \ 332 ubifs_err(fmt, ##__VA_ARGS__); \
324} while (0) 333} while (0)
325 334
335#define DBGKEY(key) ((char *)(key))
336#define DBGKEY1(key) ((char *)(key))
337
326#define ubifs_dbg_msg(fmt, ...) do { \ 338#define ubifs_dbg_msg(fmt, ...) do { \
327 if (0) \ 339 if (0) \
328 pr_debug(fmt "\n", ##__VA_ARGS__); \ 340 pr_debug(fmt "\n", ##__VA_ARGS__); \
@@ -346,9 +358,6 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
346#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 358#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
347#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) 359#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
348 360
349#define DBGKEY(key) ((char *)(key))
350#define DBGKEY1(key) ((char *)(key))
351
352static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; } 361static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; }
353static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; } 362static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; }
354static inline const char *dbg_ntype(int type) { return ""; } 363static inline const char *dbg_ntype(int type) { return ""; }
@@ -357,7 +366,7 @@ static inline const char *dbg_jhead(int jhead) { return ""; }
357static inline const char * 366static inline const char *
358dbg_get_key_dump(const struct ubifs_info *c, 367dbg_get_key_dump(const struct ubifs_info *c,
359 const union ubifs_key *key) { return ""; } 368 const union ubifs_key *key) { return ""; }
360static inline void dbg_dump_inode(const struct ubifs_info *c, 369static inline void dbg_dump_inode(struct ubifs_info *c,
361 const struct inode *inode) { return; } 370 const struct inode *inode) { return; }
362static inline void dbg_dump_node(const struct ubifs_info *c, 371static inline void dbg_dump_node(const struct ubifs_info *c,
363 const void *node) { return; } 372 const void *node) { return; }
@@ -409,9 +418,11 @@ static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; }
409static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; } 418static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; }
410static inline int dbg_chk_lpt_sz(struct ubifs_info *c, 419static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
411 int action, int len) { return 0; } 420 int action, int len) { return 0; }
412static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; } 421static inline int
413static inline int dbg_check_dir_size(struct ubifs_info *c, 422dbg_check_synced_i_size(const struct ubifs_info *c,
414 const struct inode *dir) { return 0; } 423 struct inode *inode) { return 0; }
424static inline int dbg_check_dir(struct ubifs_info *c,
425 const struct inode *dir) { return 0; }
415static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; } 426static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
416static inline int dbg_check_idx_size(struct ubifs_info *c, 427static inline int dbg_check_idx_size(struct ubifs_info *c,
417 long long idx_size) { return 0; } 428 long long idx_size) { return 0; }
@@ -431,9 +442,23 @@ static inline int
431dbg_check_nondata_nodes_order(struct ubifs_info *c, 442dbg_check_nondata_nodes_order(struct ubifs_info *c,
432 struct list_head *head) { return 0; } 443 struct list_head *head) { return 0; }
433 444
434static inline int dbg_force_in_the_gaps(void) { return 0; } 445static inline int dbg_leb_write(struct ubifs_info *c, int lnum,
435#define dbg_force_in_the_gaps_enabled() 0 446 const void *buf, int offset,
436#define dbg_failure_mode 0 447 int len, int dtype) { return 0; }
448static inline int dbg_leb_change(struct ubifs_info *c, int lnum,
449 const void *buf, int len,
450 int dtype) { return 0; }
451static inline int dbg_leb_unmap(struct ubifs_info *c, int lnum) { return 0; }
452static inline int dbg_leb_map(struct ubifs_info *c, int lnum,
453 int dtype) { return 0; }
454
455static inline int dbg_is_chk_gen(const struct ubifs_info *c) { return 0; }
456static inline int dbg_is_chk_index(const struct ubifs_info *c) { return 0; }
457static inline int dbg_is_chk_orph(const struct ubifs_info *c) { return 0; }
458static inline int dbg_is_chk_lprops(const struct ubifs_info *c) { return 0; }
459static inline int dbg_is_chk_fs(const struct ubifs_info *c) { return 0; }
460static inline int dbg_is_tst_rcvry(const struct ubifs_info *c) { return 0; }
461static inline int dbg_is_power_cut(const struct ubifs_info *c) { return 0; }
437 462
438static inline int dbg_debugfs_init(void) { return 0; } 463static inline int dbg_debugfs_init(void) { return 0; }
439static inline void dbg_debugfs_exit(void) { return; } 464static inline void dbg_debugfs_exit(void) { return; }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index ef5abd38f0bf..683492043317 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -102,7 +102,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
102 * UBIFS has to fully control "clean <-> dirty" transitions of inodes 102 * UBIFS has to fully control "clean <-> dirty" transitions of inodes
103 * to make budgeting work. 103 * to make budgeting work.
104 */ 104 */
105 inode->i_flags |= (S_NOCMTIME); 105 inode->i_flags |= S_NOCMTIME;
106 106
107 inode_init_owner(inode, dir, mode); 107 inode_init_owner(inode, dir, mode);
108 inode->i_mtime = inode->i_atime = inode->i_ctime = 108 inode->i_mtime = inode->i_atime = inode->i_ctime =
@@ -172,9 +172,11 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
172 172
173#ifdef CONFIG_UBIFS_FS_DEBUG 173#ifdef CONFIG_UBIFS_FS_DEBUG
174 174
175static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm) 175static int dbg_check_name(const struct ubifs_info *c,
176 const struct ubifs_dent_node *dent,
177 const struct qstr *nm)
176{ 178{
177 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 179 if (!dbg_is_chk_gen(c))
178 return 0; 180 return 0;
179 if (le16_to_cpu(dent->nlen) != nm->len) 181 if (le16_to_cpu(dent->nlen) != nm->len)
180 return -EINVAL; 182 return -EINVAL;
@@ -185,7 +187,7 @@ static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
185 187
186#else 188#else
187 189
188#define dbg_check_name(dent, nm) 0 190#define dbg_check_name(c, dent, nm) 0
189 191
190#endif 192#endif
191 193
@@ -219,7 +221,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
219 goto out; 221 goto out;
220 } 222 }
221 223
222 if (dbg_check_name(dent, &dentry->d_name)) { 224 if (dbg_check_name(c, dent, &dentry->d_name)) {
223 err = -EINVAL; 225 err = -EINVAL;
224 goto out; 226 goto out;
225 } 227 }
@@ -522,7 +524,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 524 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 525 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 526
525 err = dbg_check_synced_i_size(inode); 527 err = dbg_check_synced_i_size(c, inode);
526 if (err) 528 if (err)
527 return err; 529 return err;
528 530
@@ -577,7 +579,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
577 inode->i_nlink, dir->i_ino); 579 inode->i_nlink, dir->i_ino);
578 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 580 ubifs_assert(mutex_is_locked(&dir->i_mutex));
579 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 581 ubifs_assert(mutex_is_locked(&inode->i_mutex));
580 err = dbg_check_synced_i_size(inode); 582 err = dbg_check_synced_i_size(c, inode);
581 if (err) 583 if (err)
582 return err; 584 return err;
583 585
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 5e7fccfc4b29..f9c234bf33d3 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1263,7 +1263,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
1263 if (err) 1263 if (err)
1264 return err; 1264 return err;
1265 1265
1266 err = dbg_check_synced_i_size(inode); 1266 err = dbg_check_synced_i_size(c, inode);
1267 if (err) 1267 if (err)
1268 return err; 1268 return err;
1269 1269
@@ -1304,7 +1304,7 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
1304 return NULL; 1304 return NULL;
1305} 1305}
1306 1306
1307int ubifs_fsync(struct file *file, int datasync) 1307int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1308{ 1308{
1309 struct inode *inode = file->f_mapping->host; 1309 struct inode *inode = file->f_mapping->host;
1310 struct ubifs_info *c = inode->i_sb->s_fs_info; 1310 struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -1319,14 +1319,16 @@ int ubifs_fsync(struct file *file, int datasync)
1319 */ 1319 */
1320 return 0; 1320 return 0;
1321 1321
1322 /* 1322 err = filemap_write_and_wait_range(inode->i_mapping, start, end);
1323 * VFS has already synchronized dirty pages for this inode. Synchronize 1323 if (err)
1324 * the inode unless this is a 'datasync()' call. 1324 return err;
1325 */ 1325 mutex_lock(&inode->i_mutex);
1326
1327 /* Synchronize the inode unless this is a 'datasync()' call. */
1326 if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { 1328 if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
1327 err = inode->i_sb->s_op->write_inode(inode, NULL); 1329 err = inode->i_sb->s_op->write_inode(inode, NULL);
1328 if (err) 1330 if (err)
1329 return err; 1331 goto out;
1330 } 1332 }
1331 1333
1332 /* 1334 /*
@@ -1334,10 +1336,9 @@ int ubifs_fsync(struct file *file, int datasync)
1334 * them. 1336 * them.
1335 */ 1337 */
1336 err = ubifs_sync_wbufs_by_inode(c, inode); 1338 err = ubifs_sync_wbufs_by_inode(c, inode);
1337 if (err) 1339out:
1338 return err; 1340 mutex_unlock(&inode->i_mutex);
1339 1341 return err;
1340 return 0;
1341} 1342}
1342 1343
1343/** 1344/**
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 3be645e012c9..9228950a658f 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -86,8 +86,125 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
86 c->no_chk_data_crc = 0; 86 c->no_chk_data_crc = 0;
87 c->vfs_sb->s_flags |= MS_RDONLY; 87 c->vfs_sb->s_flags |= MS_RDONLY;
88 ubifs_warn("switched to read-only mode, error %d", err); 88 ubifs_warn("switched to read-only mode, error %d", err);
89 dump_stack();
90 }
91}
92
93/*
94 * Below are simple wrappers over UBI I/O functions which include some
95 * additional checks and UBIFS debugging stuff. See corresponding UBI function
96 * for more information.
97 */
98
99int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
100 int len, int even_ebadmsg)
101{
102 int err;
103
104 err = ubi_read(c->ubi, lnum, buf, offs, len);
105 /*
106 * In case of %-EBADMSG print the error message only if the
107 * @even_ebadmsg is true.
108 */
109 if (err && (err != -EBADMSG || even_ebadmsg)) {
110 ubifs_err("reading %d bytes from LEB %d:%d failed, error %d",
111 len, lnum, offs, err);
112 dbg_dump_stack();
113 }
114 return err;
115}
116
117int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
118 int len, int dtype)
119{
120 int err;
121
122 ubifs_assert(!c->ro_media && !c->ro_mount);
123 if (c->ro_error)
124 return -EROFS;
125 if (!dbg_is_tst_rcvry(c))
126 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
127 else
128 err = dbg_leb_write(c, lnum, buf, offs, len, dtype);
129 if (err) {
130 ubifs_err("writing %d bytes to LEB %d:%d failed, error %d",
131 len, lnum, offs, err);
132 ubifs_ro_mode(c, err);
133 dbg_dump_stack();
134 }
135 return err;
136}
137
138int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
139 int dtype)
140{
141 int err;
142
143 ubifs_assert(!c->ro_media && !c->ro_mount);
144 if (c->ro_error)
145 return -EROFS;
146 if (!dbg_is_tst_rcvry(c))
147 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
148 else
149 err = dbg_leb_change(c, lnum, buf, len, dtype);
150 if (err) {
151 ubifs_err("changing %d bytes in LEB %d failed, error %d",
152 len, lnum, err);
153 ubifs_ro_mode(c, err);
154 dbg_dump_stack();
155 }
156 return err;
157}
158
159int ubifs_leb_unmap(struct ubifs_info *c, int lnum)
160{
161 int err;
162
163 ubifs_assert(!c->ro_media && !c->ro_mount);
164 if (c->ro_error)
165 return -EROFS;
166 if (!dbg_is_tst_rcvry(c))
167 err = ubi_leb_unmap(c->ubi, lnum);
168 else
169 err = dbg_leb_unmap(c, lnum);
170 if (err) {
171 ubifs_err("unmap LEB %d failed, error %d", lnum, err);
172 ubifs_ro_mode(c, err);
173 dbg_dump_stack();
174 }
175 return err;
176}
177
178int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype)
179{
180 int err;
181
182 ubifs_assert(!c->ro_media && !c->ro_mount);
183 if (c->ro_error)
184 return -EROFS;
185 if (!dbg_is_tst_rcvry(c))
186 err = ubi_leb_map(c->ubi, lnum, dtype);
187 else
188 err = dbg_leb_map(c, lnum, dtype);
189 if (err) {
190 ubifs_err("mapping LEB %d failed, error %d", lnum, err);
191 ubifs_ro_mode(c, err);
192 dbg_dump_stack();
193 }
194 return err;
195}
196
197int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
198{
199 int err;
200
201 err = ubi_is_mapped(c->ubi, lnum);
202 if (err < 0) {
203 ubifs_err("ubi_is_mapped failed for LEB %d, error %d",
204 lnum, err);
89 dbg_dump_stack(); 205 dbg_dump_stack();
90 } 206 }
207 return err;
91} 208}
92 209
93/** 210/**
@@ -406,14 +523,10 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
406 dirt = sync_len - wbuf->used; 523 dirt = sync_len - wbuf->used;
407 if (dirt) 524 if (dirt)
408 ubifs_pad(c, wbuf->buf + wbuf->used, dirt); 525 ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
409 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 526 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, sync_len,
410 sync_len, wbuf->dtype); 527 wbuf->dtype);
411 if (err) { 528 if (err)
412 ubifs_err("cannot write %d bytes to LEB %d:%d",
413 sync_len, wbuf->lnum, wbuf->offs);
414 dbg_dump_stack();
415 return err; 529 return err;
416 }
417 530
418 spin_lock(&wbuf->lock); 531 spin_lock(&wbuf->lock);
419 wbuf->offs += sync_len; 532 wbuf->offs += sync_len;
@@ -605,9 +718,9 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
605 if (aligned_len == wbuf->avail) { 718 if (aligned_len == wbuf->avail) {
606 dbg_io("flush jhead %s wbuf to LEB %d:%d", 719 dbg_io("flush jhead %s wbuf to LEB %d:%d",
607 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 720 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
608 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, 721 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf,
609 wbuf->offs, wbuf->size, 722 wbuf->offs, wbuf->size,
610 wbuf->dtype); 723 wbuf->dtype);
611 if (err) 724 if (err)
612 goto out; 725 goto out;
613 726
@@ -642,8 +755,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
642 dbg_io("flush jhead %s wbuf to LEB %d:%d", 755 dbg_io("flush jhead %s wbuf to LEB %d:%d",
643 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 756 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
644 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); 757 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
645 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 758 err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs,
646 wbuf->size, wbuf->dtype); 759 wbuf->size, wbuf->dtype);
647 if (err) 760 if (err)
648 goto out; 761 goto out;
649 762
@@ -661,8 +774,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
661 */ 774 */
662 dbg_io("write %d bytes to LEB %d:%d", 775 dbg_io("write %d bytes to LEB %d:%d",
663 wbuf->size, wbuf->lnum, wbuf->offs); 776 wbuf->size, wbuf->lnum, wbuf->offs);
664 err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs, 777 err = ubifs_leb_write(c, wbuf->lnum, buf, wbuf->offs,
665 wbuf->size, wbuf->dtype); 778 wbuf->size, wbuf->dtype);
666 if (err) 779 if (err)
667 goto out; 780 goto out;
668 781
@@ -683,8 +796,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
683 n <<= c->max_write_shift; 796 n <<= c->max_write_shift;
684 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, 797 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
685 wbuf->offs); 798 wbuf->offs);
686 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, 799 err = ubifs_leb_write(c, wbuf->lnum, buf + written,
687 wbuf->offs, n, wbuf->dtype); 800 wbuf->offs, n, wbuf->dtype);
688 if (err) 801 if (err)
689 goto out; 802 goto out;
690 wbuf->offs += n; 803 wbuf->offs += n;
@@ -766,13 +879,9 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
766 return -EROFS; 879 return -EROFS;
767 880
768 ubifs_prepare_node(c, buf, len, 1); 881 ubifs_prepare_node(c, buf, len, 1);
769 err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype); 882 err = ubifs_leb_write(c, lnum, buf, offs, buf_len, dtype);
770 if (err) { 883 if (err)
771 ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
772 buf_len, lnum, offs, err);
773 dbg_dump_node(c, buf); 884 dbg_dump_node(c, buf);
774 dbg_dump_stack();
775 }
776 885
777 return err; 886 return err;
778} 887}
@@ -824,13 +933,9 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
824 933
825 if (rlen > 0) { 934 if (rlen > 0) {
826 /* Read everything that goes before write-buffer */ 935 /* Read everything that goes before write-buffer */
827 err = ubi_read(c->ubi, lnum, buf, offs, rlen); 936 err = ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
828 if (err && err != -EBADMSG) { 937 if (err && err != -EBADMSG)
829 ubifs_err("failed to read node %d from LEB %d:%d, "
830 "error %d", type, lnum, offs, err);
831 dbg_dump_stack();
832 return err; 938 return err;
833 }
834 } 939 }
835 940
836 if (type != ch->node_type) { 941 if (type != ch->node_type) {
@@ -885,12 +990,9 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
885 ubifs_assert(!(offs & 7) && offs < c->leb_size); 990 ubifs_assert(!(offs & 7) && offs < c->leb_size);
886 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); 991 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
887 992
888 err = ubi_read(c->ubi, lnum, buf, offs, len); 993 err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
889 if (err && err != -EBADMSG) { 994 if (err && err != -EBADMSG)
890 ubifs_err("cannot read node %d from LEB %d:%d, error %d",
891 type, lnum, offs, err);
892 return err; 995 return err;
893 }
894 996
895 if (type != ch->node_type) { 997 if (type != ch->node_type) {
896 ubifs_err("bad node type (%d but expected %d)", 998 ubifs_err("bad node type (%d but expected %d)",
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index affea9494ae2..f9fd068d1ae0 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -262,7 +262,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
262 * an unclean reboot, because the target LEB might have been 262 * an unclean reboot, because the target LEB might have been
263 * unmapped, but not yet physically erased. 263 * unmapped, but not yet physically erased.
264 */ 264 */
265 err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM); 265 err = ubifs_leb_map(c, bud->lnum, UBI_SHORTTERM);
266 if (err) 266 if (err)
267 goto out_unlock; 267 goto out_unlock;
268 } 268 }
@@ -283,8 +283,6 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
283 return 0; 283 return 0;
284 284
285out_unlock: 285out_unlock:
286 if (err != -EAGAIN)
287 ubifs_ro_mode(c, err);
288 mutex_unlock(&c->log_mutex); 286 mutex_unlock(&c->log_mutex);
289 kfree(ref); 287 kfree(ref);
290 kfree(bud); 288 kfree(bud);
@@ -752,7 +750,7 @@ static int dbg_check_bud_bytes(struct ubifs_info *c)
752 struct ubifs_bud *bud; 750 struct ubifs_bud *bud;
753 long long bud_bytes = 0; 751 long long bud_bytes = 0;
754 752
755 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 753 if (!dbg_is_chk_gen(c))
756 return 0; 754 return 0;
757 755
758 spin_lock(&c->buds_lock); 756 spin_lock(&c->buds_lock);
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 667884f4a615..f8a181e647cc 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -504,7 +504,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
504 pnode = (struct ubifs_pnode *)container_of(lprops - pos, 504 pnode = (struct ubifs_pnode *)container_of(lprops - pos,
505 struct ubifs_pnode, 505 struct ubifs_pnode,
506 lprops[0]); 506 lprops[0]);
507 return !test_bit(COW_ZNODE, &pnode->flags) && 507 return !test_bit(COW_CNODE, &pnode->flags) &&
508 test_bit(DIRTY_CNODE, &pnode->flags); 508 test_bit(DIRTY_CNODE, &pnode->flags);
509} 509}
510 510
@@ -860,7 +860,7 @@ int dbg_check_cats(struct ubifs_info *c)
860 struct list_head *pos; 860 struct list_head *pos;
861 int i, cat; 861 int i, cat;
862 862
863 if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) 863 if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
864 return 0; 864 return 0;
865 865
866 list_for_each_entry(lprops, &c->empty_list, list) { 866 list_for_each_entry(lprops, &c->empty_list, list) {
@@ -958,7 +958,7 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
958{ 958{
959 int i = 0, j, err = 0; 959 int i = 0, j, err = 0;
960 960
961 if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS))) 961 if (!dbg_is_chk_gen(c) && !dbg_is_chk_lprops(c))
962 return; 962 return;
963 963
964 for (i = 0; i < heap->cnt; i++) { 964 for (i = 0; i < heap->cnt; i++) {
@@ -1262,7 +1262,7 @@ int dbg_check_lprops(struct ubifs_info *c)
1262 int i, err; 1262 int i, err;
1263 struct ubifs_lp_stats lst; 1263 struct ubifs_lp_stats lst;
1264 1264
1265 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1265 if (!dbg_is_chk_lprops(c))
1266 return 0; 1266 return 0;
1267 1267
1268 /* 1268 /*
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index ef5155e109a2..6189c74d97f0 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -701,8 +701,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
701 alen = ALIGN(len, c->min_io_size); 701 alen = ALIGN(len, c->min_io_size);
702 set_ltab(c, lnum, c->leb_size - alen, alen - len); 702 set_ltab(c, lnum, c->leb_size - alen, alen - len);
703 memset(p, 0xff, alen - len); 703 memset(p, 0xff, alen - len);
704 err = ubi_leb_change(c->ubi, lnum++, buf, alen, 704 err = ubifs_leb_change(c, lnum++, buf, alen,
705 UBI_SHORTTERM); 705 UBI_SHORTTERM);
706 if (err) 706 if (err)
707 goto out; 707 goto out;
708 p = buf; 708 p = buf;
@@ -732,8 +732,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
732 set_ltab(c, lnum, c->leb_size - alen, 732 set_ltab(c, lnum, c->leb_size - alen,
733 alen - len); 733 alen - len);
734 memset(p, 0xff, alen - len); 734 memset(p, 0xff, alen - len);
735 err = ubi_leb_change(c->ubi, lnum++, buf, alen, 735 err = ubifs_leb_change(c, lnum++, buf, alen,
736 UBI_SHORTTERM); 736 UBI_SHORTTERM);
737 if (err) 737 if (err)
738 goto out; 738 goto out;
739 p = buf; 739 p = buf;
@@ -780,8 +780,8 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
780 alen = ALIGN(len, c->min_io_size); 780 alen = ALIGN(len, c->min_io_size);
781 set_ltab(c, lnum, c->leb_size - alen, alen - len); 781 set_ltab(c, lnum, c->leb_size - alen, alen - len);
782 memset(p, 0xff, alen - len); 782 memset(p, 0xff, alen - len);
783 err = ubi_leb_change(c->ubi, lnum++, buf, alen, 783 err = ubifs_leb_change(c, lnum++, buf, alen,
784 UBI_SHORTTERM); 784 UBI_SHORTTERM);
785 if (err) 785 if (err)
786 goto out; 786 goto out;
787 p = buf; 787 p = buf;
@@ -806,7 +806,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
806 alen = ALIGN(len, c->min_io_size); 806 alen = ALIGN(len, c->min_io_size);
807 set_ltab(c, lnum, c->leb_size - alen, alen - len); 807 set_ltab(c, lnum, c->leb_size - alen, alen - len);
808 memset(p, 0xff, alen - len); 808 memset(p, 0xff, alen - len);
809 err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM); 809 err = ubifs_leb_change(c, lnum++, buf, alen, UBI_SHORTTERM);
810 if (err) 810 if (err)
811 goto out; 811 goto out;
812 p = buf; 812 p = buf;
@@ -826,7 +826,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
826 826
827 /* Write remaining buffer */ 827 /* Write remaining buffer */
828 memset(p, 0xff, alen - len); 828 memset(p, 0xff, alen - len);
829 err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM); 829 err = ubifs_leb_change(c, lnum, buf, alen, UBI_SHORTTERM);
830 if (err) 830 if (err)
831 goto out; 831 goto out;
832 832
@@ -1222,7 +1222,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1222 if (c->big_lpt) 1222 if (c->big_lpt)
1223 nnode->num = calc_nnode_num_from_parent(c, parent, iip); 1223 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1224 } else { 1224 } else {
1225 err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz); 1225 err = ubifs_leb_read(c, lnum, buf, offs, c->nnode_sz, 1);
1226 if (err) 1226 if (err)
1227 goto out; 1227 goto out;
1228 err = ubifs_unpack_nnode(c, buf, nnode); 1228 err = ubifs_unpack_nnode(c, buf, nnode);
@@ -1247,6 +1247,7 @@ int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1247 1247
1248out: 1248out:
1249 ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs); 1249 ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
1250 dbg_dump_stack();
1250 kfree(nnode); 1251 kfree(nnode);
1251 return err; 1252 return err;
1252} 1253}
@@ -1290,7 +1291,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1290 lprops->flags = ubifs_categorize_lprops(c, lprops); 1291 lprops->flags = ubifs_categorize_lprops(c, lprops);
1291 } 1292 }
1292 } else { 1293 } else {
1293 err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz); 1294 err = ubifs_leb_read(c, lnum, buf, offs, c->pnode_sz, 1);
1294 if (err) 1295 if (err)
1295 goto out; 1296 goto out;
1296 err = unpack_pnode(c, buf, pnode); 1297 err = unpack_pnode(c, buf, pnode);
@@ -1312,6 +1313,7 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1312out: 1313out:
1313 ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs); 1314 ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
1314 dbg_dump_pnode(c, pnode, parent, iip); 1315 dbg_dump_pnode(c, pnode, parent, iip);
1316 dbg_dump_stack();
1315 dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip)); 1317 dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
1316 kfree(pnode); 1318 kfree(pnode);
1317 return err; 1319 return err;
@@ -1331,7 +1333,7 @@ static int read_ltab(struct ubifs_info *c)
1331 buf = vmalloc(c->ltab_sz); 1333 buf = vmalloc(c->ltab_sz);
1332 if (!buf) 1334 if (!buf)
1333 return -ENOMEM; 1335 return -ENOMEM;
1334 err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz); 1336 err = ubifs_leb_read(c, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz, 1);
1335 if (err) 1337 if (err)
1336 goto out; 1338 goto out;
1337 err = unpack_ltab(c, buf); 1339 err = unpack_ltab(c, buf);
@@ -1354,7 +1356,8 @@ static int read_lsave(struct ubifs_info *c)
1354 buf = vmalloc(c->lsave_sz); 1356 buf = vmalloc(c->lsave_sz);
1355 if (!buf) 1357 if (!buf)
1356 return -ENOMEM; 1358 return -ENOMEM;
1357 err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz); 1359 err = ubifs_leb_read(c, c->lsave_lnum, buf, c->lsave_offs,
1360 c->lsave_sz, 1);
1358 if (err) 1361 if (err)
1359 goto out; 1362 goto out;
1360 err = unpack_lsave(c, buf); 1363 err = unpack_lsave(c, buf);
@@ -1814,8 +1817,8 @@ static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
1814 if (c->big_lpt) 1817 if (c->big_lpt)
1815 nnode->num = calc_nnode_num_from_parent(c, parent, iip); 1818 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1816 } else { 1819 } else {
1817 err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, 1820 err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
1818 c->nnode_sz); 1821 c->nnode_sz, 1);
1819 if (err) 1822 if (err)
1820 return ERR_PTR(err); 1823 return ERR_PTR(err);
1821 err = ubifs_unpack_nnode(c, buf, nnode); 1824 err = ubifs_unpack_nnode(c, buf, nnode);
@@ -1883,8 +1886,8 @@ static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c,
1883 ubifs_assert(branch->lnum >= c->lpt_first && 1886 ubifs_assert(branch->lnum >= c->lpt_first &&
1884 branch->lnum <= c->lpt_last); 1887 branch->lnum <= c->lpt_last);
1885 ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size); 1888 ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size);
1886 err = ubi_read(c->ubi, branch->lnum, buf, branch->offs, 1889 err = ubifs_leb_read(c, branch->lnum, buf, branch->offs,
1887 c->pnode_sz); 1890 c->pnode_sz, 1);
1888 if (err) 1891 if (err)
1889 return ERR_PTR(err); 1892 return ERR_PTR(err);
1890 err = unpack_pnode(c, buf, pnode); 1893 err = unpack_pnode(c, buf, pnode);
@@ -2224,7 +2227,7 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
2224 struct ubifs_cnode *cn; 2227 struct ubifs_cnode *cn;
2225 int num, iip = 0, err; 2228 int num, iip = 0, err;
2226 2229
2227 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 2230 if (!dbg_is_chk_lprops(c))
2228 return 0; 2231 return 0;
2229 2232
2230 while (cnode) { 2233 while (cnode) {
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index dfcb5748a7dc..cddd6bd214f4 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -27,6 +27,7 @@
27 27
28#include <linux/crc16.h> 28#include <linux/crc16.h>
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include <linux/random.h>
30#include "ubifs.h" 31#include "ubifs.h"
31 32
32#ifdef CONFIG_UBIFS_FS_DEBUG 33#ifdef CONFIG_UBIFS_FS_DEBUG
@@ -116,8 +117,8 @@ static int get_cnodes_to_commit(struct ubifs_info *c)
116 return 0; 117 return 0;
117 cnt += 1; 118 cnt += 1;
118 while (1) { 119 while (1) {
119 ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags)); 120 ubifs_assert(!test_bit(COW_CNODE, &cnode->flags));
120 __set_bit(COW_ZNODE, &cnode->flags); 121 __set_bit(COW_CNODE, &cnode->flags);
121 cnext = next_dirty_cnode(cnode); 122 cnext = next_dirty_cnode(cnode);
122 if (!cnext) { 123 if (!cnext) {
123 cnode->cnext = c->lpt_cnext; 124 cnode->cnext = c->lpt_cnext;
@@ -465,7 +466,7 @@ static int write_cnodes(struct ubifs_info *c)
465 */ 466 */
466 clear_bit(DIRTY_CNODE, &cnode->flags); 467 clear_bit(DIRTY_CNODE, &cnode->flags);
467 smp_mb__before_clear_bit(); 468 smp_mb__before_clear_bit();
468 clear_bit(COW_ZNODE, &cnode->flags); 469 clear_bit(COW_CNODE, &cnode->flags);
469 smp_mb__after_clear_bit(); 470 smp_mb__after_clear_bit();
470 offs += len; 471 offs += len;
471 dbg_chk_lpt_sz(c, 1, len); 472 dbg_chk_lpt_sz(c, 1, len);
@@ -1160,11 +1161,11 @@ static int lpt_gc_lnum(struct ubifs_info *c, int lnum)
1160 void *buf = c->lpt_buf; 1161 void *buf = c->lpt_buf;
1161 1162
1162 dbg_lp("LEB %d", lnum); 1163 dbg_lp("LEB %d", lnum);
1163 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1164
1164 if (err) { 1165 err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
1165 ubifs_err("cannot read LEB %d, error %d", lnum, err); 1166 if (err)
1166 return err; 1167 return err;
1167 } 1168
1168 while (1) { 1169 while (1) {
1169 if (!is_a_node(c, buf, len)) { 1170 if (!is_a_node(c, buf, len)) {
1170 int pad_len; 1171 int pad_len;
@@ -1640,7 +1641,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1640 int ret; 1641 int ret;
1641 void *buf, *p; 1642 void *buf, *p;
1642 1643
1643 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1644 if (!dbg_is_chk_lprops(c))
1644 return 0; 1645 return 0;
1645 1646
1646 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); 1647 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
@@ -1650,11 +1651,11 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1650 } 1651 }
1651 1652
1652 dbg_lp("LEB %d", lnum); 1653 dbg_lp("LEB %d", lnum);
1653 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1654
1654 if (err) { 1655 err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
1655 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); 1656 if (err)
1656 goto out; 1657 goto out;
1657 } 1658
1658 while (1) { 1659 while (1) {
1659 if (!is_a_node(c, p, len)) { 1660 if (!is_a_node(c, p, len)) {
1660 int i, pad_len; 1661 int i, pad_len;
@@ -1711,7 +1712,7 @@ int dbg_check_ltab(struct ubifs_info *c)
1711{ 1712{
1712 int lnum, err, i, cnt; 1713 int lnum, err, i, cnt;
1713 1714
1714 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1715 if (!dbg_is_chk_lprops(c))
1715 return 0; 1716 return 0;
1716 1717
1717 /* Bring the entire tree into memory */ 1718 /* Bring the entire tree into memory */
@@ -1754,7 +1755,7 @@ int dbg_chk_lpt_free_spc(struct ubifs_info *c)
1754 long long free = 0; 1755 long long free = 0;
1755 int i; 1756 int i;
1756 1757
1757 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1758 if (!dbg_is_chk_lprops(c))
1758 return 0; 1759 return 0;
1759 1760
1760 for (i = 0; i < c->lpt_lebs; i++) { 1761 for (i = 0; i < c->lpt_lebs; i++) {
@@ -1796,7 +1797,7 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1796 long long chk_lpt_sz, lpt_sz; 1797 long long chk_lpt_sz, lpt_sz;
1797 int err = 0; 1798 int err = 0;
1798 1799
1799 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1800 if (!dbg_is_chk_lprops(c))
1800 return 0; 1801 return 0;
1801 1802
1802 switch (action) { 1803 switch (action) {
@@ -1901,11 +1902,10 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1901 return; 1902 return;
1902 } 1903 }
1903 1904
1904 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1905 err = ubifs_leb_read(c, lnum, buf, 0, c->leb_size, 1);
1905 if (err) { 1906 if (err)
1906 ubifs_err("cannot read LEB %d, error %d", lnum, err);
1907 goto out; 1907 goto out;
1908 } 1908
1909 while (1) { 1909 while (1) {
1910 offs = c->leb_size - len; 1910 offs = c->leb_size - len;
1911 if (!is_a_node(c, p, len)) { 1911 if (!is_a_node(c, p, len)) {
@@ -2019,7 +2019,7 @@ static int dbg_populate_lsave(struct ubifs_info *c)
2019 struct ubifs_lpt_heap *heap; 2019 struct ubifs_lpt_heap *heap;
2020 int i; 2020 int i;
2021 2021
2022 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 2022 if (!dbg_is_chk_gen(c))
2023 return 0; 2023 return 0;
2024 if (random32() & 3) 2024 if (random32() & 3)
2025 return 0; 2025 return 0;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 0b5296a9a4c5..ee7cb5ebb6e8 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -39,6 +39,29 @@ static inline int ubifs_zn_dirty(const struct ubifs_znode *znode)
39} 39}
40 40
41/** 41/**
42 * ubifs_zn_obsolete - check if znode is obsolete.
43 * @znode: znode to check
44 *
45 * This helper function returns %1 if @znode is obsolete and %0 otherwise.
46 */
47static inline int ubifs_zn_obsolete(const struct ubifs_znode *znode)
48{
49 return !!test_bit(OBSOLETE_ZNODE, &znode->flags);
50}
51
52/**
53 * ubifs_zn_cow - check if znode has to be copied on write.
54 * @znode: znode to check
55 *
56 * This helper function returns %1 if @znode has COW flag set and %0
57 * otherwise.
58 */
59static inline int ubifs_zn_cow(const struct ubifs_znode *znode)
60{
61 return !!test_bit(COW_ZNODE, &znode->flags);
62}
63
64/**
42 * ubifs_wake_up_bgt - wake up background thread. 65 * ubifs_wake_up_bgt - wake up background thread.
43 * @c: UBIFS file-system description object 66 * @c: UBIFS file-system description object
44 */ 67 */
@@ -122,86 +145,6 @@ static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf)
122} 145}
123 146
124/** 147/**
125 * ubifs_leb_unmap - unmap an LEB.
126 * @c: UBIFS file-system description object
127 * @lnum: LEB number to unmap
128 *
129 * This function returns %0 on success and a negative error code on failure.
130 */
131static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
132{
133 int err;
134
135 ubifs_assert(!c->ro_media && !c->ro_mount);
136 if (c->ro_error)
137 return -EROFS;
138 err = ubi_leb_unmap(c->ubi, lnum);
139 if (err) {
140 ubifs_err("unmap LEB %d failed, error %d", lnum, err);
141 return err;
142 }
143
144 return 0;
145}
146
147/**
148 * ubifs_leb_write - write to a LEB.
149 * @c: UBIFS file-system description object
150 * @lnum: LEB number to write
151 * @buf: buffer to write from
152 * @offs: offset within LEB to write to
153 * @len: length to write
154 * @dtype: data type
155 *
156 * This function returns %0 on success and a negative error code on failure.
157 */
158static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
159 const void *buf, int offs, int len, int dtype)
160{
161 int err;
162
163 ubifs_assert(!c->ro_media && !c->ro_mount);
164 if (c->ro_error)
165 return -EROFS;
166 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
167 if (err) {
168 ubifs_err("writing %d bytes at %d:%d, error %d",
169 len, lnum, offs, err);
170 return err;
171 }
172
173 return 0;
174}
175
176/**
177 * ubifs_leb_change - atomic LEB change.
178 * @c: UBIFS file-system description object
179 * @lnum: LEB number to write
180 * @buf: buffer to write from
181 * @len: length to write
182 * @dtype: data type
183 *
184 * This function returns %0 on success and a negative error code on failure.
185 */
186static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
187 const void *buf, int len, int dtype)
188{
189 int err;
190
191 ubifs_assert(!c->ro_media && !c->ro_mount);
192 if (c->ro_error)
193 return -EROFS;
194 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
195 if (err) {
196 ubifs_err("changing %d bytes in LEB %d, error %d",
197 len, lnum, err);
198 return err;
199 }
200
201 return 0;
202}
203
204/**
205 * ubifs_encode_dev - encode device node IDs. 148 * ubifs_encode_dev - encode device node IDs.
206 * @dev: UBIFS device node information 149 * @dev: UBIFS device node information
207 * @rdev: device IDs to encode 150 * @rdev: device IDs to encode
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index a5422fffbd69..c542c73cfa3c 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -929,7 +929,7 @@ static int dbg_check_orphans(struct ubifs_info *c)
929 struct check_info ci; 929 struct check_info ci;
930 int err; 930 int err;
931 931
932 if (!(ubifs_chk_flags & UBIFS_CHK_ORPH)) 932 if (!dbg_is_chk_orph(c))
933 return 0; 933 return 0;
934 934
935 ci.last_ino = 0; 935 ci.last_ino = 0;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 783d8e0beb76..af02790d9328 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -117,7 +117,7 @@ static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
117 if (!sbuf) 117 if (!sbuf)
118 return -ENOMEM; 118 return -ENOMEM;
119 119
120 err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size); 120 err = ubifs_leb_read(c, lnum, sbuf, 0, c->leb_size, 0);
121 if (err && err != -EBADMSG) 121 if (err && err != -EBADMSG)
122 goto out_free; 122 goto out_free;
123 123
@@ -213,10 +213,10 @@ static int write_rcvrd_mst_node(struct ubifs_info *c,
213 mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY); 213 mst->flags |= cpu_to_le32(UBIFS_MST_RCVRY);
214 214
215 ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1); 215 ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
216 err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM); 216 err = ubifs_leb_change(c, lnum, mst, sz, UBI_SHORTTERM);
217 if (err) 217 if (err)
218 goto out; 218 goto out;
219 err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM); 219 err = ubifs_leb_change(c, lnum + 1, mst, sz, UBI_SHORTTERM);
220 if (err) 220 if (err)
221 goto out; 221 goto out;
222out: 222out:
@@ -274,7 +274,8 @@ int ubifs_recover_master_node(struct ubifs_info *c)
274 if (cor1) 274 if (cor1)
275 goto out_err; 275 goto out_err;
276 mst = mst1; 276 mst = mst1;
277 } else if (offs1 == 0 && offs2 + sz >= c->leb_size) { 277 } else if (offs1 == 0 &&
278 c->leb_size - offs2 - sz < sz) {
278 /* 1st LEB was unmapped and written, 2nd not */ 279 /* 1st LEB was unmapped and written, 2nd not */
279 if (cor1) 280 if (cor1)
280 goto out_err; 281 goto out_err;
@@ -539,8 +540,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
539 int len = ALIGN(endpt, c->min_io_size); 540 int len = ALIGN(endpt, c->min_io_size);
540 541
541 if (start) { 542 if (start) {
542 err = ubi_read(c->ubi, lnum, sleb->buf, 0, 543 err = ubifs_leb_read(c, lnum, sleb->buf, 0,
543 start); 544 start, 1);
544 if (err) 545 if (err)
545 return err; 546 return err;
546 } 547 }
@@ -554,8 +555,8 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
554 ubifs_pad(c, buf, pad_len); 555 ubifs_pad(c, buf, pad_len);
555 } 556 }
556 } 557 }
557 err = ubi_leb_change(c->ubi, lnum, sleb->buf, len, 558 err = ubifs_leb_change(c, lnum, sleb->buf, len,
558 UBI_UNKNOWN); 559 UBI_UNKNOWN);
559 if (err) 560 if (err)
560 return err; 561 return err;
561 } 562 }
@@ -819,7 +820,8 @@ static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
819 return -ENOMEM; 820 return -ENOMEM;
820 if (c->leb_size - offs < UBIFS_CS_NODE_SZ) 821 if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
821 goto out_err; 822 goto out_err;
822 err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ); 823 err = ubifs_leb_read(c, lnum, (void *)cs_node, offs,
824 UBIFS_CS_NODE_SZ, 0);
823 if (err && err != -EBADMSG) 825 if (err && err != -EBADMSG)
824 goto out_free; 826 goto out_free;
825 ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0); 827 ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
@@ -919,8 +921,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
919 * 921 *
920 * This function returns %0 on success and a negative error code on failure. 922 * This function returns %0 on success and a negative error code on failure.
921 */ 923 */
922static int recover_head(const struct ubifs_info *c, int lnum, int offs, 924static int recover_head(struct ubifs_info *c, int lnum, int offs, void *sbuf)
923 void *sbuf)
924{ 925{
925 int len = c->max_write_size, err; 926 int len = c->max_write_size, err;
926 927
@@ -931,15 +932,15 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
931 return 0; 932 return 0;
932 933
933 /* Read at the head location and check it is empty flash */ 934 /* Read at the head location and check it is empty flash */
934 err = ubi_read(c->ubi, lnum, sbuf, offs, len); 935 err = ubifs_leb_read(c, lnum, sbuf, offs, len, 1);
935 if (err || !is_empty(sbuf, len)) { 936 if (err || !is_empty(sbuf, len)) {
936 dbg_rcvry("cleaning head at %d:%d", lnum, offs); 937 dbg_rcvry("cleaning head at %d:%d", lnum, offs);
937 if (offs == 0) 938 if (offs == 0)
938 return ubifs_leb_unmap(c, lnum); 939 return ubifs_leb_unmap(c, lnum);
939 err = ubi_read(c->ubi, lnum, sbuf, 0, offs); 940 err = ubifs_leb_read(c, lnum, sbuf, 0, offs, 1);
940 if (err) 941 if (err)
941 return err; 942 return err;
942 return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN); 943 return ubifs_leb_change(c, lnum, sbuf, offs, UBI_UNKNOWN);
943 } 944 }
944 945
945 return 0; 946 return 0;
@@ -962,7 +963,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
962 * 963 *
963 * This function returns %0 on success and a negative error code on failure. 964 * This function returns %0 on success and a negative error code on failure.
964 */ 965 */
965int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) 966int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf)
966{ 967{
967 int err; 968 int err;
968 969
@@ -993,7 +994,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
993 * 994 *
994 * This function returns %0 on success and a negative error code on failure. 995 * This function returns %0 on success and a negative error code on failure.
995 */ 996 */
996static int clean_an_unclean_leb(const struct ubifs_info *c, 997static int clean_an_unclean_leb(struct ubifs_info *c,
997 struct ubifs_unclean_leb *ucleb, void *sbuf) 998 struct ubifs_unclean_leb *ucleb, void *sbuf)
998{ 999{
999 int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1; 1000 int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
@@ -1009,7 +1010,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c,
1009 return 0; 1010 return 0;
1010 } 1011 }
1011 1012
1012 err = ubi_read(c->ubi, lnum, buf, offs, len); 1013 err = ubifs_leb_read(c, lnum, buf, offs, len, 0);
1013 if (err && err != -EBADMSG) 1014 if (err && err != -EBADMSG)
1014 return err; 1015 return err;
1015 1016
@@ -1069,7 +1070,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c,
1069 } 1070 }
1070 1071
1071 /* Write back the LEB atomically */ 1072 /* Write back the LEB atomically */
1072 err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN); 1073 err = ubifs_leb_change(c, lnum, sbuf, len, UBI_UNKNOWN);
1073 if (err) 1074 if (err)
1074 return err; 1075 return err;
1075 1076
@@ -1089,7 +1090,7 @@ static int clean_an_unclean_leb(const struct ubifs_info *c,
1089 * 1090 *
1090 * This function returns %0 on success and a negative error code on failure. 1091 * This function returns %0 on success and a negative error code on failure.
1091 */ 1092 */
1092int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) 1093int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf)
1093{ 1094{
1094 dbg_rcvry("recovery"); 1095 dbg_rcvry("recovery");
1095 while (!list_empty(&c->unclean_leb_list)) { 1096 while (!list_empty(&c->unclean_leb_list)) {
@@ -1454,7 +1455,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1454 if (i_size >= e->d_size) 1455 if (i_size >= e->d_size)
1455 return 0; 1456 return 0;
1456 /* Read the LEB */ 1457 /* Read the LEB */
1457 err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size); 1458 err = ubifs_leb_read(c, lnum, c->sbuf, 0, c->leb_size, 1);
1458 if (err) 1459 if (err)
1459 goto out; 1460 goto out;
1460 /* Change the size field and recalculate the CRC */ 1461 /* Change the size field and recalculate the CRC */
@@ -1470,7 +1471,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1470 len -= 1; 1471 len -= 1;
1471 len = ALIGN(len + 1, c->min_io_size); 1472 len = ALIGN(len + 1, c->min_io_size);
1472 /* Atomically write the fixed LEB back again */ 1473 /* Atomically write the fixed LEB back again */
1473 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 1474 err = ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN);
1474 if (err) 1475 if (err)
1475 goto out; 1476 goto out;
1476 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", 1477 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 5e97161ce4d3..ccabaf1164b3 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -523,8 +523,7 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
523 if (!list_is_last(&next->list, &jh->buds_list)) 523 if (!list_is_last(&next->list, &jh->buds_list))
524 return 0; 524 return 0;
525 525
526 err = ubi_read(c->ubi, next->lnum, (char *)&data, 526 err = ubifs_leb_read(c, next->lnum, (char *)&data, next->start, 4, 1);
527 next->start, 4);
528 if (err) 527 if (err)
529 return 0; 528 return 0;
530 529
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index c606f010e8df..93d938ad3d2a 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -674,15 +674,15 @@ static int fixup_leb(struct ubifs_info *c, int lnum, int len)
674 674
675 if (len == 0) { 675 if (len == 0) {
676 dbg_mnt("unmap empty LEB %d", lnum); 676 dbg_mnt("unmap empty LEB %d", lnum);
677 return ubi_leb_unmap(c->ubi, lnum); 677 return ubifs_leb_unmap(c, lnum);
678 } 678 }
679 679
680 dbg_mnt("fixup LEB %d, data len %d", lnum, len); 680 dbg_mnt("fixup LEB %d, data len %d", lnum, len);
681 err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); 681 err = ubifs_leb_read(c, lnum, c->sbuf, 0, len, 1);
682 if (err) 682 if (err)
683 return err; 683 return err;
684 684
685 return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 685 return ubifs_leb_change(c, lnum, c->sbuf, len, UBI_UNKNOWN);
686} 686}
687 687
688/** 688/**
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 36216b46f772..37383e8011b1 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -148,7 +148,7 @@ struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
148 INIT_LIST_HEAD(&sleb->nodes); 148 INIT_LIST_HEAD(&sleb->nodes);
149 sleb->buf = sbuf; 149 sleb->buf = sbuf;
150 150
151 err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs); 151 err = ubifs_leb_read(c, lnum, sbuf + offs, offs, c->leb_size - offs, 0);
152 if (err && err != -EBADMSG) { 152 if (err && err != -EBADMSG) {
153 ubifs_err("cannot read %d bytes from LEB %d:%d," 153 ubifs_err("cannot read %d bytes from LEB %d:%d,"
154 " error %d", c->leb_size - offs, lnum, offs, err); 154 " error %d", c->leb_size - offs, lnum, offs, err);
@@ -240,7 +240,7 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
240 int len; 240 int len;
241 241
242 ubifs_err("corruption at LEB %d:%d", lnum, offs); 242 ubifs_err("corruption at LEB %d:%d", lnum, offs);
243 if (dbg_failure_mode) 243 if (dbg_is_tst_rcvry(c))
244 return; 244 return;
245 len = c->leb_size - offs; 245 len = c->leb_size - offs;
246 if (len > 8192) 246 if (len > 8192)
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 529be0582029..b28121278d46 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -85,7 +85,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
85 if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA) 85 if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA)
86 return 4; 86 return 4;
87 87
88 if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG) 88 if (ui->xattr && !S_ISREG(inode->i_mode))
89 return 5; 89 return 5;
90 90
91 if (!ubifs_compr_present(ui->compr_type)) { 91 if (!ubifs_compr_present(ui->compr_type)) {
@@ -94,7 +94,7 @@ static int validate_inode(struct ubifs_info *c, const struct inode *inode)
94 ubifs_compr_name(ui->compr_type)); 94 ubifs_compr_name(ui->compr_type));
95 } 95 }
96 96
97 err = dbg_check_dir_size(c, inode); 97 err = dbg_check_dir(c, inode);
98 return err; 98 return err;
99} 99}
100 100
@@ -914,7 +914,7 @@ static int check_volume_empty(struct ubifs_info *c)
914 914
915 c->empty = 1; 915 c->empty = 1;
916 for (lnum = 0; lnum < c->leb_cnt; lnum++) { 916 for (lnum = 0; lnum < c->leb_cnt; lnum++) {
917 err = ubi_is_mapped(c->ubi, lnum); 917 err = ubifs_is_mapped(c, lnum);
918 if (unlikely(err < 0)) 918 if (unlikely(err < 0))
919 return err; 919 return err;
920 if (err == 1) { 920 if (err == 1) {
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 91b4213dde84..066738647685 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -223,7 +223,7 @@ static struct ubifs_znode *copy_znode(struct ubifs_info *c,
223 __set_bit(DIRTY_ZNODE, &zn->flags); 223 __set_bit(DIRTY_ZNODE, &zn->flags);
224 __clear_bit(COW_ZNODE, &zn->flags); 224 __clear_bit(COW_ZNODE, &zn->flags);
225 225
226 ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); 226 ubifs_assert(!ubifs_zn_obsolete(znode));
227 __set_bit(OBSOLETE_ZNODE, &znode->flags); 227 __set_bit(OBSOLETE_ZNODE, &znode->flags);
228 228
229 if (znode->level != 0) { 229 if (znode->level != 0) {
@@ -271,7 +271,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
271 struct ubifs_znode *zn; 271 struct ubifs_znode *zn;
272 int err; 272 int err;
273 273
274 if (!test_bit(COW_ZNODE, &znode->flags)) { 274 if (!ubifs_zn_cow(znode)) {
275 /* znode is not being committed */ 275 /* znode is not being committed */
276 if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) { 276 if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
277 atomic_long_inc(&c->dirty_zn_cnt); 277 atomic_long_inc(&c->dirty_zn_cnt);
@@ -462,7 +462,7 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
462 462
463 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); 463 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
464 464
465 err = ubi_read(c->ubi, lnum, buf, offs, len); 465 err = ubifs_leb_read(c, lnum, buf, offs, len, 1);
466 if (err) { 466 if (err) {
467 ubifs_err("cannot read node type %d from LEB %d:%d, error %d", 467 ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
468 type, lnum, offs, err); 468 type, lnum, offs, err);
@@ -1666,7 +1666,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
1666 if (!overlap) { 1666 if (!overlap) {
1667 /* We may safely unlock the write-buffer and read the data */ 1667 /* We may safely unlock the write-buffer and read the data */
1668 spin_unlock(&wbuf->lock); 1668 spin_unlock(&wbuf->lock);
1669 return ubi_read(c->ubi, lnum, buf, offs, len); 1669 return ubifs_leb_read(c, lnum, buf, offs, len, 0);
1670 } 1670 }
1671 1671
1672 /* Don't read under wbuf */ 1672 /* Don't read under wbuf */
@@ -1680,7 +1680,7 @@ static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
1680 1680
1681 if (rlen > 0) 1681 if (rlen > 0)
1682 /* Read everything that goes before write-buffer */ 1682 /* Read everything that goes before write-buffer */
1683 return ubi_read(c->ubi, lnum, buf, offs, rlen); 1683 return ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
1684 1684
1685 return 0; 1685 return 0;
1686} 1686}
@@ -1767,7 +1767,7 @@ int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1767 if (wbuf) 1767 if (wbuf)
1768 err = read_wbuf(wbuf, bu->buf, len, lnum, offs); 1768 err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
1769 else 1769 else
1770 err = ubi_read(c->ubi, lnum, bu->buf, offs, len); 1770 err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0);
1771 1771
1772 /* Check for a race with GC */ 1772 /* Check for a race with GC */
1773 if (maybe_leb_gced(c, lnum, bu->gc_seq)) 1773 if (maybe_leb_gced(c, lnum, bu->gc_seq))
@@ -2423,7 +2423,7 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
2423 */ 2423 */
2424 2424
2425 do { 2425 do {
2426 ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags)); 2426 ubifs_assert(!ubifs_zn_obsolete(znode));
2427 ubifs_assert(ubifs_zn_dirty(znode)); 2427 ubifs_assert(ubifs_zn_dirty(znode));
2428 2428
2429 zp = znode->parent; 2429 zp = znode->parent;
@@ -2479,9 +2479,8 @@ static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
2479 c->zroot.offs = zbr->offs; 2479 c->zroot.offs = zbr->offs;
2480 c->zroot.len = zbr->len; 2480 c->zroot.len = zbr->len;
2481 c->zroot.znode = znode; 2481 c->zroot.znode = znode;
2482 ubifs_assert(!test_bit(OBSOLETE_ZNODE, 2482 ubifs_assert(!ubifs_zn_obsolete(zp));
2483 &zp->flags)); 2483 ubifs_assert(ubifs_zn_dirty(zp));
2484 ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags));
2485 atomic_long_dec(&c->dirty_zn_cnt); 2484 atomic_long_dec(&c->dirty_zn_cnt);
2486 2485
2487 if (zp->cnext) { 2486 if (zp->cnext) {
@@ -2865,7 +2864,7 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
2865 struct ubifs_znode *znode = cnext; 2864 struct ubifs_znode *znode = cnext;
2866 2865
2867 cnext = cnext->cnext; 2866 cnext = cnext->cnext;
2868 if (test_bit(OBSOLETE_ZNODE, &znode->flags)) 2867 if (ubifs_zn_obsolete(znode))
2869 kfree(znode); 2868 kfree(znode);
2870 } while (cnext && cnext != c->cnext); 2869 } while (cnext && cnext != c->cnext);
2871} 2870}
@@ -3301,7 +3300,7 @@ int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
3301 3300
3302 if (!S_ISREG(inode->i_mode)) 3301 if (!S_ISREG(inode->i_mode))
3303 return 0; 3302 return 0;
3304 if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) 3303 if (!dbg_is_chk_gen(c))
3305 return 0; 3304 return 0;
3306 3305
3307 block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; 3306 block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
@@ -3337,9 +3336,10 @@ out_dump:
3337 ubifs_err("inode %lu has size %lld, but there are data at offset %lld " 3336 ubifs_err("inode %lu has size %lld, but there are data at offset %lld "
3338 "(data key %s)", (unsigned long)inode->i_ino, size, 3337 "(data key %s)", (unsigned long)inode->i_ino, size,
3339 ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); 3338 ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key));
3339 mutex_unlock(&c->tnc_mutex);
3340 dbg_dump_inode(c, inode); 3340 dbg_dump_inode(c, inode);
3341 dbg_dump_stack(); 3341 dbg_dump_stack();
3342 err = -EINVAL; 3342 return -EINVAL;
3343 3343
3344out_unlock: 3344out_unlock:
3345 mutex_unlock(&c->tnc_mutex); 3345 mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 41920f357bbf..4c15f07a8bb2 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -22,6 +22,7 @@
22 22
23/* This file implements TNC functions for committing */ 23/* This file implements TNC functions for committing */
24 24
25#include <linux/random.h>
25#include "ubifs.h" 26#include "ubifs.h"
26 27
27/** 28/**
@@ -87,8 +88,12 @@ static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
87 atomic_long_dec(&c->dirty_zn_cnt); 88 atomic_long_dec(&c->dirty_zn_cnt);
88 89
89 ubifs_assert(ubifs_zn_dirty(znode)); 90 ubifs_assert(ubifs_zn_dirty(znode));
90 ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); 91 ubifs_assert(ubifs_zn_cow(znode));
91 92
93 /*
94 * Note, unlike 'write_index()' we do not add memory barriers here
95 * because this function is called with @c->tnc_mutex locked.
96 */
92 __clear_bit(DIRTY_ZNODE, &znode->flags); 97 __clear_bit(DIRTY_ZNODE, &znode->flags);
93 __clear_bit(COW_ZNODE, &znode->flags); 98 __clear_bit(COW_ZNODE, &znode->flags);
94 99
@@ -377,7 +382,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
377 c->gap_lebs = NULL; 382 c->gap_lebs = NULL;
378 return err; 383 return err;
379 } 384 }
380 if (dbg_force_in_the_gaps_enabled()) { 385 if (!dbg_is_chk_index(c)) {
381 /* 386 /*
382 * Do not print scary warnings if the debugging 387 * Do not print scary warnings if the debugging
383 * option which forces in-the-gaps is enabled. 388 * option which forces in-the-gaps is enabled.
@@ -491,25 +496,6 @@ static int layout_in_empty_space(struct ubifs_info *c)
491 else 496 else
492 next_len = ubifs_idx_node_sz(c, cnext->child_cnt); 497 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
493 498
494 if (c->min_io_size == 1) {
495 buf_offs += ALIGN(len, 8);
496 if (next_len) {
497 if (buf_offs + next_len <= c->leb_size)
498 continue;
499 err = ubifs_update_one_lp(c, lnum, 0,
500 c->leb_size - buf_offs, 0, 0);
501 if (err)
502 return err;
503 lnum = -1;
504 continue;
505 }
506 err = ubifs_update_one_lp(c, lnum,
507 c->leb_size - buf_offs, 0, 0, 0);
508 if (err)
509 return err;
510 break;
511 }
512
513 /* Update buffer positions */ 499 /* Update buffer positions */
514 wlen = used + len; 500 wlen = used + len;
515 used += ALIGN(len, 8); 501 used += ALIGN(len, 8);
@@ -658,7 +644,7 @@ static int get_znodes_to_commit(struct ubifs_info *c)
658 } 644 }
659 cnt += 1; 645 cnt += 1;
660 while (1) { 646 while (1) {
661 ubifs_assert(!test_bit(COW_ZNODE, &znode->flags)); 647 ubifs_assert(!ubifs_zn_cow(znode));
662 __set_bit(COW_ZNODE, &znode->flags); 648 __set_bit(COW_ZNODE, &znode->flags);
663 znode->alt = 0; 649 znode->alt = 0;
664 cnext = find_next_dirty(znode); 650 cnext = find_next_dirty(znode);
@@ -704,7 +690,7 @@ static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
704 c->ilebs[c->ileb_cnt++] = lnum; 690 c->ilebs[c->ileb_cnt++] = lnum;
705 dbg_cmt("LEB %d", lnum); 691 dbg_cmt("LEB %d", lnum);
706 } 692 }
707 if (dbg_force_in_the_gaps()) 693 if (dbg_is_chk_index(c) && !(random32() & 7))
708 return -ENOSPC; 694 return -ENOSPC;
709 return 0; 695 return 0;
710} 696}
@@ -830,7 +816,7 @@ static int write_index(struct ubifs_info *c)
830 struct ubifs_idx_node *idx; 816 struct ubifs_idx_node *idx;
831 struct ubifs_znode *znode, *cnext; 817 struct ubifs_znode *znode, *cnext;
832 int i, lnum, offs, len, next_len, buf_len, buf_offs, used; 818 int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
833 int avail, wlen, err, lnum_pos = 0; 819 int avail, wlen, err, lnum_pos = 0, blen, nxt_offs;
834 820
835 cnext = c->enext; 821 cnext = c->enext;
836 if (!cnext) 822 if (!cnext)
@@ -907,7 +893,7 @@ static int write_index(struct ubifs_info *c)
907 cnext = znode->cnext; 893 cnext = znode->cnext;
908 894
909 ubifs_assert(ubifs_zn_dirty(znode)); 895 ubifs_assert(ubifs_zn_dirty(znode));
910 ubifs_assert(test_bit(COW_ZNODE, &znode->flags)); 896 ubifs_assert(ubifs_zn_cow(znode));
911 897
912 /* 898 /*
913 * It is important that other threads should see %DIRTY_ZNODE 899 * It is important that other threads should see %DIRTY_ZNODE
@@ -922,6 +908,28 @@ static int write_index(struct ubifs_info *c)
922 clear_bit(COW_ZNODE, &znode->flags); 908 clear_bit(COW_ZNODE, &znode->flags);
923 smp_mb__after_clear_bit(); 909 smp_mb__after_clear_bit();
924 910
911 /*
912 * We have marked the znode as clean but have not updated the
913 * @c->clean_zn_cnt counter. If this znode becomes dirty again
914 * before 'free_obsolete_znodes()' is called, then
915 * @c->clean_zn_cnt will be decremented before it gets
916 * incremented (resulting in 2 decrements for the same znode).
917 * This means that @c->clean_zn_cnt may become negative for a
918 * while.
919 *
920 * Q: why we cannot increment @c->clean_zn_cnt?
921 * A: because we do not have the @c->tnc_mutex locked, and the
922 * following code would be racy and buggy:
923 *
924 * if (!ubifs_zn_obsolete(znode)) {
925 * atomic_long_inc(&c->clean_zn_cnt);
926 * atomic_long_inc(&ubifs_clean_zn_cnt);
927 * }
928 *
929 * Thus, we just delay the @c->clean_zn_cnt update until we
930 * have the mutex locked.
931 */
932
925 /* Do not access znode from this point on */ 933 /* Do not access znode from this point on */
926 934
927 /* Update buffer positions */ 935 /* Update buffer positions */
@@ -938,65 +946,38 @@ static int write_index(struct ubifs_info *c)
938 else 946 else
939 next_len = ubifs_idx_node_sz(c, cnext->child_cnt); 947 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
940 948
941 if (c->min_io_size == 1) { 949 nxt_offs = buf_offs + used + next_len;
942 /* 950 if (next_len && nxt_offs <= c->leb_size) {
943 * Write the prepared index node immediately if there is 951 if (avail > 0)
944 * no minimum IO size
945 */
946 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
947 wlen, UBI_SHORTTERM);
948 if (err)
949 return err;
950 buf_offs += ALIGN(wlen, 8);
951 if (next_len) {
952 used = 0;
953 avail = buf_len;
954 if (buf_offs + next_len > c->leb_size) {
955 err = ubifs_update_one_lp(c, lnum,
956 LPROPS_NC, 0, 0, LPROPS_TAKEN);
957 if (err)
958 return err;
959 lnum = -1;
960 }
961 continue; 952 continue;
962 } 953 else
954 blen = buf_len;
963 } else { 955 } else {
964 int blen, nxt_offs = buf_offs + used + next_len; 956 wlen = ALIGN(wlen, 8);
965 957 blen = ALIGN(wlen, c->min_io_size);
966 if (next_len && nxt_offs <= c->leb_size) { 958 ubifs_pad(c, c->cbuf + wlen, blen - wlen);
967 if (avail > 0) 959 }
968 continue; 960
969 else 961 /* The buffer is full or there are no more znodes to do */
970 blen = buf_len; 962 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, blen,
971 } else { 963 UBI_SHORTTERM);
972 wlen = ALIGN(wlen, 8); 964 if (err)
973 blen = ALIGN(wlen, c->min_io_size); 965 return err;
974 ubifs_pad(c, c->cbuf + wlen, blen - wlen); 966 buf_offs += blen;
975 } 967 if (next_len) {
976 /* 968 if (nxt_offs > c->leb_size) {
977 * The buffer is full or there are no more znodes 969 err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0,
978 * to do 970 0, LPROPS_TAKEN);
979 */ 971 if (err)
980 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs, 972 return err;
981 blen, UBI_SHORTTERM); 973 lnum = -1;
982 if (err)
983 return err;
984 buf_offs += blen;
985 if (next_len) {
986 if (nxt_offs > c->leb_size) {
987 err = ubifs_update_one_lp(c, lnum,
988 LPROPS_NC, 0, 0, LPROPS_TAKEN);
989 if (err)
990 return err;
991 lnum = -1;
992 }
993 used -= blen;
994 if (used < 0)
995 used = 0;
996 avail = buf_len - used;
997 memmove(c->cbuf, c->cbuf + blen, used);
998 continue;
999 } 974 }
975 used -= blen;
976 if (used < 0)
977 used = 0;
978 avail = buf_len - used;
979 memmove(c->cbuf, c->cbuf + blen, used);
980 continue;
1000 } 981 }
1001 break; 982 break;
1002 } 983 }
@@ -1029,7 +1010,7 @@ static void free_obsolete_znodes(struct ubifs_info *c)
1029 do { 1010 do {
1030 znode = cnext; 1011 znode = cnext;
1031 cnext = znode->cnext; 1012 cnext = znode->cnext;
1032 if (test_bit(OBSOLETE_ZNODE, &znode->flags)) 1013 if (ubifs_zn_obsolete(znode))
1033 kfree(znode); 1014 kfree(znode);
1034 else { 1015 else {
1035 znode->cnext = NULL; 1016 znode->cnext = NULL;
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index f79983d6f860..27f22551f805 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -230,14 +230,14 @@ enum {
230 * LPT cnode flag bits. 230 * LPT cnode flag bits.
231 * 231 *
232 * DIRTY_CNODE: cnode is dirty 232 * DIRTY_CNODE: cnode is dirty
233 * COW_CNODE: cnode is being committed and must be copied before writing
234 * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted), 233 * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted),
235 * so it can (and must) be freed when the commit is finished 234 * so it can (and must) be freed when the commit is finished
235 * COW_CNODE: cnode is being committed and must be copied before writing
236 */ 236 */
237enum { 237enum {
238 DIRTY_CNODE = 0, 238 DIRTY_CNODE = 0,
239 COW_CNODE = 1, 239 OBSOLETE_CNODE = 1,
240 OBSOLETE_CNODE = 2, 240 COW_CNODE = 2,
241}; 241};
242 242
243/* 243/*
@@ -1468,6 +1468,15 @@ extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
1468 1468
1469/* io.c */ 1469/* io.c */
1470void ubifs_ro_mode(struct ubifs_info *c, int err); 1470void ubifs_ro_mode(struct ubifs_info *c, int err);
1471int ubifs_leb_read(const struct ubifs_info *c, int lnum, void *buf, int offs,
1472 int len, int even_ebadmsg);
1473int ubifs_leb_write(struct ubifs_info *c, int lnum, const void *buf, int offs,
1474 int len, int dtype);
1475int ubifs_leb_change(struct ubifs_info *c, int lnum, const void *buf, int len,
1476 int dtype);
1477int ubifs_leb_unmap(struct ubifs_info *c, int lnum);
1478int ubifs_leb_map(struct ubifs_info *c, int lnum, int dtype);
1479int ubifs_is_mapped(const struct ubifs_info *c, int lnum);
1471int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); 1480int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
1472int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, 1481int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
1473 int dtype); 1482 int dtype);
@@ -1720,7 +1729,7 @@ const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
1720int ubifs_calc_dark(const struct ubifs_info *c, int spc); 1729int ubifs_calc_dark(const struct ubifs_info *c, int spc);
1721 1730
1722/* file.c */ 1731/* file.c */
1723int ubifs_fsync(struct file *file, int datasync); 1732int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
1724int ubifs_setattr(struct dentry *dentry, struct iattr *attr); 1733int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
1725 1734
1726/* dir.c */ 1735/* dir.c */
@@ -1747,8 +1756,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
1747 int offs, void *sbuf, int jhead); 1756 int offs, void *sbuf, int jhead);
1748struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, 1757struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
1749 int offs, void *sbuf); 1758 int offs, void *sbuf);
1750int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); 1759int ubifs_recover_inl_heads(struct ubifs_info *c, void *sbuf);
1751int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf); 1760int ubifs_clean_lebs(struct ubifs_info *c, void *sbuf);
1752int ubifs_rcvry_gc_commit(struct ubifs_info *c); 1761int ubifs_rcvry_gc_commit(struct ubifs_info *c);
1753int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key, 1762int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1754 int deletion, loff_t new_size); 1763 int deletion, loff_t new_size);
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2a346bb1d9f5..d8ffa7cc661d 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -150,7 +150,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
150 long old_block, new_block; 150 long old_block, new_block;
151 int result = -EINVAL; 151 int result = -EINVAL;
152 152
153 if (file_permission(filp, MAY_READ) != 0) { 153 if (inode_permission(inode, MAY_READ) != 0) {
154 udf_debug("no permission to access inode %lu\n", inode->i_ino); 154 udf_debug("no permission to access inode %lu\n", inode->i_ino);
155 result = -EPERM; 155 result = -EPERM;
156 goto out; 156 goto out;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index b57aab9a1184..639d49162241 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -59,8 +59,6 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru
59 if (ino) 59 if (ino)
60 inode = ufs_iget(dir->i_sb, ino); 60 inode = ufs_iget(dir->i_sb, ino);
61 unlock_ufs(dir->i_sb); 61 unlock_ufs(dir->i_sb);
62 if (IS_ERR(inode))
63 return ERR_CAST(inode);
64 return d_splice_alias(inode, dentry); 62 return d_splice_alias(inode, dentry);
65} 63}
66 64
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 284a7c89697e..75bb316529dd 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -88,8 +88,6 @@ xfs-y += xfs_alloc.o \
88 xfs_vnodeops.o \ 88 xfs_vnodeops.o \
89 xfs_rw.o 89 xfs_rw.o
90 90
91xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
92
93# Objects in linux/ 91# Objects in linux/
94xfs-y += $(addprefix $(XFS_LINUX)/, \ 92xfs-y += $(addprefix $(XFS_LINUX)/, \
95 kmem.o \ 93 kmem.o \
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 39f4f809bb68..cac48fe22ad5 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -219,7 +219,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
219} 219}
220 220
221int 221int
222xfs_check_acl(struct inode *inode, int mask, unsigned int flags) 222xfs_check_acl(struct inode *inode, int mask)
223{ 223{
224 struct xfs_inode *ip; 224 struct xfs_inode *ip;
225 struct posix_acl *acl; 225 struct posix_acl *acl;
@@ -235,7 +235,7 @@ xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
235 if (!XFS_IFORK_Q(ip)) 235 if (!XFS_IFORK_Q(ip))
236 return -EAGAIN; 236 return -EAGAIN;
237 237
238 if (flags & IPERM_FLAG_RCU) { 238 if (mask & MAY_NOT_BLOCK) {
239 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) 239 if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
240 return -ECHILD; 240 return -ECHILD;
241 return -EAGAIN; 241 return -EAGAIN;
@@ -264,7 +264,7 @@ xfs_set_mode(struct inode *inode, mode_t mode)
264 iattr.ia_mode = mode; 264 iattr.ia_mode = mode;
265 iattr.ia_ctime = current_fs_time(inode->i_sb); 265 iattr.ia_ctime = current_fs_time(inode->i_sb);
266 266
267 error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL); 267 error = -xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
268 } 268 }
269 269
270 return error; 270 return error;
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 79ce38be15a1..63e971e2b837 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -181,6 +181,7 @@ xfs_setfilesize(
181 181
182 isize = xfs_ioend_new_eof(ioend); 182 isize = xfs_ioend_new_eof(ioend);
183 if (isize) { 183 if (isize) {
184 trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
184 ip->i_d.di_size = isize; 185 ip->i_d.di_size = isize;
185 xfs_mark_inode_dirty(ip); 186 xfs_mark_inode_dirty(ip);
186 } 187 }
@@ -894,11 +895,6 @@ out_invalidate:
894 * For unwritten space on the page we need to start the conversion to 895 * For unwritten space on the page we need to start the conversion to
895 * regular allocated space. 896 * regular allocated space.
896 * For any other dirty buffer heads on the page we should flush them. 897 * For any other dirty buffer heads on the page we should flush them.
897 *
898 * If we detect that a transaction would be required to flush the page, we
899 * have to check the process flags first, if we are already in a transaction
900 * or disk I/O during allocations is off, we need to fail the writepage and
901 * redirty the page.
902 */ 898 */
903STATIC int 899STATIC int
904xfs_vm_writepage( 900xfs_vm_writepage(
@@ -906,7 +902,6 @@ xfs_vm_writepage(
906 struct writeback_control *wbc) 902 struct writeback_control *wbc)
907{ 903{
908 struct inode *inode = page->mapping->host; 904 struct inode *inode = page->mapping->host;
909 int delalloc, unwritten;
910 struct buffer_head *bh, *head; 905 struct buffer_head *bh, *head;
911 struct xfs_bmbt_irec imap; 906 struct xfs_bmbt_irec imap;
912 xfs_ioend_t *ioend = NULL, *iohead = NULL; 907 xfs_ioend_t *ioend = NULL, *iohead = NULL;
@@ -938,15 +933,10 @@ xfs_vm_writepage(
938 goto redirty; 933 goto redirty;
939 934
940 /* 935 /*
941 * We need a transaction if there are delalloc or unwritten buffers 936 * Given that we do not allow direct reclaim to call us, we should
942 * on the page. 937 * never be called while in a filesystem transaction.
943 *
944 * If we need a transaction and the process flags say we are already
945 * in a transaction, or no IO is allowed then mark the page dirty
946 * again and leave the page as is.
947 */ 938 */
948 xfs_count_page_state(page, &delalloc, &unwritten); 939 if (WARN_ON(current->flags & PF_FSTRANS))
949 if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
950 goto redirty; 940 goto redirty;
951 941
952 /* Is this page beyond the end of the file? */ 942 /* Is this page beyond the end of the file? */
@@ -970,7 +960,7 @@ xfs_vm_writepage(
970 offset = page_offset(page); 960 offset = page_offset(page);
971 type = IO_OVERWRITE; 961 type = IO_OVERWRITE;
972 962
973 if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking) 963 if (wbc->sync_mode == WB_SYNC_NONE)
974 nonblocking = 1; 964 nonblocking = 1;
975 965
976 do { 966 do {
@@ -1339,6 +1329,9 @@ xfs_end_io_direct_write(
1339 } else { 1329 } else {
1340 xfs_finish_ioend_sync(ioend); 1330 xfs_finish_ioend_sync(ioend);
1341 } 1331 }
1332
1333 /* XXX: probably should move into the real I/O completion handler */
1334 inode_dio_done(ioend->io_inode);
1342} 1335}
1343 1336
1344STATIC ssize_t 1337STATIC ssize_t
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 5e68099db2a5..b2b411985591 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -499,16 +499,14 @@ found:
499 spin_unlock(&pag->pag_buf_lock); 499 spin_unlock(&pag->pag_buf_lock);
500 xfs_perag_put(pag); 500 xfs_perag_put(pag);
501 501
502 if (xfs_buf_cond_lock(bp)) { 502 if (!xfs_buf_trylock(bp)) {
503 /* failed, so wait for the lock if requested. */ 503 if (flags & XBF_TRYLOCK) {
504 if (!(flags & XBF_TRYLOCK)) {
505 xfs_buf_lock(bp);
506 XFS_STATS_INC(xb_get_locked_waited);
507 } else {
508 xfs_buf_rele(bp); 504 xfs_buf_rele(bp);
509 XFS_STATS_INC(xb_busy_locked); 505 XFS_STATS_INC(xb_busy_locked);
510 return NULL; 506 return NULL;
511 } 507 }
508 xfs_buf_lock(bp);
509 XFS_STATS_INC(xb_get_locked_waited);
512 } 510 }
513 511
514 /* 512 /*
@@ -594,10 +592,8 @@ _xfs_buf_read(
594 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); 592 ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
595 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); 593 ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
596 594
597 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ 595 bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | XBF_READ_AHEAD);
598 XBF_READ_AHEAD | _XBF_RUN_QUEUES); 596 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
599 bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
600 XBF_READ_AHEAD | _XBF_RUN_QUEUES);
601 597
602 status = xfs_buf_iorequest(bp); 598 status = xfs_buf_iorequest(bp);
603 if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC)) 599 if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
@@ -681,7 +677,6 @@ xfs_buf_read_uncached(
681 return NULL; 677 return NULL;
682 678
683 /* set up the buffer for a read IO */ 679 /* set up the buffer for a read IO */
684 xfs_buf_lock(bp);
685 XFS_BUF_SET_ADDR(bp, daddr); 680 XFS_BUF_SET_ADDR(bp, daddr);
686 XFS_BUF_READ(bp); 681 XFS_BUF_READ(bp);
687 XFS_BUF_BUSY(bp); 682 XFS_BUF_BUSY(bp);
@@ -816,8 +811,6 @@ xfs_buf_get_uncached(
816 goto fail_free_mem; 811 goto fail_free_mem;
817 } 812 }
818 813
819 xfs_buf_unlock(bp);
820
821 trace_xfs_buf_get_uncached(bp, _RET_IP_); 814 trace_xfs_buf_get_uncached(bp, _RET_IP_);
822 return bp; 815 return bp;
823 816
@@ -896,8 +889,8 @@ xfs_buf_rele(
896 * to push on stale inode buffers. 889 * to push on stale inode buffers.
897 */ 890 */
898int 891int
899xfs_buf_cond_lock( 892xfs_buf_trylock(
900 xfs_buf_t *bp) 893 struct xfs_buf *bp)
901{ 894{
902 int locked; 895 int locked;
903 896
@@ -907,15 +900,8 @@ xfs_buf_cond_lock(
907 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) 900 else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
908 xfs_log_force(bp->b_target->bt_mount, 0); 901 xfs_log_force(bp->b_target->bt_mount, 0);
909 902
910 trace_xfs_buf_cond_lock(bp, _RET_IP_); 903 trace_xfs_buf_trylock(bp, _RET_IP_);
911 return locked ? 0 : -EBUSY; 904 return locked;
912}
913
914int
915xfs_buf_lock_value(
916 xfs_buf_t *bp)
917{
918 return bp->b_sema.count;
919} 905}
920 906
921/* 907/*
@@ -929,7 +915,7 @@ xfs_buf_lock_value(
929 */ 915 */
930void 916void
931xfs_buf_lock( 917xfs_buf_lock(
932 xfs_buf_t *bp) 918 struct xfs_buf *bp)
933{ 919{
934 trace_xfs_buf_lock(bp, _RET_IP_); 920 trace_xfs_buf_lock(bp, _RET_IP_);
935 921
@@ -950,7 +936,7 @@ xfs_buf_lock(
950 */ 936 */
951void 937void
952xfs_buf_unlock( 938xfs_buf_unlock(
953 xfs_buf_t *bp) 939 struct xfs_buf *bp)
954{ 940{
955 if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) { 941 if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
956 atomic_inc(&bp->b_hold); 942 atomic_inc(&bp->b_hold);
@@ -1121,7 +1107,7 @@ xfs_bioerror_relse(
1121 XFS_BUF_UNDELAYWRITE(bp); 1107 XFS_BUF_UNDELAYWRITE(bp);
1122 XFS_BUF_DONE(bp); 1108 XFS_BUF_DONE(bp);
1123 XFS_BUF_STALE(bp); 1109 XFS_BUF_STALE(bp);
1124 XFS_BUF_CLR_IODONE_FUNC(bp); 1110 bp->b_iodone = NULL;
1125 if (!(fl & XBF_ASYNC)) { 1111 if (!(fl & XBF_ASYNC)) {
1126 /* 1112 /*
1127 * Mark b_error and B_ERROR _both_. 1113 * Mark b_error and B_ERROR _both_.
@@ -1223,23 +1209,21 @@ _xfs_buf_ioapply(
1223 total_nr_pages = bp->b_page_count; 1209 total_nr_pages = bp->b_page_count;
1224 map_i = 0; 1210 map_i = 0;
1225 1211
1226 if (bp->b_flags & XBF_ORDERED) { 1212 if (bp->b_flags & XBF_WRITE) {
1227 ASSERT(!(bp->b_flags & XBF_READ)); 1213 if (bp->b_flags & XBF_SYNCIO)
1228 rw = WRITE_FLUSH_FUA; 1214 rw = WRITE_SYNC;
1229 } else if (bp->b_flags & XBF_LOG_BUFFER) { 1215 else
1230 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1216 rw = WRITE;
1231 bp->b_flags &= ~_XBF_RUN_QUEUES; 1217 if (bp->b_flags & XBF_FUA)
1232 rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC; 1218 rw |= REQ_FUA;
1233 } else if (bp->b_flags & _XBF_RUN_QUEUES) { 1219 if (bp->b_flags & XBF_FLUSH)
1234 ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); 1220 rw |= REQ_FLUSH;
1235 bp->b_flags &= ~_XBF_RUN_QUEUES; 1221 } else if (bp->b_flags & XBF_READ_AHEAD) {
1236 rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META; 1222 rw = READA;
1237 } else { 1223 } else {
1238 rw = (bp->b_flags & XBF_WRITE) ? WRITE : 1224 rw = READ;
1239 (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
1240 } 1225 }
1241 1226
1242
1243next_chunk: 1227next_chunk:
1244 atomic_inc(&bp->b_io_remaining); 1228 atomic_inc(&bp->b_io_remaining);
1245 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); 1229 nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
@@ -1694,15 +1678,14 @@ xfs_buf_delwri_split(
1694 list_for_each_entry_safe(bp, n, dwq, b_list) { 1678 list_for_each_entry_safe(bp, n, dwq, b_list) {
1695 ASSERT(bp->b_flags & XBF_DELWRI); 1679 ASSERT(bp->b_flags & XBF_DELWRI);
1696 1680
1697 if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) { 1681 if (!XFS_BUF_ISPINNED(bp) && xfs_buf_trylock(bp)) {
1698 if (!force && 1682 if (!force &&
1699 time_before(jiffies, bp->b_queuetime + age)) { 1683 time_before(jiffies, bp->b_queuetime + age)) {
1700 xfs_buf_unlock(bp); 1684 xfs_buf_unlock(bp);
1701 break; 1685 break;
1702 } 1686 }
1703 1687
1704 bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q| 1688 bp->b_flags &= ~(XBF_DELWRI | _XBF_DELWRI_Q);
1705 _XBF_RUN_QUEUES);
1706 bp->b_flags |= XBF_WRITE; 1689 bp->b_flags |= XBF_WRITE;
1707 list_move_tail(&bp->b_list, list); 1690 list_move_tail(&bp->b_list, list);
1708 trace_xfs_buf_delwri_split(bp, _RET_IP_); 1691 trace_xfs_buf_delwri_split(bp, _RET_IP_);
@@ -1738,14 +1721,6 @@ xfs_buf_cmp(
1738 return 0; 1721 return 0;
1739} 1722}
1740 1723
1741void
1742xfs_buf_delwri_sort(
1743 xfs_buftarg_t *target,
1744 struct list_head *list)
1745{
1746 list_sort(NULL, list, xfs_buf_cmp);
1747}
1748
1749STATIC int 1724STATIC int
1750xfsbufd( 1725xfsbufd(
1751 void *data) 1726 void *data)
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 50a7d5fb3b73..6a83b46b4bcf 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -46,43 +46,46 @@ typedef enum {
46 46
47#define XBF_READ (1 << 0) /* buffer intended for reading from device */ 47#define XBF_READ (1 << 0) /* buffer intended for reading from device */
48#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */ 48#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
49#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */ 49#define XBF_READ_AHEAD (1 << 2) /* asynchronous read-ahead */
50#define XBF_MAPPED (1 << 3) /* buffer mapped (b_addr valid) */
50#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */ 51#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
51#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ 52#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
52#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ 53#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
53#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ 54#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
54#define XBF_ORDERED (1 << 11)/* use ordered writes */ 55
55#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ 56/* I/O hints for the BIO layer */
56#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ 57#define XBF_SYNCIO (1 << 10)/* treat this buffer as synchronous I/O */
58#define XBF_FUA (1 << 11)/* force cache write through mode */
59#define XBF_FLUSH (1 << 12)/* flush the disk cache before a write */
57 60
58/* flags used only as arguments to access routines */ 61/* flags used only as arguments to access routines */
59#define XBF_LOCK (1 << 14)/* lock requested */ 62#define XBF_LOCK (1 << 15)/* lock requested */
60#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */ 63#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
61#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */ 64#define XBF_DONT_BLOCK (1 << 17)/* do not block in current thread */
62 65
63/* flags used only internally */ 66/* flags used only internally */
64#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */ 67#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
65#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */ 68#define _XBF_KMEM (1 << 21)/* backed by heap memory */
66#define _XBF_KMEM (1 << 20)/* backed by heap memory */ 69#define _XBF_DELWRI_Q (1 << 22)/* buffer on delwri queue */
67#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
68 70
69typedef unsigned int xfs_buf_flags_t; 71typedef unsigned int xfs_buf_flags_t;
70 72
71#define XFS_BUF_FLAGS \ 73#define XFS_BUF_FLAGS \
72 { XBF_READ, "READ" }, \ 74 { XBF_READ, "READ" }, \
73 { XBF_WRITE, "WRITE" }, \ 75 { XBF_WRITE, "WRITE" }, \
76 { XBF_READ_AHEAD, "READ_AHEAD" }, \
74 { XBF_MAPPED, "MAPPED" }, \ 77 { XBF_MAPPED, "MAPPED" }, \
75 { XBF_ASYNC, "ASYNC" }, \ 78 { XBF_ASYNC, "ASYNC" }, \
76 { XBF_DONE, "DONE" }, \ 79 { XBF_DONE, "DONE" }, \
77 { XBF_DELWRI, "DELWRI" }, \ 80 { XBF_DELWRI, "DELWRI" }, \
78 { XBF_STALE, "STALE" }, \ 81 { XBF_STALE, "STALE" }, \
79 { XBF_ORDERED, "ORDERED" }, \ 82 { XBF_SYNCIO, "SYNCIO" }, \
80 { XBF_READ_AHEAD, "READ_AHEAD" }, \ 83 { XBF_FUA, "FUA" }, \
84 { XBF_FLUSH, "FLUSH" }, \
81 { XBF_LOCK, "LOCK" }, /* should never be set */\ 85 { XBF_LOCK, "LOCK" }, /* should never be set */\
82 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\ 86 { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
83 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\ 87 { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
84 { _XBF_PAGES, "PAGES" }, \ 88 { _XBF_PAGES, "PAGES" }, \
85 { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
86 { _XBF_KMEM, "KMEM" }, \ 89 { _XBF_KMEM, "KMEM" }, \
87 { _XBF_DELWRI_Q, "DELWRI_Q" } 90 { _XBF_DELWRI_Q, "DELWRI_Q" }
88 91
@@ -91,11 +94,6 @@ typedef enum {
91 XBT_FORCE_FLUSH = 1, 94 XBT_FORCE_FLUSH = 1,
92} xfs_buftarg_flags_t; 95} xfs_buftarg_flags_t;
93 96
94typedef struct xfs_bufhash {
95 struct list_head bh_list;
96 spinlock_t bh_lock;
97} xfs_bufhash_t;
98
99typedef struct xfs_buftarg { 97typedef struct xfs_buftarg {
100 dev_t bt_dev; 98 dev_t bt_dev;
101 struct block_device *bt_bdev; 99 struct block_device *bt_bdev;
@@ -151,7 +149,7 @@ typedef struct xfs_buf {
151 xfs_buf_iodone_t b_iodone; /* I/O completion function */ 149 xfs_buf_iodone_t b_iodone; /* I/O completion function */
152 struct completion b_iowait; /* queue for I/O waiters */ 150 struct completion b_iowait; /* queue for I/O waiters */
153 void *b_fspriv; 151 void *b_fspriv;
154 void *b_fspriv2; 152 struct xfs_trans *b_transp;
155 struct page **b_pages; /* array of page pointers */ 153 struct page **b_pages; /* array of page pointers */
156 struct page *b_page_array[XB_PAGES]; /* inline pages */ 154 struct page *b_page_array[XB_PAGES]; /* inline pages */
157 unsigned long b_queuetime; /* time buffer was queued */ 155 unsigned long b_queuetime; /* time buffer was queued */
@@ -192,10 +190,11 @@ extern void xfs_buf_free(xfs_buf_t *);
192extern void xfs_buf_rele(xfs_buf_t *); 190extern void xfs_buf_rele(xfs_buf_t *);
193 191
194/* Locking and Unlocking Buffers */ 192/* Locking and Unlocking Buffers */
195extern int xfs_buf_cond_lock(xfs_buf_t *); 193extern int xfs_buf_trylock(xfs_buf_t *);
196extern int xfs_buf_lock_value(xfs_buf_t *);
197extern void xfs_buf_lock(xfs_buf_t *); 194extern void xfs_buf_lock(xfs_buf_t *);
198extern void xfs_buf_unlock(xfs_buf_t *); 195extern void xfs_buf_unlock(xfs_buf_t *);
196#define xfs_buf_islocked(bp) \
197 ((bp)->b_sema.count <= 0)
199 198
200/* Buffer Read and Write Routines */ 199/* Buffer Read and Write Routines */
201extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp); 200extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
@@ -234,8 +233,9 @@ extern void xfs_buf_terminate(void);
234 233
235 234
236#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags) 235#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
237#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \ 236#define XFS_BUF_ZEROFLAGS(bp) \
238 ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED)) 237 ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI| \
238 XBF_SYNCIO|XBF_FUA|XBF_FLUSH))
239 239
240void xfs_buf_stale(struct xfs_buf *bp); 240void xfs_buf_stale(struct xfs_buf *bp);
241#define XFS_BUF_STALE(bp) xfs_buf_stale(bp); 241#define XFS_BUF_STALE(bp) xfs_buf_stale(bp);
@@ -267,10 +267,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
267#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC) 267#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
268#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC) 268#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
269 269
270#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)
271#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
272#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
273
274#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) 270#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
275#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) 271#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
276#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) 272#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
@@ -280,14 +276,6 @@ void xfs_buf_stale(struct xfs_buf *bp);
280#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE) 276#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
281#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE) 277#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
282 278
283#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
284#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
285#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
286
287#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
288#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
289#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
290#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
291#define XFS_BUF_SET_START(bp) do { } while (0) 279#define XFS_BUF_SET_START(bp) do { } while (0)
292 280
293#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr) 281#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
@@ -313,10 +301,6 @@ xfs_buf_set_ref(
313 301
314#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count)) 302#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
315 303
316#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
317#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
318#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
319#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
320#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); 304#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait);
321 305
322#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) 306#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index f4f878fc0083..75e5d322e48f 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -151,14 +151,14 @@ xfs_nfs_get_inode(
151 * We don't use ESTALE directly down the chain to not 151 * We don't use ESTALE directly down the chain to not
152 * confuse applications using bulkstat that expect EINVAL. 152 * confuse applications using bulkstat that expect EINVAL.
153 */ 153 */
154 if (error == EINVAL) 154 if (error == EINVAL || error == ENOENT)
155 error = ESTALE; 155 error = ESTALE;
156 return ERR_PTR(-error); 156 return ERR_PTR(-error);
157 } 157 }
158 158
159 if (ip->i_d.di_gen != generation) { 159 if (ip->i_d.di_gen != generation) {
160 IRELE(ip); 160 IRELE(ip);
161 return ERR_PTR(-ENOENT); 161 return ERR_PTR(-ESTALE);
162 } 162 }
163 163
164 return VFS_I(ip); 164 return VFS_I(ip);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 7f782af286bf..cca00f49e092 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -127,6 +127,8 @@ xfs_iozero(
127STATIC int 127STATIC int
128xfs_file_fsync( 128xfs_file_fsync(
129 struct file *file, 129 struct file *file,
130 loff_t start,
131 loff_t end,
130 int datasync) 132 int datasync)
131{ 133{
132 struct inode *inode = file->f_mapping->host; 134 struct inode *inode = file->f_mapping->host;
@@ -138,6 +140,10 @@ xfs_file_fsync(
138 140
139 trace_xfs_file_fsync(ip); 141 trace_xfs_file_fsync(ip);
140 142
143 error = filemap_write_and_wait_range(inode->i_mapping, start, end);
144 if (error)
145 return error;
146
141 if (XFS_FORCED_SHUTDOWN(mp)) 147 if (XFS_FORCED_SHUTDOWN(mp))
142 return -XFS_ERROR(EIO); 148 return -XFS_ERROR(EIO);
143 149
@@ -875,18 +881,11 @@ xfs_file_aio_write(
875 /* Handle various SYNC-type writes */ 881 /* Handle various SYNC-type writes */
876 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { 882 if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
877 loff_t end = pos + ret - 1; 883 loff_t end = pos + ret - 1;
878 int error, error2;
879 884
880 xfs_rw_iunlock(ip, iolock); 885 xfs_rw_iunlock(ip, iolock);
881 error = filemap_write_and_wait_range(mapping, pos, end); 886 ret = -xfs_file_fsync(file, pos, end,
887 (file->f_flags & __O_SYNC) ? 0 : 1);
882 xfs_rw_ilock(ip, iolock); 888 xfs_rw_ilock(ip, iolock);
883
884 error2 = -xfs_file_fsync(file,
885 (file->f_flags & __O_SYNC) ? 0 : 1);
886 if (error)
887 ret = error;
888 else if (error2)
889 ret = error2;
890 } 889 }
891 890
892out_unlock: 891out_unlock:
@@ -944,7 +943,7 @@ xfs_file_fallocate(
944 943
945 iattr.ia_valid = ATTR_SIZE; 944 iattr.ia_valid = ATTR_SIZE;
946 iattr.ia_size = new_size; 945 iattr.ia_size = new_size;
947 error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); 946 error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
948 } 947 }
949 948
950out_unlock: 949out_unlock:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index d44d92cd12b1..501e4f630548 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -39,6 +39,7 @@
39#include "xfs_buf_item.h" 39#include "xfs_buf_item.h"
40#include "xfs_utils.h" 40#include "xfs_utils.h"
41#include "xfs_vnodeops.h" 41#include "xfs_vnodeops.h"
42#include "xfs_inode_item.h"
42#include "xfs_trace.h" 43#include "xfs_trace.h"
43 44
44#include <linux/capability.h> 45#include <linux/capability.h>
@@ -497,12 +498,442 @@ xfs_vn_getattr(
497 return 0; 498 return 0;
498} 499}
499 500
501int
502xfs_setattr_nonsize(
503 struct xfs_inode *ip,
504 struct iattr *iattr,
505 int flags)
506{
507 xfs_mount_t *mp = ip->i_mount;
508 struct inode *inode = VFS_I(ip);
509 int mask = iattr->ia_valid;
510 xfs_trans_t *tp;
511 int error;
512 uid_t uid = 0, iuid = 0;
513 gid_t gid = 0, igid = 0;
514 struct xfs_dquot *udqp = NULL, *gdqp = NULL;
515 struct xfs_dquot *olddquot1 = NULL, *olddquot2 = NULL;
516
517 trace_xfs_setattr(ip);
518
519 if (mp->m_flags & XFS_MOUNT_RDONLY)
520 return XFS_ERROR(EROFS);
521
522 if (XFS_FORCED_SHUTDOWN(mp))
523 return XFS_ERROR(EIO);
524
525 error = -inode_change_ok(inode, iattr);
526 if (error)
527 return XFS_ERROR(error);
528
529 ASSERT((mask & ATTR_SIZE) == 0);
530
531 /*
532 * If disk quotas is on, we make sure that the dquots do exist on disk,
533 * before we start any other transactions. Trying to do this later
534 * is messy. We don't care to take a readlock to look at the ids
535 * in inode here, because we can't hold it across the trans_reserve.
536 * If the IDs do change before we take the ilock, we're covered
537 * because the i_*dquot fields will get updated anyway.
538 */
539 if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
540 uint qflags = 0;
541
542 if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
543 uid = iattr->ia_uid;
544 qflags |= XFS_QMOPT_UQUOTA;
545 } else {
546 uid = ip->i_d.di_uid;
547 }
548 if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
549 gid = iattr->ia_gid;
550 qflags |= XFS_QMOPT_GQUOTA;
551 } else {
552 gid = ip->i_d.di_gid;
553 }
554
555 /*
556 * We take a reference when we initialize udqp and gdqp,
557 * so it is important that we never blindly double trip on
558 * the same variable. See xfs_create() for an example.
559 */
560 ASSERT(udqp == NULL);
561 ASSERT(gdqp == NULL);
562 error = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
563 qflags, &udqp, &gdqp);
564 if (error)
565 return error;
566 }
567
568 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
569 error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
570 if (error)
571 goto out_dqrele;
572
573 xfs_ilock(ip, XFS_ILOCK_EXCL);
574
575 /*
576 * Change file ownership. Must be the owner or privileged.
577 */
578 if (mask & (ATTR_UID|ATTR_GID)) {
579 /*
580 * These IDs could have changed since we last looked at them.
581 * But, we're assured that if the ownership did change
582 * while we didn't have the inode locked, inode's dquot(s)
583 * would have changed also.
584 */
585 iuid = ip->i_d.di_uid;
586 igid = ip->i_d.di_gid;
587 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
588 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
589
590 /*
591 * Do a quota reservation only if uid/gid is actually
592 * going to change.
593 */
594 if (XFS_IS_QUOTA_RUNNING(mp) &&
595 ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
596 (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
597 ASSERT(tp);
598 error = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
599 capable(CAP_FOWNER) ?
600 XFS_QMOPT_FORCE_RES : 0);
601 if (error) /* out of quota */
602 goto out_trans_cancel;
603 }
604 }
605
606 xfs_trans_ijoin(tp, ip);
607
608 /*
609 * Change file ownership. Must be the owner or privileged.
610 */
611 if (mask & (ATTR_UID|ATTR_GID)) {
612 /*
613 * CAP_FSETID overrides the following restrictions:
614 *
615 * The set-user-ID and set-group-ID bits of a file will be
616 * cleared upon successful return from chown()
617 */
618 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
619 !capable(CAP_FSETID))
620 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
621
622 /*
623 * Change the ownerships and register quota modifications
624 * in the transaction.
625 */
626 if (iuid != uid) {
627 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
628 ASSERT(mask & ATTR_UID);
629 ASSERT(udqp);
630 olddquot1 = xfs_qm_vop_chown(tp, ip,
631 &ip->i_udquot, udqp);
632 }
633 ip->i_d.di_uid = uid;
634 inode->i_uid = uid;
635 }
636 if (igid != gid) {
637 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
638 ASSERT(!XFS_IS_PQUOTA_ON(mp));
639 ASSERT(mask & ATTR_GID);
640 ASSERT(gdqp);
641 olddquot2 = xfs_qm_vop_chown(tp, ip,
642 &ip->i_gdquot, gdqp);
643 }
644 ip->i_d.di_gid = gid;
645 inode->i_gid = gid;
646 }
647 }
648
649 /*
650 * Change file access modes.
651 */
652 if (mask & ATTR_MODE) {
653 umode_t mode = iattr->ia_mode;
654
655 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
656 mode &= ~S_ISGID;
657
658 ip->i_d.di_mode &= S_IFMT;
659 ip->i_d.di_mode |= mode & ~S_IFMT;
660
661 inode->i_mode &= S_IFMT;
662 inode->i_mode |= mode & ~S_IFMT;
663 }
664
665 /*
666 * Change file access or modified times.
667 */
668 if (mask & ATTR_ATIME) {
669 inode->i_atime = iattr->ia_atime;
670 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
671 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
672 ip->i_update_core = 1;
673 }
674 if (mask & ATTR_CTIME) {
675 inode->i_ctime = iattr->ia_ctime;
676 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
677 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
678 ip->i_update_core = 1;
679 }
680 if (mask & ATTR_MTIME) {
681 inode->i_mtime = iattr->ia_mtime;
682 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
683 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
684 ip->i_update_core = 1;
685 }
686
687 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
688
689 XFS_STATS_INC(xs_ig_attrchg);
690
691 if (mp->m_flags & XFS_MOUNT_WSYNC)
692 xfs_trans_set_sync(tp);
693 error = xfs_trans_commit(tp, 0);
694
695 xfs_iunlock(ip, XFS_ILOCK_EXCL);
696
697 /*
698 * Release any dquot(s) the inode had kept before chown.
699 */
700 xfs_qm_dqrele(olddquot1);
701 xfs_qm_dqrele(olddquot2);
702 xfs_qm_dqrele(udqp);
703 xfs_qm_dqrele(gdqp);
704
705 if (error)
706 return XFS_ERROR(error);
707
708 /*
709 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
710 * update. We could avoid this with linked transactions
711 * and passing down the transaction pointer all the way
712 * to attr_set. No previous user of the generic
713 * Posix ACL code seems to care about this issue either.
714 */
715 if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
716 error = -xfs_acl_chmod(inode);
717 if (error)
718 return XFS_ERROR(error);
719 }
720
721 return 0;
722
723out_trans_cancel:
724 xfs_trans_cancel(tp, 0);
725 xfs_iunlock(ip, XFS_ILOCK_EXCL);
726out_dqrele:
727 xfs_qm_dqrele(udqp);
728 xfs_qm_dqrele(gdqp);
729 return error;
730}
731
732/*
733 * Truncate file. Must have write permission and not be a directory.
734 */
735int
736xfs_setattr_size(
737 struct xfs_inode *ip,
738 struct iattr *iattr,
739 int flags)
740{
741 struct xfs_mount *mp = ip->i_mount;
742 struct inode *inode = VFS_I(ip);
743 int mask = iattr->ia_valid;
744 struct xfs_trans *tp;
745 int error;
746 uint lock_flags;
747 uint commit_flags = 0;
748
749 trace_xfs_setattr(ip);
750
751 if (mp->m_flags & XFS_MOUNT_RDONLY)
752 return XFS_ERROR(EROFS);
753
754 if (XFS_FORCED_SHUTDOWN(mp))
755 return XFS_ERROR(EIO);
756
757 error = -inode_change_ok(inode, iattr);
758 if (error)
759 return XFS_ERROR(error);
760
761 ASSERT(S_ISREG(ip->i_d.di_mode));
762 ASSERT((mask & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
763 ATTR_MTIME_SET|ATTR_KILL_SUID|ATTR_KILL_SGID|
764 ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
765
766 lock_flags = XFS_ILOCK_EXCL;
767 if (!(flags & XFS_ATTR_NOLOCK))
768 lock_flags |= XFS_IOLOCK_EXCL;
769 xfs_ilock(ip, lock_flags);
770
771 /*
772 * Short circuit the truncate case for zero length files.
773 */
774 if (iattr->ia_size == 0 &&
775 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
776 if (!(mask & (ATTR_CTIME|ATTR_MTIME)))
777 goto out_unlock;
778
779 /*
780 * Use the regular setattr path to update the timestamps.
781 */
782 xfs_iunlock(ip, lock_flags);
783 iattr->ia_valid &= ~ATTR_SIZE;
784 return xfs_setattr_nonsize(ip, iattr, 0);
785 }
786
787 /*
788 * Make sure that the dquots are attached to the inode.
789 */
790 error = xfs_qm_dqattach_locked(ip, 0);
791 if (error)
792 goto out_unlock;
793
794 /*
795 * Now we can make the changes. Before we join the inode to the
796 * transaction, take care of the part of the truncation that must be
797 * done without the inode lock. This needs to be done before joining
798 * the inode to the transaction, because the inode cannot be unlocked
799 * once it is a part of the transaction.
800 */
801 if (iattr->ia_size > ip->i_size) {
802 /*
803 * Do the first part of growing a file: zero any data in the
804 * last block that is beyond the old EOF. We need to do this
805 * before the inode is joined to the transaction to modify
806 * i_size.
807 */
808 error = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
809 if (error)
810 goto out_unlock;
811 }
812 xfs_iunlock(ip, XFS_ILOCK_EXCL);
813 lock_flags &= ~XFS_ILOCK_EXCL;
814
815 /*
816 * We are going to log the inode size change in this transaction so
817 * any previous writes that are beyond the on disk EOF and the new
818 * EOF that have not been written out need to be written here. If we
819 * do not write the data out, we expose ourselves to the null files
820 * problem.
821 *
822 * Only flush from the on disk size to the smaller of the in memory
823 * file size or the new size as that's the range we really care about
824 * here and prevents waiting for other data not within the range we
825 * care about here.
826 */
827 if (ip->i_size != ip->i_d.di_size && iattr->ia_size > ip->i_d.di_size) {
828 error = xfs_flush_pages(ip, ip->i_d.di_size, iattr->ia_size,
829 XBF_ASYNC, FI_NONE);
830 if (error)
831 goto out_unlock;
832 }
833
834 /*
835 * Wait for all I/O to complete.
836 */
837 xfs_ioend_wait(ip);
838
839 error = -block_truncate_page(inode->i_mapping, iattr->ia_size,
840 xfs_get_blocks);
841 if (error)
842 goto out_unlock;
843
844 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
845 error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
846 XFS_TRANS_PERM_LOG_RES,
847 XFS_ITRUNCATE_LOG_COUNT);
848 if (error)
849 goto out_trans_cancel;
850
851 truncate_setsize(inode, iattr->ia_size);
852
853 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
854 lock_flags |= XFS_ILOCK_EXCL;
855
856 xfs_ilock(ip, XFS_ILOCK_EXCL);
857
858 xfs_trans_ijoin(tp, ip);
859
860 /*
861 * Only change the c/mtime if we are changing the size or we are
862 * explicitly asked to change it. This handles the semantic difference
863 * between truncate() and ftruncate() as implemented in the VFS.
864 *
865 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
866 * special case where we need to update the times despite not having
867 * these flags set. For all other operations the VFS set these flags
868 * explicitly if it wants a timestamp update.
869 */
870 if (iattr->ia_size != ip->i_size &&
871 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
872 iattr->ia_ctime = iattr->ia_mtime =
873 current_fs_time(inode->i_sb);
874 mask |= ATTR_CTIME | ATTR_MTIME;
875 }
876
877 if (iattr->ia_size > ip->i_size) {
878 ip->i_d.di_size = iattr->ia_size;
879 ip->i_size = iattr->ia_size;
880 } else if (iattr->ia_size <= ip->i_size ||
881 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
882 error = xfs_itruncate_data(&tp, ip, iattr->ia_size);
883 if (error)
884 goto out_trans_abort;
885
886 /*
887 * Truncated "down", so we're removing references to old data
888 * here - if we delay flushing for a long time, we expose
889 * ourselves unduly to the notorious NULL files problem. So,
890 * we mark this inode and flush it when the file is closed,
891 * and do not wait the usual (long) time for writeout.
892 */
893 xfs_iflags_set(ip, XFS_ITRUNCATED);
894 }
895
896 if (mask & ATTR_CTIME) {
897 inode->i_ctime = iattr->ia_ctime;
898 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
899 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
900 ip->i_update_core = 1;
901 }
902 if (mask & ATTR_MTIME) {
903 inode->i_mtime = iattr->ia_mtime;
904 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
905 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
906 ip->i_update_core = 1;
907 }
908
909 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
910
911 XFS_STATS_INC(xs_ig_attrchg);
912
913 if (mp->m_flags & XFS_MOUNT_WSYNC)
914 xfs_trans_set_sync(tp);
915
916 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
917out_unlock:
918 if (lock_flags)
919 xfs_iunlock(ip, lock_flags);
920 return error;
921
922out_trans_abort:
923 commit_flags |= XFS_TRANS_ABORT;
924out_trans_cancel:
925 xfs_trans_cancel(tp, commit_flags);
926 goto out_unlock;
927}
928
500STATIC int 929STATIC int
501xfs_vn_setattr( 930xfs_vn_setattr(
502 struct dentry *dentry, 931 struct dentry *dentry,
503 struct iattr *iattr) 932 struct iattr *iattr)
504{ 933{
505 return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); 934 if (iattr->ia_valid & ATTR_SIZE)
935 return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0);
936 return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
506} 937}
507 938
508#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) 939#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 8633521b3b2e..d42f814e4d35 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -33,7 +33,6 @@
33#endif 33#endif
34 34
35#include <xfs_types.h> 35#include <xfs_types.h>
36#include <xfs_arch.h>
37 36
38#include <kmem.h> 37#include <kmem.h>
39#include <mrlock.h> 38#include <mrlock.h>
@@ -88,6 +87,12 @@
88#include <xfs_buf.h> 87#include <xfs_buf.h>
89#include <xfs_message.h> 88#include <xfs_message.h>
90 89
90#ifdef __BIG_ENDIAN
91#define XFS_NATIVE_HOST 1
92#else
93#undef XFS_NATIVE_HOST
94#endif
95
91/* 96/*
92 * Feature macros (disable/enable) 97 * Feature macros (disable/enable)
93 */ 98 */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a1a881e68a9a..9a72dda58bd0 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -33,7 +33,6 @@
33#include "xfs_dinode.h" 33#include "xfs_dinode.h"
34#include "xfs_inode.h" 34#include "xfs_inode.h"
35#include "xfs_btree.h" 35#include "xfs_btree.h"
36#include "xfs_btree_trace.h"
37#include "xfs_ialloc.h" 36#include "xfs_ialloc.h"
38#include "xfs_bmap.h" 37#include "xfs_bmap.h"
39#include "xfs_rtalloc.h" 38#include "xfs_rtalloc.h"
@@ -1025,11 +1024,6 @@ xfs_fs_put_super(
1025{ 1024{
1026 struct xfs_mount *mp = XFS_M(sb); 1025 struct xfs_mount *mp = XFS_M(sb);
1027 1026
1028 /*
1029 * Unregister the memory shrinker before we tear down the mount
1030 * structure so we don't have memory reclaim racing with us here.
1031 */
1032 xfs_inode_shrinker_unregister(mp);
1033 xfs_syncd_stop(mp); 1027 xfs_syncd_stop(mp);
1034 1028
1035 /* 1029 /*
@@ -1412,36 +1406,31 @@ xfs_fs_fill_super(
1412 sb->s_time_gran = 1; 1406 sb->s_time_gran = 1;
1413 set_posix_acl_flag(sb); 1407 set_posix_acl_flag(sb);
1414 1408
1415 error = xfs_syncd_init(mp); 1409 error = xfs_mountfs(mp);
1416 if (error) 1410 if (error)
1417 goto out_filestream_unmount; 1411 goto out_filestream_unmount;
1418 1412
1419 xfs_inode_shrinker_register(mp); 1413 error = xfs_syncd_init(mp);
1420
1421 error = xfs_mountfs(mp);
1422 if (error) 1414 if (error)
1423 goto out_syncd_stop; 1415 goto out_unmount;
1424 1416
1425 root = igrab(VFS_I(mp->m_rootip)); 1417 root = igrab(VFS_I(mp->m_rootip));
1426 if (!root) { 1418 if (!root) {
1427 error = ENOENT; 1419 error = ENOENT;
1428 goto fail_unmount; 1420 goto out_syncd_stop;
1429 } 1421 }
1430 if (is_bad_inode(root)) { 1422 if (is_bad_inode(root)) {
1431 error = EINVAL; 1423 error = EINVAL;
1432 goto fail_vnrele; 1424 goto out_syncd_stop;
1433 } 1425 }
1434 sb->s_root = d_alloc_root(root); 1426 sb->s_root = d_alloc_root(root);
1435 if (!sb->s_root) { 1427 if (!sb->s_root) {
1436 error = ENOMEM; 1428 error = ENOMEM;
1437 goto fail_vnrele; 1429 goto out_iput;
1438 } 1430 }
1439 1431
1440 return 0; 1432 return 0;
1441 1433
1442 out_syncd_stop:
1443 xfs_inode_shrinker_unregister(mp);
1444 xfs_syncd_stop(mp);
1445 out_filestream_unmount: 1434 out_filestream_unmount:
1446 xfs_filestream_unmount(mp); 1435 xfs_filestream_unmount(mp);
1447 out_free_sb: 1436 out_free_sb:
@@ -1456,18 +1445,11 @@ xfs_fs_fill_super(
1456 out: 1445 out:
1457 return -error; 1446 return -error;
1458 1447
1459 fail_vnrele: 1448 out_iput:
1460 if (sb->s_root) { 1449 iput(root);
1461 dput(sb->s_root); 1450 out_syncd_stop:
1462 sb->s_root = NULL;
1463 } else {
1464 iput(root);
1465 }
1466
1467 fail_unmount:
1468 xfs_inode_shrinker_unregister(mp);
1469 xfs_syncd_stop(mp); 1451 xfs_syncd_stop(mp);
1470 1452 out_unmount:
1471 /* 1453 /*
1472 * Blow away any referenced inode in the filestreams cache. 1454 * Blow away any referenced inode in the filestreams cache.
1473 * This can and will cause log traffic as inodes go inactive 1455 * This can and will cause log traffic as inodes go inactive
@@ -1491,6 +1473,21 @@ xfs_fs_mount(
1491 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); 1473 return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1492} 1474}
1493 1475
1476static int
1477xfs_fs_nr_cached_objects(
1478 struct super_block *sb)
1479{
1480 return xfs_reclaim_inodes_count(XFS_M(sb));
1481}
1482
1483static void
1484xfs_fs_free_cached_objects(
1485 struct super_block *sb,
1486 int nr_to_scan)
1487{
1488 xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
1489}
1490
1494static const struct super_operations xfs_super_operations = { 1491static const struct super_operations xfs_super_operations = {
1495 .alloc_inode = xfs_fs_alloc_inode, 1492 .alloc_inode = xfs_fs_alloc_inode,
1496 .destroy_inode = xfs_fs_destroy_inode, 1493 .destroy_inode = xfs_fs_destroy_inode,
@@ -1504,6 +1501,8 @@ static const struct super_operations xfs_super_operations = {
1504 .statfs = xfs_fs_statfs, 1501 .statfs = xfs_fs_statfs,
1505 .remount_fs = xfs_fs_remount, 1502 .remount_fs = xfs_fs_remount,
1506 .show_options = xfs_fs_show_options, 1503 .show_options = xfs_fs_show_options,
1504 .nr_cached_objects = xfs_fs_nr_cached_objects,
1505 .free_cached_objects = xfs_fs_free_cached_objects,
1507}; 1506};
1508 1507
1509static struct file_system_type xfs_fs_type = { 1508static struct file_system_type xfs_fs_type = {
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 8ecad5ff9f9b..e4c938afb910 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -179,6 +179,8 @@ restart:
179 if (error == EFSCORRUPTED) 179 if (error == EFSCORRUPTED)
180 break; 180 break;
181 181
182 cond_resched();
183
182 } while (nr_found && !done); 184 } while (nr_found && !done);
183 185
184 if (skipped) { 186 if (skipped) {
@@ -359,14 +361,12 @@ xfs_quiesce_data(
359{ 361{
360 int error, error2 = 0; 362 int error, error2 = 0;
361 363
362 /* push non-blocking */
363 xfs_sync_data(mp, 0);
364 xfs_qm_sync(mp, SYNC_TRYLOCK); 364 xfs_qm_sync(mp, SYNC_TRYLOCK);
365
366 /* push and block till complete */
367 xfs_sync_data(mp, SYNC_WAIT);
368 xfs_qm_sync(mp, SYNC_WAIT); 365 xfs_qm_sync(mp, SYNC_WAIT);
369 366
367 /* force out the newly dirtied log buffers */
368 xfs_log_force(mp, XFS_LOG_SYNC);
369
370 /* write superblock and hoover up shutdown errors */ 370 /* write superblock and hoover up shutdown errors */
371 error = xfs_sync_fsdata(mp); 371 error = xfs_sync_fsdata(mp);
372 372
@@ -436,7 +436,7 @@ xfs_quiesce_attr(
436 WARN_ON(atomic_read(&mp->m_active_trans) != 0); 436 WARN_ON(atomic_read(&mp->m_active_trans) != 0);
437 437
438 /* Push the superblock and write an unmount record */ 438 /* Push the superblock and write an unmount record */
439 error = xfs_log_sbcount(mp, 1); 439 error = xfs_log_sbcount(mp);
440 if (error) 440 if (error)
441 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. " 441 xfs_warn(mp, "xfs_attr_quiesce: failed to log sb changes. "
442 "Frozen image may not be consistent."); 442 "Frozen image may not be consistent.");
@@ -986,6 +986,8 @@ restart:
986 986
987 *nr_to_scan -= XFS_LOOKUP_BATCH; 987 *nr_to_scan -= XFS_LOOKUP_BATCH;
988 988
989 cond_resched();
990
989 } while (nr_found && !done && *nr_to_scan > 0); 991 } while (nr_found && !done && *nr_to_scan > 0);
990 992
991 if (trylock && !done) 993 if (trylock && !done)
@@ -1003,7 +1005,7 @@ restart:
1003 * ensure that when we get more reclaimers than AGs we block rather 1005 * ensure that when we get more reclaimers than AGs we block rather
1004 * than spin trying to execute reclaim. 1006 * than spin trying to execute reclaim.
1005 */ 1007 */
1006 if (trylock && skipped && *nr_to_scan > 0) { 1008 if (skipped && (flags & SYNC_WAIT) && *nr_to_scan > 0) {
1007 trylock = 0; 1009 trylock = 0;
1008 goto restart; 1010 goto restart;
1009 } 1011 }
@@ -1021,44 +1023,38 @@ xfs_reclaim_inodes(
1021} 1023}
1022 1024
1023/* 1025/*
1024 * Inode cache shrinker. 1026 * Scan a certain number of inodes for reclaim.
1025 * 1027 *
1026 * When called we make sure that there is a background (fast) inode reclaim in 1028 * When called we make sure that there is a background (fast) inode reclaim in
1027 * progress, while we will throttle the speed of reclaim via doiing synchronous 1029 * progress, while we will throttle the speed of reclaim via doing synchronous
1028 * reclaim of inodes. That means if we come across dirty inodes, we wait for 1030 * reclaim of inodes. That means if we come across dirty inodes, we wait for
1029 * them to be cleaned, which we hope will not be very long due to the 1031 * them to be cleaned, which we hope will not be very long due to the
1030 * background walker having already kicked the IO off on those dirty inodes. 1032 * background walker having already kicked the IO off on those dirty inodes.
1031 */ 1033 */
1032static int 1034void
1033xfs_reclaim_inode_shrink( 1035xfs_reclaim_inodes_nr(
1034 struct shrinker *shrink, 1036 struct xfs_mount *mp,
1035 struct shrink_control *sc) 1037 int nr_to_scan)
1036{ 1038{
1037 struct xfs_mount *mp; 1039 /* kick background reclaimer and push the AIL */
1038 struct xfs_perag *pag; 1040 xfs_syncd_queue_reclaim(mp);
1039 xfs_agnumber_t ag; 1041 xfs_ail_push_all(mp->m_ail);
1040 int reclaimable;
1041 int nr_to_scan = sc->nr_to_scan;
1042 gfp_t gfp_mask = sc->gfp_mask;
1043
1044 mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
1045 if (nr_to_scan) {
1046 /* kick background reclaimer and push the AIL */
1047 xfs_syncd_queue_reclaim(mp);
1048 xfs_ail_push_all(mp->m_ail);
1049 1042
1050 if (!(gfp_mask & __GFP_FS)) 1043 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
1051 return -1; 1044}
1052 1045
1053 xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, 1046/*
1054 &nr_to_scan); 1047 * Return the number of reclaimable inodes in the filesystem for
1055 /* terminate if we don't exhaust the scan */ 1048 * the shrinker to determine how much to reclaim.
1056 if (nr_to_scan > 0) 1049 */
1057 return -1; 1050int
1058 } 1051xfs_reclaim_inodes_count(
1052 struct xfs_mount *mp)
1053{
1054 struct xfs_perag *pag;
1055 xfs_agnumber_t ag = 0;
1056 int reclaimable = 0;
1059 1057
1060 reclaimable = 0;
1061 ag = 0;
1062 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { 1058 while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
1063 ag = pag->pag_agno + 1; 1059 ag = pag->pag_agno + 1;
1064 reclaimable += pag->pag_ici_reclaimable; 1060 reclaimable += pag->pag_ici_reclaimable;
@@ -1067,18 +1063,3 @@ xfs_reclaim_inode_shrink(
1067 return reclaimable; 1063 return reclaimable;
1068} 1064}
1069 1065
1070void
1071xfs_inode_shrinker_register(
1072 struct xfs_mount *mp)
1073{
1074 mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
1075 mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
1076 register_shrinker(&mp->m_inode_shrink);
1077}
1078
1079void
1080xfs_inode_shrinker_unregister(
1081 struct xfs_mount *mp)
1082{
1083 unregister_shrinker(&mp->m_inode_shrink);
1084}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e3a6ad27415f..941202e7ac6e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -21,14 +21,6 @@
21struct xfs_mount; 21struct xfs_mount;
22struct xfs_perag; 22struct xfs_perag;
23 23
24typedef struct xfs_sync_work {
25 struct list_head w_list;
26 struct xfs_mount *w_mount;
27 void *w_data; /* syncer routine argument */
28 void (*w_syncer)(struct xfs_mount *, void *);
29 struct completion *w_completion;
30} xfs_sync_work_t;
31
32#define SYNC_WAIT 0x0001 /* wait for i/o to complete */ 24#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
33#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */ 25#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
34 26
@@ -43,6 +35,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
43void xfs_flush_inodes(struct xfs_inode *ip); 35void xfs_flush_inodes(struct xfs_inode *ip);
44 36
45int xfs_reclaim_inodes(struct xfs_mount *mp, int mode); 37int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
38int xfs_reclaim_inodes_count(struct xfs_mount *mp);
39void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
46 40
47void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); 41void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
48void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); 42void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
@@ -54,7 +48,4 @@ int xfs_inode_ag_iterator(struct xfs_mount *mp,
54 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), 48 int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
55 int flags); 49 int flags);
56 50
57void xfs_inode_shrinker_register(struct xfs_mount *mp);
58void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
59
60#endif 51#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index d48b7a579ae1..fda0708ef2ea 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -293,7 +293,7 @@ DECLARE_EVENT_CLASS(xfs_buf_class,
293 __entry->buffer_length = bp->b_buffer_length; 293 __entry->buffer_length = bp->b_buffer_length;
294 __entry->hold = atomic_read(&bp->b_hold); 294 __entry->hold = atomic_read(&bp->b_hold);
295 __entry->pincount = atomic_read(&bp->b_pin_count); 295 __entry->pincount = atomic_read(&bp->b_pin_count);
296 __entry->lockval = xfs_buf_lock_value(bp); 296 __entry->lockval = bp->b_sema.count;
297 __entry->flags = bp->b_flags; 297 __entry->flags = bp->b_flags;
298 __entry->caller_ip = caller_ip; 298 __entry->caller_ip = caller_ip;
299 ), 299 ),
@@ -323,7 +323,7 @@ DEFINE_BUF_EVENT(xfs_buf_bawrite);
323DEFINE_BUF_EVENT(xfs_buf_bdwrite); 323DEFINE_BUF_EVENT(xfs_buf_bdwrite);
324DEFINE_BUF_EVENT(xfs_buf_lock); 324DEFINE_BUF_EVENT(xfs_buf_lock);
325DEFINE_BUF_EVENT(xfs_buf_lock_done); 325DEFINE_BUF_EVENT(xfs_buf_lock_done);
326DEFINE_BUF_EVENT(xfs_buf_cond_lock); 326DEFINE_BUF_EVENT(xfs_buf_trylock);
327DEFINE_BUF_EVENT(xfs_buf_unlock); 327DEFINE_BUF_EVENT(xfs_buf_unlock);
328DEFINE_BUF_EVENT(xfs_buf_iowait); 328DEFINE_BUF_EVENT(xfs_buf_iowait);
329DEFINE_BUF_EVENT(xfs_buf_iowait_done); 329DEFINE_BUF_EVENT(xfs_buf_iowait_done);
@@ -366,7 +366,7 @@ DECLARE_EVENT_CLASS(xfs_buf_flags_class,
366 __entry->flags = flags; 366 __entry->flags = flags;
367 __entry->hold = atomic_read(&bp->b_hold); 367 __entry->hold = atomic_read(&bp->b_hold);
368 __entry->pincount = atomic_read(&bp->b_pin_count); 368 __entry->pincount = atomic_read(&bp->b_pin_count);
369 __entry->lockval = xfs_buf_lock_value(bp); 369 __entry->lockval = bp->b_sema.count;
370 __entry->caller_ip = caller_ip; 370 __entry->caller_ip = caller_ip;
371 ), 371 ),
372 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " 372 TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
@@ -409,7 +409,7 @@ TRACE_EVENT(xfs_buf_ioerror,
409 __entry->buffer_length = bp->b_buffer_length; 409 __entry->buffer_length = bp->b_buffer_length;
410 __entry->hold = atomic_read(&bp->b_hold); 410 __entry->hold = atomic_read(&bp->b_hold);
411 __entry->pincount = atomic_read(&bp->b_pin_count); 411 __entry->pincount = atomic_read(&bp->b_pin_count);
412 __entry->lockval = xfs_buf_lock_value(bp); 412 __entry->lockval = bp->b_sema.count;
413 __entry->error = error; 413 __entry->error = error;
414 __entry->flags = bp->b_flags; 414 __entry->flags = bp->b_flags;
415 __entry->caller_ip = caller_ip; 415 __entry->caller_ip = caller_ip;
@@ -454,7 +454,7 @@ DECLARE_EVENT_CLASS(xfs_buf_item_class,
454 __entry->buf_flags = bip->bli_buf->b_flags; 454 __entry->buf_flags = bip->bli_buf->b_flags;
455 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); 455 __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
456 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count); 456 __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
457 __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); 457 __entry->buf_lockval = bip->bli_buf->b_sema.count;
458 __entry->li_desc = bip->bli_item.li_desc; 458 __entry->li_desc = bip->bli_item.li_desc;
459 __entry->li_flags = bip->bli_item.li_flags; 459 __entry->li_flags = bip->bli_item.li_flags;
460 ), 460 ),
@@ -998,7 +998,8 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
998 TP_STRUCT__entry( 998 TP_STRUCT__entry(
999 __field(dev_t, dev) 999 __field(dev_t, dev)
1000 __field(xfs_ino_t, ino) 1000 __field(xfs_ino_t, ino)
1001 __field(loff_t, size) 1001 __field(loff_t, isize)
1002 __field(loff_t, disize)
1002 __field(loff_t, new_size) 1003 __field(loff_t, new_size)
1003 __field(loff_t, offset) 1004 __field(loff_t, offset)
1004 __field(size_t, count) 1005 __field(size_t, count)
@@ -1006,16 +1007,18 @@ DECLARE_EVENT_CLASS(xfs_simple_io_class,
1006 TP_fast_assign( 1007 TP_fast_assign(
1007 __entry->dev = VFS_I(ip)->i_sb->s_dev; 1008 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1008 __entry->ino = ip->i_ino; 1009 __entry->ino = ip->i_ino;
1009 __entry->size = ip->i_d.di_size; 1010 __entry->isize = ip->i_size;
1011 __entry->disize = ip->i_d.di_size;
1010 __entry->new_size = ip->i_new_size; 1012 __entry->new_size = ip->i_new_size;
1011 __entry->offset = offset; 1013 __entry->offset = offset;
1012 __entry->count = count; 1014 __entry->count = count;
1013 ), 1015 ),
1014 TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " 1016 TP_printk("dev %d:%d ino 0x%llx isize 0x%llx disize 0x%llx new_size 0x%llx "
1015 "offset 0x%llx count %zd", 1017 "offset 0x%llx count %zd",
1016 MAJOR(__entry->dev), MINOR(__entry->dev), 1018 MAJOR(__entry->dev), MINOR(__entry->dev),
1017 __entry->ino, 1019 __entry->ino,
1018 __entry->size, 1020 __entry->isize,
1021 __entry->disize,
1019 __entry->new_size, 1022 __entry->new_size,
1020 __entry->offset, 1023 __entry->offset,
1021 __entry->count) 1024 __entry->count)
@@ -1028,40 +1031,7 @@ DEFINE_EVENT(xfs_simple_io_class, name, \
1028DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc); 1031DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
1029DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); 1032DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
1030DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); 1033DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
1031 1034DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
1032
1033TRACE_EVENT(xfs_itruncate_start,
1034 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
1035 xfs_off_t toss_start, xfs_off_t toss_finish),
1036 TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
1037 TP_STRUCT__entry(
1038 __field(dev_t, dev)
1039 __field(xfs_ino_t, ino)
1040 __field(xfs_fsize_t, size)
1041 __field(xfs_fsize_t, new_size)
1042 __field(xfs_off_t, toss_start)
1043 __field(xfs_off_t, toss_finish)
1044 __field(int, flag)
1045 ),
1046 TP_fast_assign(
1047 __entry->dev = VFS_I(ip)->i_sb->s_dev;
1048 __entry->ino = ip->i_ino;
1049 __entry->size = ip->i_d.di_size;
1050 __entry->new_size = new_size;
1051 __entry->toss_start = toss_start;
1052 __entry->toss_finish = toss_finish;
1053 __entry->flag = flag;
1054 ),
1055 TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
1056 "toss start 0x%llx toss finish 0x%llx",
1057 MAJOR(__entry->dev), MINOR(__entry->dev),
1058 __entry->ino,
1059 __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
1060 __entry->size,
1061 __entry->new_size,
1062 __entry->toss_start,
1063 __entry->toss_finish)
1064);
1065 1035
1066DECLARE_EVENT_CLASS(xfs_itrunc_class, 1036DECLARE_EVENT_CLASS(xfs_itrunc_class,
1067 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), 1037 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
@@ -1089,8 +1059,8 @@ DECLARE_EVENT_CLASS(xfs_itrunc_class,
1089DEFINE_EVENT(xfs_itrunc_class, name, \ 1059DEFINE_EVENT(xfs_itrunc_class, name, \
1090 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \ 1060 TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
1091 TP_ARGS(ip, new_size)) 1061 TP_ARGS(ip, new_size))
1092DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start); 1062DEFINE_ITRUNC_EVENT(xfs_itruncate_data_start);
1093DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end); 1063DEFINE_ITRUNC_EVENT(xfs_itruncate_data_end);
1094 1064
1095TRACE_EVENT(xfs_pagecache_inval, 1065TRACE_EVENT(xfs_pagecache_inval,
1096 TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish), 1066 TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 6fa214603819..837f31158d43 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -220,7 +220,7 @@ xfs_qm_adjust_dqtimers(
220{ 220{
221 ASSERT(d->d_id); 221 ASSERT(d->d_id);
222 222
223#ifdef QUOTADEBUG 223#ifdef DEBUG
224 if (d->d_blk_hardlimit) 224 if (d->d_blk_hardlimit)
225 ASSERT(be64_to_cpu(d->d_blk_softlimit) <= 225 ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
226 be64_to_cpu(d->d_blk_hardlimit)); 226 be64_to_cpu(d->d_blk_hardlimit));
@@ -231,6 +231,7 @@ xfs_qm_adjust_dqtimers(
231 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= 231 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
232 be64_to_cpu(d->d_rtb_hardlimit)); 232 be64_to_cpu(d->d_rtb_hardlimit));
233#endif 233#endif
234
234 if (!d->d_btimer) { 235 if (!d->d_btimer) {
235 if ((d->d_blk_softlimit && 236 if ((d->d_blk_softlimit &&
236 (be64_to_cpu(d->d_bcount) >= 237 (be64_to_cpu(d->d_bcount) >=
@@ -318,7 +319,7 @@ xfs_qm_init_dquot_blk(
318 319
319 ASSERT(tp); 320 ASSERT(tp);
320 ASSERT(XFS_BUF_ISBUSY(bp)); 321 ASSERT(XFS_BUF_ISBUSY(bp));
321 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 322 ASSERT(xfs_buf_islocked(bp));
322 323
323 d = (xfs_dqblk_t *)XFS_BUF_PTR(bp); 324 d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
324 325
@@ -534,7 +535,7 @@ xfs_qm_dqtobp(
534 } 535 }
535 536
536 ASSERT(XFS_BUF_ISBUSY(bp)); 537 ASSERT(XFS_BUF_ISBUSY(bp));
537 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 538 ASSERT(xfs_buf_islocked(bp));
538 539
539 /* 540 /*
540 * calculate the location of the dquot inside the buffer. 541 * calculate the location of the dquot inside the buffer.
@@ -622,7 +623,7 @@ xfs_qm_dqread(
622 * brelse it because we have the changes incore. 623 * brelse it because we have the changes incore.
623 */ 624 */
624 ASSERT(XFS_BUF_ISBUSY(bp)); 625 ASSERT(XFS_BUF_ISBUSY(bp));
625 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 626 ASSERT(xfs_buf_islocked(bp));
626 xfs_trans_brelse(tp, bp); 627 xfs_trans_brelse(tp, bp);
627 628
628 return (error); 629 return (error);
@@ -1423,45 +1424,6 @@ xfs_qm_dqpurge(
1423} 1424}
1424 1425
1425 1426
1426#ifdef QUOTADEBUG
1427void
1428xfs_qm_dqprint(xfs_dquot_t *dqp)
1429{
1430 struct xfs_mount *mp = dqp->q_mount;
1431
1432 xfs_debug(mp, "-----------KERNEL DQUOT----------------");
1433 xfs_debug(mp, "---- dquotID = %d",
1434 (int)be32_to_cpu(dqp->q_core.d_id));
1435 xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp));
1436 xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount);
1437 xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno);
1438 xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset);
1439 xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)",
1440 be64_to_cpu(dqp->q_core.d_blk_hardlimit),
1441 (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
1442 xfs_debug(mp, "---- blkslimit = %Lu (0x%x)",
1443 be64_to_cpu(dqp->q_core.d_blk_softlimit),
1444 (int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
1445 xfs_debug(mp, "---- inohlimit = %Lu (0x%x)",
1446 be64_to_cpu(dqp->q_core.d_ino_hardlimit),
1447 (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
1448 xfs_debug(mp, "---- inoslimit = %Lu (0x%x)",
1449 be64_to_cpu(dqp->q_core.d_ino_softlimit),
1450 (int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
1451 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
1452 be64_to_cpu(dqp->q_core.d_bcount),
1453 (int)be64_to_cpu(dqp->q_core.d_bcount));
1454 xfs_debug(mp, "---- icount = %Lu (0x%x)",
1455 be64_to_cpu(dqp->q_core.d_icount),
1456 (int)be64_to_cpu(dqp->q_core.d_icount));
1457 xfs_debug(mp, "---- btimer = %d",
1458 (int)be32_to_cpu(dqp->q_core.d_btimer));
1459 xfs_debug(mp, "---- itimer = %d",
1460 (int)be32_to_cpu(dqp->q_core.d_itimer));
1461 xfs_debug(mp, "---------------------------");
1462}
1463#endif
1464
1465/* 1427/*
1466 * Give the buffer a little push if it is incore and 1428 * Give the buffer a little push if it is incore and
1467 * wait on the flush lock. 1429 * wait on the flush lock.
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index 5da3a23b820d..34b7e945dbfa 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -116,12 +116,6 @@ static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
116 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \ 116 (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
117 (XFS_IS_OQUOTA_ON((d)->q_mount)))) 117 (XFS_IS_OQUOTA_ON((d)->q_mount))))
118 118
119#ifdef QUOTADEBUG
120extern void xfs_qm_dqprint(xfs_dquot_t *);
121#else
122#define xfs_qm_dqprint(a)
123#endif
124
125extern void xfs_qm_dqdestroy(xfs_dquot_t *); 119extern void xfs_qm_dqdestroy(xfs_dquot_t *);
126extern int xfs_qm_dqflush(xfs_dquot_t *, uint); 120extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
127extern int xfs_qm_dqpurge(xfs_dquot_t *); 121extern int xfs_qm_dqpurge(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index b94dace4e785..46e54ad9a2dc 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -67,32 +67,6 @@ static struct shrinker xfs_qm_shaker = {
67 .seeks = DEFAULT_SEEKS, 67 .seeks = DEFAULT_SEEKS,
68}; 68};
69 69
70#ifdef DEBUG
71extern struct mutex qcheck_lock;
72#endif
73
74#ifdef QUOTADEBUG
75static void
76xfs_qm_dquot_list_print(
77 struct xfs_mount *mp)
78{
79 xfs_dquot_t *dqp;
80 int i = 0;
81
82 list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist_lock, qi_mplist) {
83 xfs_debug(mp, " %d. \"%d (%s)\" "
84 "bcnt = %lld, icnt = %lld, refs = %d",
85 i++, be32_to_cpu(dqp->q_core.d_id),
86 DQFLAGTO_TYPESTR(dqp),
87 (long long)be64_to_cpu(dqp->q_core.d_bcount),
88 (long long)be64_to_cpu(dqp->q_core.d_icount),
89 dqp->q_nrefs);
90 }
91}
92#else
93static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
94#endif
95
96/* 70/*
97 * Initialize the XQM structure. 71 * Initialize the XQM structure.
98 * Note that there is not one quota manager per file system. 72 * Note that there is not one quota manager per file system.
@@ -165,9 +139,6 @@ xfs_Gqm_init(void)
165 atomic_set(&xqm->qm_totaldquots, 0); 139 atomic_set(&xqm->qm_totaldquots, 0);
166 xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO; 140 xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
167 xqm->qm_nrefs = 0; 141 xqm->qm_nrefs = 0;
168#ifdef DEBUG
169 mutex_init(&qcheck_lock);
170#endif
171 return xqm; 142 return xqm;
172 143
173 out_free_udqhash: 144 out_free_udqhash:
@@ -204,9 +175,6 @@ xfs_qm_destroy(
204 mutex_lock(&xqm->qm_dqfrlist_lock); 175 mutex_lock(&xqm->qm_dqfrlist_lock);
205 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) { 176 list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
206 xfs_dqlock(dqp); 177 xfs_dqlock(dqp);
207#ifdef QUOTADEBUG
208 xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp);
209#endif
210 list_del_init(&dqp->q_freelist); 178 list_del_init(&dqp->q_freelist);
211 xfs_Gqm->qm_dqfrlist_cnt--; 179 xfs_Gqm->qm_dqfrlist_cnt--;
212 xfs_dqunlock(dqp); 180 xfs_dqunlock(dqp);
@@ -214,9 +182,6 @@ xfs_qm_destroy(
214 } 182 }
215 mutex_unlock(&xqm->qm_dqfrlist_lock); 183 mutex_unlock(&xqm->qm_dqfrlist_lock);
216 mutex_destroy(&xqm->qm_dqfrlist_lock); 184 mutex_destroy(&xqm->qm_dqfrlist_lock);
217#ifdef DEBUG
218 mutex_destroy(&qcheck_lock);
219#endif
220 kmem_free(xqm); 185 kmem_free(xqm);
221} 186}
222 187
@@ -409,11 +374,6 @@ xfs_qm_mount_quotas(
409 xfs_warn(mp, "Failed to initialize disk quotas."); 374 xfs_warn(mp, "Failed to initialize disk quotas.");
410 return; 375 return;
411 } 376 }
412
413#ifdef QUOTADEBUG
414 if (XFS_IS_QUOTA_ON(mp))
415 xfs_qm_internalqcheck(mp);
416#endif
417} 377}
418 378
419/* 379/*
@@ -866,8 +826,8 @@ xfs_qm_dqattach_locked(
866 } 826 }
867 827
868 done: 828 done:
869#ifdef QUOTADEBUG 829#ifdef DEBUG
870 if (! error) { 830 if (!error) {
871 if (XFS_IS_UQUOTA_ON(mp)) 831 if (XFS_IS_UQUOTA_ON(mp))
872 ASSERT(ip->i_udquot); 832 ASSERT(ip->i_udquot);
873 if (XFS_IS_OQUOTA_ON(mp)) 833 if (XFS_IS_OQUOTA_ON(mp))
@@ -1733,8 +1693,6 @@ xfs_qm_quotacheck(
1733 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD); 1693 mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1734 mp->m_qflags |= flags; 1694 mp->m_qflags |= flags;
1735 1695
1736 xfs_qm_dquot_list_print(mp);
1737
1738 error_return: 1696 error_return:
1739 if (error) { 1697 if (error) {
1740 xfs_warn(mp, 1698 xfs_warn(mp,
@@ -2096,9 +2054,6 @@ xfs_qm_write_sb_changes(
2096 xfs_trans_t *tp; 2054 xfs_trans_t *tp;
2097 int error; 2055 int error;
2098 2056
2099#ifdef QUOTADEBUG
2100 xfs_notice(mp, "Writing superblock quota changes");
2101#endif
2102 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 2057 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2103 if ((error = xfs_trans_reserve(tp, 0, 2058 if ((error = xfs_trans_reserve(tp, 0,
2104 mp->m_sb.sb_sectsize + 128, 0, 2059 mp->m_sb.sb_sectsize + 128, 0,
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index 567b29b9f1b3..43b9abe1052c 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -163,10 +163,4 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
163extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint); 163extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
164extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint); 164extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
165 165
166#ifdef DEBUG
167extern int xfs_qm_internalqcheck(xfs_mount_t *);
168#else
169#define xfs_qm_internalqcheck(mp) (0)
170#endif
171
172#endif /* __XFS_QM_H__ */ 166#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 2dadb15d5ca9..609246f42e6c 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -263,7 +263,7 @@ xfs_qm_scall_trunc_qfile(
263 xfs_ilock(ip, XFS_ILOCK_EXCL); 263 xfs_ilock(ip, XFS_ILOCK_EXCL);
264 xfs_trans_ijoin(tp, ip); 264 xfs_trans_ijoin(tp, ip);
265 265
266 error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1); 266 error = xfs_itruncate_data(&tp, ip, 0);
267 if (error) { 267 if (error) {
268 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | 268 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
269 XFS_TRANS_ABORT); 269 XFS_TRANS_ABORT);
@@ -622,7 +622,6 @@ xfs_qm_scall_setqlim(
622 xfs_trans_log_dquot(tp, dqp); 622 xfs_trans_log_dquot(tp, dqp);
623 623
624 error = xfs_trans_commit(tp, 0); 624 error = xfs_trans_commit(tp, 0);
625 xfs_qm_dqprint(dqp);
626 xfs_qm_dqrele(dqp); 625 xfs_qm_dqrele(dqp);
627 626
628 out_unlock: 627 out_unlock:
@@ -657,7 +656,6 @@ xfs_qm_scall_getquota(
657 xfs_qm_dqput(dqp); 656 xfs_qm_dqput(dqp);
658 return XFS_ERROR(ENOENT); 657 return XFS_ERROR(ENOENT);
659 } 658 }
660 /* xfs_qm_dqprint(dqp); */
661 /* 659 /*
662 * Convert the disk dquot to the exportable format 660 * Convert the disk dquot to the exportable format
663 */ 661 */
@@ -906,354 +904,3 @@ xfs_qm_dqrele_all_inodes(
906 ASSERT(mp->m_quotainfo); 904 ASSERT(mp->m_quotainfo);
907 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); 905 xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
908} 906}
909
910/*------------------------------------------------------------------------*/
911#ifdef DEBUG
912/*
913 * This contains all the test functions for XFS disk quotas.
914 * Currently it does a quota accounting check. ie. it walks through
915 * all inodes in the file system, calculating the dquot accounting fields,
916 * and prints out any inconsistencies.
917 */
918xfs_dqhash_t *qmtest_udqtab;
919xfs_dqhash_t *qmtest_gdqtab;
920int qmtest_hashmask;
921int qmtest_nfails;
922struct mutex qcheck_lock;
923
924#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
925 (__psunsigned_t)(id)) & \
926 (qmtest_hashmask - 1))
927
928#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \
929 (qmtest_udqtab + \
930 DQTEST_HASHVAL(mp, id)) : \
931 (qmtest_gdqtab + \
932 DQTEST_HASHVAL(mp, id)))
933
934#define DQTEST_LIST_PRINT(l, NXT, title) \
935{ \
936 xfs_dqtest_t *dqp; int i = 0;\
937 xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \
938 for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
939 dqp = (xfs_dqtest_t *)dqp->NXT) { \
940 xfs_debug(dqp->q_mount, \
941 " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \
942 ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \
943 dqp->d_bcount, dqp->d_icount); } \
944}
945
946typedef struct dqtest {
947 uint dq_flags; /* various flags (XFS_DQ_*) */
948 struct list_head q_hashlist;
949 xfs_dqhash_t *q_hash; /* the hashchain header */
950 xfs_mount_t *q_mount; /* filesystem this relates to */
951 xfs_dqid_t d_id; /* user id or group id */
952 xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */
953 xfs_qcnt_t d_icount; /* # inodes owned by the user */
954} xfs_dqtest_t;
955
956STATIC void
957xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
958{
959 list_add(&dqp->q_hashlist, &h->qh_list);
960 h->qh_version++;
961 h->qh_nelems++;
962}
963STATIC void
964xfs_qm_dqtest_print(
965 struct xfs_mount *mp,
966 struct dqtest *d)
967{
968 xfs_debug(mp, "-----------DQTEST DQUOT----------------");
969 xfs_debug(mp, "---- dquot ID = %d", d->d_id);
970 xfs_debug(mp, "---- fs = 0x%p", d->q_mount);
971 xfs_debug(mp, "---- bcount = %Lu (0x%x)",
972 d->d_bcount, (int)d->d_bcount);
973 xfs_debug(mp, "---- icount = %Lu (0x%x)",
974 d->d_icount, (int)d->d_icount);
975 xfs_debug(mp, "---------------------------");
976}
977
978STATIC void
979xfs_qm_dqtest_failed(
980 xfs_dqtest_t *d,
981 xfs_dquot_t *dqp,
982 char *reason,
983 xfs_qcnt_t a,
984 xfs_qcnt_t b,
985 int error)
986{
987 qmtest_nfails++;
988 if (error)
989 xfs_debug(dqp->q_mount,
990 "quotacheck failed id=%d, err=%d\nreason: %s",
991 d->d_id, error, reason);
992 else
993 xfs_debug(dqp->q_mount,
994 "quotacheck failed id=%d (%s) [%d != %d]",
995 d->d_id, reason, (int)a, (int)b);
996 xfs_qm_dqtest_print(dqp->q_mount, d);
997 if (dqp)
998 xfs_qm_dqprint(dqp);
999}
1000
1001STATIC int
1002xfs_dqtest_cmp2(
1003 xfs_dqtest_t *d,
1004 xfs_dquot_t *dqp)
1005{
1006 int err = 0;
1007 if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) {
1008 xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
1009 be64_to_cpu(dqp->q_core.d_icount),
1010 d->d_icount, 0);
1011 err++;
1012 }
1013 if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) {
1014 xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
1015 be64_to_cpu(dqp->q_core.d_bcount),
1016 d->d_bcount, 0);
1017 err++;
1018 }
1019 if (dqp->q_core.d_blk_softlimit &&
1020 be64_to_cpu(dqp->q_core.d_bcount) >=
1021 be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
1022 if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
1023 xfs_debug(dqp->q_mount,
1024 "%d [%s] BLK TIMER NOT STARTED",
1025 d->d_id, DQFLAGTO_TYPESTR(d));
1026 err++;
1027 }
1028 }
1029 if (dqp->q_core.d_ino_softlimit &&
1030 be64_to_cpu(dqp->q_core.d_icount) >=
1031 be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
1032 if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
1033 xfs_debug(dqp->q_mount,
1034 "%d [%s] INO TIMER NOT STARTED",
1035 d->d_id, DQFLAGTO_TYPESTR(d));
1036 err++;
1037 }
1038 }
1039#ifdef QUOTADEBUG
1040 if (!err) {
1041 xfs_debug(dqp->q_mount, "%d [%s] qchecked",
1042 d->d_id, DQFLAGTO_TYPESTR(d));
1043 }
1044#endif
1045 return (err);
1046}
1047
1048STATIC void
1049xfs_dqtest_cmp(
1050 xfs_dqtest_t *d)
1051{
1052 xfs_dquot_t *dqp;
1053 int error;
1054
1055 /* xfs_qm_dqtest_print(d); */
1056 if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0,
1057 &dqp))) {
1058 xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error);
1059 return;
1060 }
1061 xfs_dqtest_cmp2(d, dqp);
1062 xfs_qm_dqput(dqp);
1063}
1064
1065STATIC int
1066xfs_qm_internalqcheck_dqget(
1067 xfs_mount_t *mp,
1068 xfs_dqid_t id,
1069 uint type,
1070 xfs_dqtest_t **O_dq)
1071{
1072 xfs_dqtest_t *d;
1073 xfs_dqhash_t *h;
1074
1075 h = DQTEST_HASH(mp, id, type);
1076 list_for_each_entry(d, &h->qh_list, q_hashlist) {
1077 if (d->d_id == id && mp == d->q_mount) {
1078 *O_dq = d;
1079 return (0);
1080 }
1081 }
1082 d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP);
1083 d->dq_flags = type;
1084 d->d_id = id;
1085 d->q_mount = mp;
1086 d->q_hash = h;
1087 INIT_LIST_HEAD(&d->q_hashlist);
1088 xfs_qm_hashinsert(h, d);
1089 *O_dq = d;
1090 return (0);
1091}
1092
1093STATIC void
1094xfs_qm_internalqcheck_get_dquots(
1095 xfs_mount_t *mp,
1096 xfs_dqid_t uid,
1097 xfs_dqid_t projid,
1098 xfs_dqid_t gid,
1099 xfs_dqtest_t **ud,
1100 xfs_dqtest_t **gd)
1101{
1102 if (XFS_IS_UQUOTA_ON(mp))
1103 xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
1104 if (XFS_IS_GQUOTA_ON(mp))
1105 xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
1106 else if (XFS_IS_PQUOTA_ON(mp))
1107 xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd);
1108}
1109
1110
1111STATIC void
1112xfs_qm_internalqcheck_dqadjust(
1113 xfs_inode_t *ip,
1114 xfs_dqtest_t *d)
1115{
1116 d->d_icount++;
1117 d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks;
1118}
1119
1120STATIC int
1121xfs_qm_internalqcheck_adjust(
1122 xfs_mount_t *mp, /* mount point for filesystem */
1123 xfs_ino_t ino, /* inode number to get data for */
1124 void __user *buffer, /* not used */
1125 int ubsize, /* not used */
1126 int *ubused, /* not used */
1127 int *res) /* bulkstat result code */
1128{
1129 xfs_inode_t *ip;
1130 xfs_dqtest_t *ud, *gd;
1131 uint lock_flags;
1132 boolean_t ipreleased;
1133 int error;
1134
1135 ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1136
1137 if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1138 *res = BULKSTAT_RV_NOTHING;
1139 xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n",
1140 __func__, (unsigned long long) ino,
1141 (unsigned long long) mp->m_sb.sb_uquotino,
1142 (unsigned long long) mp->m_sb.sb_gquotino);
1143 return XFS_ERROR(EINVAL);
1144 }
1145 ipreleased = B_FALSE;
1146 again:
1147 lock_flags = XFS_ILOCK_SHARED;
1148 if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
1149 *res = BULKSTAT_RV_NOTHING;
1150 return (error);
1151 }
1152
1153 /*
1154 * This inode can have blocks after eof which can get released
1155 * when we send it to inactive. Since we don't check the dquot
1156 * until the after all our calculations are done, we must get rid
1157 * of those now.
1158 */
1159 if (! ipreleased) {
1160 xfs_iunlock(ip, lock_flags);
1161 IRELE(ip);
1162 ipreleased = B_TRUE;
1163 goto again;
1164 }
1165 xfs_qm_internalqcheck_get_dquots(mp,
1166 (xfs_dqid_t) ip->i_d.di_uid,
1167 (xfs_dqid_t) xfs_get_projid(ip),
1168 (xfs_dqid_t) ip->i_d.di_gid,
1169 &ud, &gd);
1170 if (XFS_IS_UQUOTA_ON(mp)) {
1171 ASSERT(ud);
1172 xfs_qm_internalqcheck_dqadjust(ip, ud);
1173 }
1174 if (XFS_IS_OQUOTA_ON(mp)) {
1175 ASSERT(gd);
1176 xfs_qm_internalqcheck_dqadjust(ip, gd);
1177 }
1178 xfs_iunlock(ip, lock_flags);
1179 IRELE(ip);
1180 *res = BULKSTAT_RV_DIDONE;
1181 return (0);
1182}
1183
1184
1185/* PRIVATE, debugging */
1186int
1187xfs_qm_internalqcheck(
1188 xfs_mount_t *mp)
1189{
1190 xfs_ino_t lastino;
1191 int done, count;
1192 int i;
1193 int error;
1194
1195 lastino = 0;
1196 qmtest_hashmask = 32;
1197 count = 5;
1198 done = 0;
1199 qmtest_nfails = 0;
1200
1201 if (! XFS_IS_QUOTA_ON(mp))
1202 return XFS_ERROR(ESRCH);
1203
1204 xfs_log_force(mp, XFS_LOG_SYNC);
1205 XFS_bflush(mp->m_ddev_targp);
1206 xfs_log_force(mp, XFS_LOG_SYNC);
1207 XFS_bflush(mp->m_ddev_targp);
1208
1209 mutex_lock(&qcheck_lock);
1210 /* There should be absolutely no quota activity while this
1211 is going on. */
1212 qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
1213 sizeof(xfs_dqhash_t), KM_SLEEP);
1214 qmtest_gdqtab = kmem_zalloc(qmtest_hashmask *
1215 sizeof(xfs_dqhash_t), KM_SLEEP);
1216 do {
1217 /*
1218 * Iterate thru all the inodes in the file system,
1219 * adjusting the corresponding dquot counters
1220 */
1221 error = xfs_bulkstat(mp, &lastino, &count,
1222 xfs_qm_internalqcheck_adjust,
1223 0, NULL, &done);
1224 if (error) {
1225 xfs_debug(mp, "Bulkstat returned error 0x%x", error);
1226 break;
1227 }
1228 } while (!done);
1229
1230 xfs_debug(mp, "Checking results against system dquots");
1231 for (i = 0; i < qmtest_hashmask; i++) {
1232 xfs_dqtest_t *d, *n;
1233 xfs_dqhash_t *h;
1234
1235 h = &qmtest_udqtab[i];
1236 list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
1237 xfs_dqtest_cmp(d);
1238 kmem_free(d);
1239 }
1240 h = &qmtest_gdqtab[i];
1241 list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
1242 xfs_dqtest_cmp(d);
1243 kmem_free(d);
1244 }
1245 }
1246
1247 if (qmtest_nfails) {
1248 xfs_debug(mp, "******** quotacheck failed ********");
1249 xfs_debug(mp, "failures = %d", qmtest_nfails);
1250 } else {
1251 xfs_debug(mp, "******** quotacheck successful! ********");
1252 }
1253 kmem_free(qmtest_udqtab);
1254 kmem_free(qmtest_gdqtab);
1255 mutex_unlock(&qcheck_lock);
1256 return (qmtest_nfails);
1257}
1258
1259#endif /* DEBUG */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 2a3648731331..4d00ee67792d 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -59,7 +59,7 @@ xfs_trans_dqjoin(
59 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item); 59 xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
60 60
61 /* 61 /*
62 * Initialize i_transp so we can later determine if this dquot is 62 * Initialize d_transp so we can later determine if this dquot is
63 * associated with this transaction. 63 * associated with this transaction.
64 */ 64 */
65 dqp->q_transp = tp; 65 dqp->q_transp = tp;
@@ -387,18 +387,18 @@ xfs_trans_apply_dquot_deltas(
387 qtrx->qt_delbcnt_delta; 387 qtrx->qt_delbcnt_delta;
388 totalrtbdelta = qtrx->qt_rtbcount_delta + 388 totalrtbdelta = qtrx->qt_rtbcount_delta +
389 qtrx->qt_delrtb_delta; 389 qtrx->qt_delrtb_delta;
390#ifdef QUOTADEBUG 390#ifdef DEBUG
391 if (totalbdelta < 0) 391 if (totalbdelta < 0)
392 ASSERT(be64_to_cpu(d->d_bcount) >= 392 ASSERT(be64_to_cpu(d->d_bcount) >=
393 (xfs_qcnt_t) -totalbdelta); 393 -totalbdelta);
394 394
395 if (totalrtbdelta < 0) 395 if (totalrtbdelta < 0)
396 ASSERT(be64_to_cpu(d->d_rtbcount) >= 396 ASSERT(be64_to_cpu(d->d_rtbcount) >=
397 (xfs_qcnt_t) -totalrtbdelta); 397 -totalrtbdelta);
398 398
399 if (qtrx->qt_icount_delta < 0) 399 if (qtrx->qt_icount_delta < 0)
400 ASSERT(be64_to_cpu(d->d_icount) >= 400 ASSERT(be64_to_cpu(d->d_icount) >=
401 (xfs_qcnt_t) -qtrx->qt_icount_delta); 401 -qtrx->qt_icount_delta);
402#endif 402#endif
403 if (totalbdelta) 403 if (totalbdelta)
404 be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta); 404 be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
@@ -642,11 +642,6 @@ xfs_trans_dqresv(
642 ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) || 642 ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) && 643 (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) { 644 (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
645#ifdef QUOTADEBUG
646 xfs_debug(mp,
647 "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?",
648 nblks, *resbcountp, hardlimit);
649#endif
650 if (nblks > 0) { 645 if (nblks > 0) {
651 /* 646 /*
652 * dquot is locked already. See if we'd go over the 647 * dquot is locked already. See if we'd go over the
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 5ad8ad3a1dcd..53ec3ea9a625 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,6 @@
22#define STATIC 22#define STATIC
23#define DEBUG 1 23#define DEBUG 1
24#define XFS_BUF_LOCK_TRACKING 1 24#define XFS_BUF_LOCK_TRACKING 1
25/* #define QUOTADEBUG 1 */
26#endif 25#endif
27 26
28#include <linux-2.6/xfs_linux.h> 27#include <linux-2.6/xfs_linux.h>
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 11dd72070cbb..0135e2a669d7 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,7 @@ struct xfs_acl {
42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) 42#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
43 43
44#ifdef CONFIG_XFS_POSIX_ACL 44#ifdef CONFIG_XFS_POSIX_ACL
45extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags); 45extern int xfs_check_acl(struct inode *inode, int mask);
46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); 46extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); 47extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
48extern int xfs_acl_chmod(struct inode *inode); 48extern int xfs_acl_chmod(struct inode *inode);
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 95862bbff56b..1e00b3ef6274 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -570,9 +570,7 @@ xfs_alloc_ag_vextent_exact(
570 xfs_agblock_t tbno; /* start block of trimmed extent */ 570 xfs_agblock_t tbno; /* start block of trimmed extent */
571 xfs_extlen_t tlen; /* length of trimmed extent */ 571 xfs_extlen_t tlen; /* length of trimmed extent */
572 xfs_agblock_t tend; /* end block of trimmed extent */ 572 xfs_agblock_t tend; /* end block of trimmed extent */
573 xfs_agblock_t end; /* end of allocated extent */
574 int i; /* success/failure of operation */ 573 int i; /* success/failure of operation */
575 xfs_extlen_t rlen; /* length of returned extent */
576 574
577 ASSERT(args->alignment == 1); 575 ASSERT(args->alignment == 1);
578 576
@@ -625,18 +623,16 @@ xfs_alloc_ag_vextent_exact(
625 * 623 *
626 * Fix the length according to mod and prod if given. 624 * Fix the length according to mod and prod if given.
627 */ 625 */
628 end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); 626 args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
629 args->len = end - args->agbno; 627 - args->agbno;
630 xfs_alloc_fix_len(args); 628 xfs_alloc_fix_len(args);
631 if (!xfs_alloc_fix_minleft(args)) 629 if (!xfs_alloc_fix_minleft(args))
632 goto not_found; 630 goto not_found;
633 631
634 rlen = args->len; 632 ASSERT(args->agbno + args->len <= tend);
635 ASSERT(args->agbno + rlen <= tend);
636 end = args->agbno + rlen;
637 633
638 /* 634 /*
639 * We are allocating agbno for rlen [agbno .. end] 635 * We are allocating agbno for args->len
640 * Allocate/initialize a cursor for the by-size btree. 636 * Allocate/initialize a cursor for the by-size btree.
641 */ 637 */
642 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 638 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
@@ -2127,7 +2123,7 @@ xfs_read_agf(
2127 * Validate the magic number of the agf block. 2123 * Validate the magic number of the agf block.
2128 */ 2124 */
2129 agf_ok = 2125 agf_ok =
2130 be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && 2126 agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
2131 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && 2127 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2132 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && 2128 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2133 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && 2129 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 2b3518826a69..ffb3386e45c1 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -31,7 +31,6 @@
31#include "xfs_dinode.h" 31#include "xfs_dinode.h"
32#include "xfs_inode.h" 32#include "xfs_inode.h"
33#include "xfs_btree.h" 33#include "xfs_btree.h"
34#include "xfs_btree_trace.h"
35#include "xfs_alloc.h" 34#include "xfs_alloc.h"
36#include "xfs_error.h" 35#include "xfs_error.h"
37#include "xfs_trace.h" 36#include "xfs_trace.h"
@@ -311,72 +310,6 @@ xfs_allocbt_recs_inorder(
311} 310}
312#endif /* DEBUG */ 311#endif /* DEBUG */
313 312
314#ifdef XFS_BTREE_TRACE
315ktrace_t *xfs_allocbt_trace_buf;
316
317STATIC void
318xfs_allocbt_trace_enter(
319 struct xfs_btree_cur *cur,
320 const char *func,
321 char *s,
322 int type,
323 int line,
324 __psunsigned_t a0,
325 __psunsigned_t a1,
326 __psunsigned_t a2,
327 __psunsigned_t a3,
328 __psunsigned_t a4,
329 __psunsigned_t a5,
330 __psunsigned_t a6,
331 __psunsigned_t a7,
332 __psunsigned_t a8,
333 __psunsigned_t a9,
334 __psunsigned_t a10)
335{
336 ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
337 (void *)func, (void *)s, NULL, (void *)cur,
338 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
339 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
340 (void *)a8, (void *)a9, (void *)a10);
341}
342
343STATIC void
344xfs_allocbt_trace_cursor(
345 struct xfs_btree_cur *cur,
346 __uint32_t *s0,
347 __uint64_t *l0,
348 __uint64_t *l1)
349{
350 *s0 = cur->bc_private.a.agno;
351 *l0 = cur->bc_rec.a.ar_startblock;
352 *l1 = cur->bc_rec.a.ar_blockcount;
353}
354
355STATIC void
356xfs_allocbt_trace_key(
357 struct xfs_btree_cur *cur,
358 union xfs_btree_key *key,
359 __uint64_t *l0,
360 __uint64_t *l1)
361{
362 *l0 = be32_to_cpu(key->alloc.ar_startblock);
363 *l1 = be32_to_cpu(key->alloc.ar_blockcount);
364}
365
366STATIC void
367xfs_allocbt_trace_record(
368 struct xfs_btree_cur *cur,
369 union xfs_btree_rec *rec,
370 __uint64_t *l0,
371 __uint64_t *l1,
372 __uint64_t *l2)
373{
374 *l0 = be32_to_cpu(rec->alloc.ar_startblock);
375 *l1 = be32_to_cpu(rec->alloc.ar_blockcount);
376 *l2 = 0;
377}
378#endif /* XFS_BTREE_TRACE */
379
380static const struct xfs_btree_ops xfs_allocbt_ops = { 313static const struct xfs_btree_ops xfs_allocbt_ops = {
381 .rec_len = sizeof(xfs_alloc_rec_t), 314 .rec_len = sizeof(xfs_alloc_rec_t),
382 .key_len = sizeof(xfs_alloc_key_t), 315 .key_len = sizeof(xfs_alloc_key_t),
@@ -393,18 +326,10 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
393 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur, 326 .init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
394 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur, 327 .init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
395 .key_diff = xfs_allocbt_key_diff, 328 .key_diff = xfs_allocbt_key_diff,
396
397#ifdef DEBUG 329#ifdef DEBUG
398 .keys_inorder = xfs_allocbt_keys_inorder, 330 .keys_inorder = xfs_allocbt_keys_inorder,
399 .recs_inorder = xfs_allocbt_recs_inorder, 331 .recs_inorder = xfs_allocbt_recs_inorder,
400#endif 332#endif
401
402#ifdef XFS_BTREE_TRACE
403 .trace_enter = xfs_allocbt_trace_enter,
404 .trace_cursor = xfs_allocbt_trace_cursor,
405 .trace_key = xfs_allocbt_trace_key,
406 .trace_record = xfs_allocbt_trace_record,
407#endif
408}; 333};
409 334
410/* 335/*
@@ -427,13 +352,16 @@ xfs_allocbt_init_cursor(
427 352
428 cur->bc_tp = tp; 353 cur->bc_tp = tp;
429 cur->bc_mp = mp; 354 cur->bc_mp = mp;
430 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
431 cur->bc_btnum = btnum; 355 cur->bc_btnum = btnum;
432 cur->bc_blocklog = mp->m_sb.sb_blocklog; 356 cur->bc_blocklog = mp->m_sb.sb_blocklog;
433
434 cur->bc_ops = &xfs_allocbt_ops; 357 cur->bc_ops = &xfs_allocbt_ops;
435 if (btnum == XFS_BTNUM_CNT) 358
359 if (btnum == XFS_BTNUM_CNT) {
360 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
436 cur->bc_flags = XFS_BTREE_LASTREC_UPDATE; 361 cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
362 } else {
363 cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
364 }
437 365
438 cur->bc_private.a.agbp = agbp; 366 cur->bc_private.a.agbp = agbp;
439 cur->bc_private.a.agno = agno; 367 cur->bc_private.a.agno = agno;
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
deleted file mode 100644
index 0902249354a0..000000000000
--- a/fs/xfs/xfs_arch.h
+++ /dev/null
@@ -1,136 +0,0 @@
1/*
2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_ARCH_H__
19#define __XFS_ARCH_H__
20
21#ifndef XFS_BIG_INUMS
22# error XFS_BIG_INUMS must be defined true or false
23#endif
24
25#ifdef __KERNEL__
26
27#include <asm/byteorder.h>
28
29#ifdef __BIG_ENDIAN
30#define XFS_NATIVE_HOST 1
31#else
32#undef XFS_NATIVE_HOST
33#endif
34
35#else /* __KERNEL__ */
36
37#if __BYTE_ORDER == __BIG_ENDIAN
38#define XFS_NATIVE_HOST 1
39#else
40#undef XFS_NATIVE_HOST
41#endif
42
43#ifdef XFS_NATIVE_HOST
44#define cpu_to_be16(val) ((__force __be16)(__u16)(val))
45#define cpu_to_be32(val) ((__force __be32)(__u32)(val))
46#define cpu_to_be64(val) ((__force __be64)(__u64)(val))
47#define be16_to_cpu(val) ((__force __u16)(__be16)(val))
48#define be32_to_cpu(val) ((__force __u32)(__be32)(val))
49#define be64_to_cpu(val) ((__force __u64)(__be64)(val))
50#else
51#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val)))
52#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val)))
53#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val)))
54#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val)))
55#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val)))
56#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val)))
57#endif
58
59static inline void be16_add_cpu(__be16 *a, __s16 b)
60{
61 *a = cpu_to_be16(be16_to_cpu(*a) + b);
62}
63
64static inline void be32_add_cpu(__be32 *a, __s32 b)
65{
66 *a = cpu_to_be32(be32_to_cpu(*a) + b);
67}
68
69static inline void be64_add_cpu(__be64 *a, __s64 b)
70{
71 *a = cpu_to_be64(be64_to_cpu(*a) + b);
72}
73
74#endif /* __KERNEL__ */
75
76/*
77 * get and set integers from potentially unaligned locations
78 */
79
80#define INT_GET_UNALIGNED_16_BE(pointer) \
81 ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1])))
82#define INT_SET_UNALIGNED_16_BE(pointer,value) \
83 { \
84 ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \
85 ((__u8*)(pointer))[1] = (((value) ) & 0xff); \
86 }
87
88/*
89 * In directories inode numbers are stored as unaligned arrays of unsigned
90 * 8bit integers on disk.
91 *
92 * For v1 directories or v2 directories that contain inode numbers that
93 * do not fit into 32bit the array has eight members, but the first member
94 * is always zero:
95 *
96 * |unused|48-55|40-47|32-39|24-31|16-23| 8-15| 0- 7|
97 *
98 * For v2 directories that only contain entries with inode numbers that fit
99 * into 32bits a four-member array is used:
100 *
101 * |24-31|16-23| 8-15| 0- 7|
102 */
103
104#define XFS_GET_DIR_INO4(di) \
105 (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
106
107#define XFS_PUT_DIR_INO4(from, di) \
108do { \
109 (di).i[0] = (((from) & 0xff000000ULL) >> 24); \
110 (di).i[1] = (((from) & 0x00ff0000ULL) >> 16); \
111 (di).i[2] = (((from) & 0x0000ff00ULL) >> 8); \
112 (di).i[3] = ((from) & 0x000000ffULL); \
113} while (0)
114
115#define XFS_DI_HI(di) \
116 (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
117#define XFS_DI_LO(di) \
118 (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
119
120#define XFS_GET_DIR_INO8(di) \
121 (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
122 ((xfs_ino_t)XFS_DI_HI(di) << 32))
123
124#define XFS_PUT_DIR_INO8(from, di) \
125do { \
126 (di).i[0] = 0; \
127 (di).i[1] = (((from) & 0x00ff000000000000ULL) >> 48); \
128 (di).i[2] = (((from) & 0x0000ff0000000000ULL) >> 40); \
129 (di).i[3] = (((from) & 0x000000ff00000000ULL) >> 32); \
130 (di).i[4] = (((from) & 0x00000000ff000000ULL) >> 24); \
131 (di).i[5] = (((from) & 0x0000000000ff0000ULL) >> 16); \
132 (di).i[6] = (((from) & 0x000000000000ff00ULL) >> 8); \
133 (di).i[7] = ((from) & 0x00000000000000ffULL); \
134} while (0)
135
136#endif /* __XFS_ARCH_H__ */
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 01d2072fb6d4..cbae424fe1ba 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -822,17 +822,21 @@ xfs_attr_inactive(xfs_inode_t *dp)
822 error = xfs_attr_root_inactive(&trans, dp); 822 error = xfs_attr_root_inactive(&trans, dp);
823 if (error) 823 if (error)
824 goto out; 824 goto out;
825
825 /* 826 /*
826 * signal synchronous inactive transactions unless this 827 * Signal synchronous inactive transactions unless this is a
827 * is a synchronous mount filesystem in which case we 828 * synchronous mount filesystem in which case we know that we're here
828 * know that we're here because we've been called out of 829 * because we've been called out of xfs_inactive which means that the
829 * xfs_inactive which means that the last reference is gone 830 * last reference is gone and the unlink transaction has already hit
830 * and the unlink transaction has already hit the disk so 831 * the disk so async inactive transactions are safe.
831 * async inactive transactions are safe.
832 */ 832 */
833 if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK, 833 if (!(mp->m_flags & XFS_MOUNT_WSYNC)) {
834 (!(mp->m_flags & XFS_MOUNT_WSYNC) 834 if (dp->i_d.di_anextents > 0)
835 ? 1 : 0)))) 835 xfs_trans_set_sync(trans);
836 }
837
838 error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
839 if (error)
836 goto out; 840 goto out;
837 841
838 /* 842 /*
@@ -1199,7 +1203,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1199 return XFS_ERROR(error); 1203 return XFS_ERROR(error);
1200 ASSERT(bp != NULL); 1204 ASSERT(bp != NULL);
1201 leaf = bp->data; 1205 leaf = bp->data;
1202 if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) { 1206 if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
1203 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW, 1207 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
1204 context->dp->i_mount, leaf); 1208 context->dp->i_mount, leaf);
1205 xfs_da_brelse(NULL, bp); 1209 xfs_da_brelse(NULL, bp);
@@ -1606,9 +1610,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
1606 XFS_ATTR_FORK); 1610 XFS_ATTR_FORK);
1607 if (error) 1611 if (error)
1608 goto out; 1612 goto out;
1609 ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *) 1613 ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) ==
1610 bp->data)->hdr.info.magic) 1614 cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1611 == XFS_ATTR_LEAF_MAGIC);
1612 1615
1613 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) { 1616 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1614 xfs_bmap_init(args->flist, args->firstblock); 1617 xfs_bmap_init(args->flist, args->firstblock);
@@ -1873,11 +1876,11 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1873 return(XFS_ERROR(EFSCORRUPTED)); 1876 return(XFS_ERROR(EFSCORRUPTED));
1874 } 1877 }
1875 node = bp->data; 1878 node = bp->data;
1876 if (be16_to_cpu(node->hdr.info.magic) 1879 if (node->hdr.info.magic ==
1877 == XFS_ATTR_LEAF_MAGIC) 1880 cpu_to_be16(XFS_ATTR_LEAF_MAGIC))
1878 break; 1881 break;
1879 if (unlikely(be16_to_cpu(node->hdr.info.magic) 1882 if (unlikely(node->hdr.info.magic !=
1880 != XFS_DA_NODE_MAGIC)) { 1883 cpu_to_be16(XFS_DA_NODE_MAGIC))) {
1881 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)", 1884 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
1882 XFS_ERRLEVEL_LOW, 1885 XFS_ERRLEVEL_LOW,
1883 context->dp->i_mount, 1886 context->dp->i_mount,
@@ -1912,8 +1915,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
1912 */ 1915 */
1913 for (;;) { 1916 for (;;) {
1914 leaf = bp->data; 1917 leaf = bp->data;
1915 if (unlikely(be16_to_cpu(leaf->hdr.info.magic) 1918 if (unlikely(leaf->hdr.info.magic !=
1916 != XFS_ATTR_LEAF_MAGIC)) { 1919 cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
1917 XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)", 1920 XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
1918 XFS_ERRLEVEL_LOW, 1921 XFS_ERRLEVEL_LOW,
1919 context->dp->i_mount, leaf); 1922 context->dp->i_mount, leaf);
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 71e90dc2aeb1..8fad9602542b 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -731,7 +731,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
731 int bytes, i; 731 int bytes, i;
732 732
733 leaf = bp->data; 733 leaf = bp->data;
734 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 734 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
735 735
736 entry = &leaf->entries[0]; 736 entry = &leaf->entries[0];
737 bytes = sizeof(struct xfs_attr_sf_hdr); 737 bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -777,7 +777,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
777 ASSERT(bp != NULL); 777 ASSERT(bp != NULL);
778 memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount)); 778 memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
779 leaf = (xfs_attr_leafblock_t *)tmpbuffer; 779 leaf = (xfs_attr_leafblock_t *)tmpbuffer;
780 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 780 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
781 memset(bp->data, 0, XFS_LBSIZE(dp->i_mount)); 781 memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
782 782
783 /* 783 /*
@@ -872,7 +872,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
872 goto out; 872 goto out;
873 node = bp1->data; 873 node = bp1->data;
874 leaf = bp2->data; 874 leaf = bp2->data;
875 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 875 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
876 /* both on-disk, don't endian-flip twice */ 876 /* both on-disk, don't endian-flip twice */
877 node->btree[0].hashval = 877 node->btree[0].hashval =
878 leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval; 878 leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
@@ -997,7 +997,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
997 int tablesize, entsize, sum, tmp, i; 997 int tablesize, entsize, sum, tmp, i;
998 998
999 leaf = bp->data; 999 leaf = bp->data;
1000 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1000 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1001 ASSERT((args->index >= 0) 1001 ASSERT((args->index >= 0)
1002 && (args->index <= be16_to_cpu(leaf->hdr.count))); 1002 && (args->index <= be16_to_cpu(leaf->hdr.count)));
1003 hdr = &leaf->hdr; 1003 hdr = &leaf->hdr;
@@ -1070,7 +1070,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
1070 int tmp, i; 1070 int tmp, i;
1071 1071
1072 leaf = bp->data; 1072 leaf = bp->data;
1073 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1073 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1074 hdr = &leaf->hdr; 1074 hdr = &leaf->hdr;
1075 ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE)); 1075 ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
1076 ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count))); 1076 ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count)));
@@ -1256,8 +1256,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1256 ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC); 1256 ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
1257 leaf1 = blk1->bp->data; 1257 leaf1 = blk1->bp->data;
1258 leaf2 = blk2->bp->data; 1258 leaf2 = blk2->bp->data;
1259 ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1259 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1260 ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1260 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1261 args = state->args; 1261 args = state->args;
1262 1262
1263 /* 1263 /*
@@ -1533,7 +1533,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1533 */ 1533 */
1534 blk = &state->path.blk[ state->path.active-1 ]; 1534 blk = &state->path.blk[ state->path.active-1 ];
1535 info = blk->bp->data; 1535 info = blk->bp->data;
1536 ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); 1536 ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1537 leaf = (xfs_attr_leafblock_t *)info; 1537 leaf = (xfs_attr_leafblock_t *)info;
1538 count = be16_to_cpu(leaf->hdr.count); 1538 count = be16_to_cpu(leaf->hdr.count);
1539 bytes = sizeof(xfs_attr_leaf_hdr_t) + 1539 bytes = sizeof(xfs_attr_leaf_hdr_t) +
@@ -1596,7 +1596,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
1596 bytes = state->blocksize - (state->blocksize>>2); 1596 bytes = state->blocksize - (state->blocksize>>2);
1597 bytes -= be16_to_cpu(leaf->hdr.usedbytes); 1597 bytes -= be16_to_cpu(leaf->hdr.usedbytes);
1598 leaf = bp->data; 1598 leaf = bp->data;
1599 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1599 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1600 count += be16_to_cpu(leaf->hdr.count); 1600 count += be16_to_cpu(leaf->hdr.count);
1601 bytes -= be16_to_cpu(leaf->hdr.usedbytes); 1601 bytes -= be16_to_cpu(leaf->hdr.usedbytes);
1602 bytes -= count * sizeof(xfs_attr_leaf_entry_t); 1602 bytes -= count * sizeof(xfs_attr_leaf_entry_t);
@@ -1650,7 +1650,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
1650 xfs_mount_t *mp; 1650 xfs_mount_t *mp;
1651 1651
1652 leaf = bp->data; 1652 leaf = bp->data;
1653 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1653 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1654 hdr = &leaf->hdr; 1654 hdr = &leaf->hdr;
1655 mp = args->trans->t_mountp; 1655 mp = args->trans->t_mountp;
1656 ASSERT((be16_to_cpu(hdr->count) > 0) 1656 ASSERT((be16_to_cpu(hdr->count) > 0)
@@ -1813,8 +1813,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1813 ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC); 1813 ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1814 drop_leaf = drop_blk->bp->data; 1814 drop_leaf = drop_blk->bp->data;
1815 save_leaf = save_blk->bp->data; 1815 save_leaf = save_blk->bp->data;
1816 ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1816 ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1817 ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1817 ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1818 drop_hdr = &drop_leaf->hdr; 1818 drop_hdr = &drop_leaf->hdr;
1819 save_hdr = &save_leaf->hdr; 1819 save_hdr = &save_leaf->hdr;
1820 1820
@@ -1915,7 +1915,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
1915 xfs_dahash_t hashval; 1915 xfs_dahash_t hashval;
1916 1916
1917 leaf = bp->data; 1917 leaf = bp->data;
1918 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 1918 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1919 ASSERT(be16_to_cpu(leaf->hdr.count) 1919 ASSERT(be16_to_cpu(leaf->hdr.count)
1920 < (XFS_LBSIZE(args->dp->i_mount)/8)); 1920 < (XFS_LBSIZE(args->dp->i_mount)/8));
1921 1921
@@ -2019,7 +2019,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
2019 xfs_attr_leaf_name_remote_t *name_rmt; 2019 xfs_attr_leaf_name_remote_t *name_rmt;
2020 2020
2021 leaf = bp->data; 2021 leaf = bp->data;
2022 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2022 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2023 ASSERT(be16_to_cpu(leaf->hdr.count) 2023 ASSERT(be16_to_cpu(leaf->hdr.count)
2024 < (XFS_LBSIZE(args->dp->i_mount)/8)); 2024 < (XFS_LBSIZE(args->dp->i_mount)/8));
2025 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2025 ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
@@ -2087,8 +2087,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
2087 /* 2087 /*
2088 * Set up environment. 2088 * Set up environment.
2089 */ 2089 */
2090 ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2090 ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2091 ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2091 ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2092 hdr_s = &leaf_s->hdr; 2092 hdr_s = &leaf_s->hdr;
2093 hdr_d = &leaf_d->hdr; 2093 hdr_d = &leaf_d->hdr;
2094 ASSERT((be16_to_cpu(hdr_s->count) > 0) && 2094 ASSERT((be16_to_cpu(hdr_s->count) > 0) &&
@@ -2222,8 +2222,8 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
2222 2222
2223 leaf1 = leaf1_bp->data; 2223 leaf1 = leaf1_bp->data;
2224 leaf2 = leaf2_bp->data; 2224 leaf2 = leaf2_bp->data;
2225 ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) && 2225 ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
2226 (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC)); 2226 (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
2227 if ((be16_to_cpu(leaf1->hdr.count) > 0) && 2227 if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
2228 (be16_to_cpu(leaf2->hdr.count) > 0) && 2228 (be16_to_cpu(leaf2->hdr.count) > 0) &&
2229 ((be32_to_cpu(leaf2->entries[0].hashval) < 2229 ((be32_to_cpu(leaf2->entries[0].hashval) <
@@ -2246,7 +2246,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
2246 xfs_attr_leafblock_t *leaf; 2246 xfs_attr_leafblock_t *leaf;
2247 2247
2248 leaf = bp->data; 2248 leaf = bp->data;
2249 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2249 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2250 if (count) 2250 if (count)
2251 *count = be16_to_cpu(leaf->hdr.count); 2251 *count = be16_to_cpu(leaf->hdr.count);
2252 if (!leaf->hdr.count) 2252 if (!leaf->hdr.count)
@@ -2265,7 +2265,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
2265 xfs_attr_leaf_name_remote_t *name_rmt; 2265 xfs_attr_leaf_name_remote_t *name_rmt;
2266 int size; 2266 int size;
2267 2267
2268 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2268 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2269 if (leaf->entries[index].flags & XFS_ATTR_LOCAL) { 2269 if (leaf->entries[index].flags & XFS_ATTR_LOCAL) {
2270 name_loc = xfs_attr_leaf_name_local(leaf, index); 2270 name_loc = xfs_attr_leaf_name_local(leaf, index);
2271 size = xfs_attr_leaf_entsize_local(name_loc->namelen, 2271 size = xfs_attr_leaf_entsize_local(name_loc->namelen,
@@ -2451,7 +2451,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
2451 ASSERT(bp != NULL); 2451 ASSERT(bp != NULL);
2452 2452
2453 leaf = bp->data; 2453 leaf = bp->data;
2454 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2454 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2455 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2455 ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
2456 ASSERT(args->index >= 0); 2456 ASSERT(args->index >= 0);
2457 entry = &leaf->entries[ args->index ]; 2457 entry = &leaf->entries[ args->index ];
@@ -2515,7 +2515,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
2515 ASSERT(bp != NULL); 2515 ASSERT(bp != NULL);
2516 2516
2517 leaf = bp->data; 2517 leaf = bp->data;
2518 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2518 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2519 ASSERT(args->index < be16_to_cpu(leaf->hdr.count)); 2519 ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
2520 ASSERT(args->index >= 0); 2520 ASSERT(args->index >= 0);
2521 entry = &leaf->entries[ args->index ]; 2521 entry = &leaf->entries[ args->index ];
@@ -2585,13 +2585,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
2585 } 2585 }
2586 2586
2587 leaf1 = bp1->data; 2587 leaf1 = bp1->data;
2588 ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2588 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2589 ASSERT(args->index < be16_to_cpu(leaf1->hdr.count)); 2589 ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
2590 ASSERT(args->index >= 0); 2590 ASSERT(args->index >= 0);
2591 entry1 = &leaf1->entries[ args->index ]; 2591 entry1 = &leaf1->entries[ args->index ];
2592 2592
2593 leaf2 = bp2->data; 2593 leaf2 = bp2->data;
2594 ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2594 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2595 ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count)); 2595 ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
2596 ASSERT(args->index2 >= 0); 2596 ASSERT(args->index2 >= 0);
2597 entry2 = &leaf2->entries[ args->index2 ]; 2597 entry2 = &leaf2->entries[ args->index2 ];
@@ -2689,9 +2689,9 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
2689 * This is a depth-first traversal! 2689 * This is a depth-first traversal!
2690 */ 2690 */
2691 info = bp->data; 2691 info = bp->data;
2692 if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { 2692 if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
2693 error = xfs_attr_node_inactive(trans, dp, bp, 1); 2693 error = xfs_attr_node_inactive(trans, dp, bp, 1);
2694 } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { 2694 } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
2695 error = xfs_attr_leaf_inactive(trans, dp, bp); 2695 error = xfs_attr_leaf_inactive(trans, dp, bp);
2696 } else { 2696 } else {
2697 error = XFS_ERROR(EIO); 2697 error = XFS_ERROR(EIO);
@@ -2739,7 +2739,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
2739 } 2739 }
2740 2740
2741 node = bp->data; 2741 node = bp->data;
2742 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 2742 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
2743 parent_blkno = xfs_da_blkno(bp); /* save for re-read later */ 2743 parent_blkno = xfs_da_blkno(bp); /* save for re-read later */
2744 count = be16_to_cpu(node->hdr.count); 2744 count = be16_to_cpu(node->hdr.count);
2745 if (!count) { 2745 if (!count) {
@@ -2773,10 +2773,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
2773 * Invalidate the subtree, however we have to. 2773 * Invalidate the subtree, however we have to.
2774 */ 2774 */
2775 info = child_bp->data; 2775 info = child_bp->data;
2776 if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) { 2776 if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
2777 error = xfs_attr_node_inactive(trans, dp, 2777 error = xfs_attr_node_inactive(trans, dp,
2778 child_bp, level+1); 2778 child_bp, level+1);
2779 } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) { 2779 } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
2780 error = xfs_attr_leaf_inactive(trans, dp, 2780 error = xfs_attr_leaf_inactive(trans, dp,
2781 child_bp); 2781 child_bp);
2782 } else { 2782 } else {
@@ -2836,7 +2836,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
2836 int error, count, size, tmp, i; 2836 int error, count, size, tmp, i;
2837 2837
2838 leaf = bp->data; 2838 leaf = bp->data;
2839 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC); 2839 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
2840 2840
2841 /* 2841 /*
2842 * Count the number of "remote" value extents. 2842 * Count the number of "remote" value extents.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index e546a33214c9..c51a3f903633 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -29,15 +29,11 @@
29#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
30#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
31#include "xfs_ialloc_btree.h" 31#include "xfs_ialloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_dinode.h" 32#include "xfs_dinode.h"
34#include "xfs_inode.h" 33#include "xfs_inode.h"
35#include "xfs_btree.h" 34#include "xfs_btree.h"
36#include "xfs_mount.h" 35#include "xfs_mount.h"
37#include "xfs_itable.h" 36#include "xfs_itable.h"
38#include "xfs_dir2_data.h"
39#include "xfs_dir2_leaf.h"
40#include "xfs_dir2_block.h"
41#include "xfs_inode_item.h" 37#include "xfs_inode_item.h"
42#include "xfs_extfree_item.h" 38#include "xfs_extfree_item.h"
43#include "xfs_alloc.h" 39#include "xfs_alloc.h"
@@ -94,6 +90,7 @@ xfs_bmap_add_attrfork_local(
94 */ 90 */
95STATIC int /* error */ 91STATIC int /* error */
96xfs_bmap_add_extent_delay_real( 92xfs_bmap_add_extent_delay_real(
93 struct xfs_trans *tp, /* transaction pointer */
97 xfs_inode_t *ip, /* incore inode pointer */ 94 xfs_inode_t *ip, /* incore inode pointer */
98 xfs_extnum_t *idx, /* extent number to update/insert */ 95 xfs_extnum_t *idx, /* extent number to update/insert */
99 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 96 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
@@ -439,6 +436,7 @@ xfs_bmap_add_attrfork_local(
439 */ 436 */
440STATIC int /* error */ 437STATIC int /* error */
441xfs_bmap_add_extent( 438xfs_bmap_add_extent(
439 struct xfs_trans *tp, /* transaction pointer */
442 xfs_inode_t *ip, /* incore inode pointer */ 440 xfs_inode_t *ip, /* incore inode pointer */
443 xfs_extnum_t *idx, /* extent number to update/insert */ 441 xfs_extnum_t *idx, /* extent number to update/insert */
444 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 442 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
@@ -524,7 +522,7 @@ xfs_bmap_add_extent(
524 if (cur) 522 if (cur)
525 ASSERT(cur->bc_private.b.flags & 523 ASSERT(cur->bc_private.b.flags &
526 XFS_BTCUR_BPRV_WASDEL); 524 XFS_BTCUR_BPRV_WASDEL);
527 error = xfs_bmap_add_extent_delay_real(ip, 525 error = xfs_bmap_add_extent_delay_real(tp, ip,
528 idx, &cur, new, &da_new, 526 idx, &cur, new, &da_new,
529 first, flist, &logflags); 527 first, flist, &logflags);
530 } else { 528 } else {
@@ -561,7 +559,7 @@ xfs_bmap_add_extent(
561 int tmp_logflags; /* partial log flag return val */ 559 int tmp_logflags; /* partial log flag return val */
562 560
563 ASSERT(cur == NULL); 561 ASSERT(cur == NULL);
564 error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first, 562 error = xfs_bmap_extents_to_btree(tp, ip, first,
565 flist, &cur, da_old > 0, &tmp_logflags, whichfork); 563 flist, &cur, da_old > 0, &tmp_logflags, whichfork);
566 logflags |= tmp_logflags; 564 logflags |= tmp_logflags;
567 if (error) 565 if (error)
@@ -604,6 +602,7 @@ done:
604 */ 602 */
605STATIC int /* error */ 603STATIC int /* error */
606xfs_bmap_add_extent_delay_real( 604xfs_bmap_add_extent_delay_real(
605 struct xfs_trans *tp, /* transaction pointer */
607 xfs_inode_t *ip, /* incore inode pointer */ 606 xfs_inode_t *ip, /* incore inode pointer */
608 xfs_extnum_t *idx, /* extent number to update/insert */ 607 xfs_extnum_t *idx, /* extent number to update/insert */
609 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 608 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
@@ -901,7 +900,7 @@ xfs_bmap_add_extent_delay_real(
901 } 900 }
902 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 901 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
903 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 902 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
904 error = xfs_bmap_extents_to_btree(ip->i_transp, ip, 903 error = xfs_bmap_extents_to_btree(tp, ip,
905 first, flist, &cur, 1, &tmp_rval, 904 first, flist, &cur, 1, &tmp_rval,
906 XFS_DATA_FORK); 905 XFS_DATA_FORK);
907 rval |= tmp_rval; 906 rval |= tmp_rval;
@@ -984,7 +983,7 @@ xfs_bmap_add_extent_delay_real(
984 } 983 }
985 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 984 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
986 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 985 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
987 error = xfs_bmap_extents_to_btree(ip->i_transp, ip, 986 error = xfs_bmap_extents_to_btree(tp, ip,
988 first, flist, &cur, 1, &tmp_rval, 987 first, flist, &cur, 1, &tmp_rval,
989 XFS_DATA_FORK); 988 XFS_DATA_FORK);
990 rval |= tmp_rval; 989 rval |= tmp_rval;
@@ -1052,7 +1051,7 @@ xfs_bmap_add_extent_delay_real(
1052 } 1051 }
1053 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && 1052 if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1054 ip->i_d.di_nextents > ip->i_df.if_ext_max) { 1053 ip->i_d.di_nextents > ip->i_df.if_ext_max) {
1055 error = xfs_bmap_extents_to_btree(ip->i_transp, ip, 1054 error = xfs_bmap_extents_to_btree(tp, ip,
1056 first, flist, &cur, 1, &tmp_rval, 1055 first, flist, &cur, 1, &tmp_rval,
1057 XFS_DATA_FORK); 1056 XFS_DATA_FORK);
1058 rval |= tmp_rval; 1057 rval |= tmp_rval;
@@ -2871,8 +2870,8 @@ xfs_bmap_del_extent(
2871 len = del->br_blockcount; 2870 len = del->br_blockcount;
2872 do_div(bno, mp->m_sb.sb_rextsize); 2871 do_div(bno, mp->m_sb.sb_rextsize);
2873 do_div(len, mp->m_sb.sb_rextsize); 2872 do_div(len, mp->m_sb.sb_rextsize);
2874 if ((error = xfs_rtfree_extent(ip->i_transp, bno, 2873 error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
2875 (xfs_extlen_t)len))) 2874 if (error)
2876 goto done; 2875 goto done;
2877 do_fx = 0; 2876 do_fx = 0;
2878 nblks = len * mp->m_sb.sb_rextsize; 2877 nblks = len * mp->m_sb.sb_rextsize;
@@ -4080,7 +4079,7 @@ xfs_bmap_sanity_check(
4080{ 4079{
4081 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); 4080 struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
4082 4081
4083 if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC || 4082 if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
4084 be16_to_cpu(block->bb_level) != level || 4083 be16_to_cpu(block->bb_level) != level ||
4085 be16_to_cpu(block->bb_numrecs) == 0 || 4084 be16_to_cpu(block->bb_numrecs) == 0 ||
4086 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0]) 4085 be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
@@ -4662,7 +4661,7 @@ xfs_bmapi(
4662 if (!wasdelay && (flags & XFS_BMAPI_PREALLOC)) 4661 if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
4663 got.br_state = XFS_EXT_UNWRITTEN; 4662 got.br_state = XFS_EXT_UNWRITTEN;
4664 } 4663 }
4665 error = xfs_bmap_add_extent(ip, &lastx, &cur, &got, 4664 error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &got,
4666 firstblock, flist, &tmp_logflags, 4665 firstblock, flist, &tmp_logflags,
4667 whichfork); 4666 whichfork);
4668 logflags |= tmp_logflags; 4667 logflags |= tmp_logflags;
@@ -4763,7 +4762,7 @@ xfs_bmapi(
4763 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) 4762 mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
4764 ? XFS_EXT_NORM 4763 ? XFS_EXT_NORM
4765 : XFS_EXT_UNWRITTEN; 4764 : XFS_EXT_UNWRITTEN;
4766 error = xfs_bmap_add_extent(ip, &lastx, &cur, mval, 4765 error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, mval,
4767 firstblock, flist, &tmp_logflags, 4766 firstblock, flist, &tmp_logflags,
4768 whichfork); 4767 whichfork);
4769 logflags |= tmp_logflags; 4768 logflags |= tmp_logflags;
@@ -5117,7 +5116,7 @@ xfs_bunmapi(
5117 del.br_blockcount = mod; 5116 del.br_blockcount = mod;
5118 } 5117 }
5119 del.br_state = XFS_EXT_UNWRITTEN; 5118 del.br_state = XFS_EXT_UNWRITTEN;
5120 error = xfs_bmap_add_extent(ip, &lastx, &cur, &del, 5119 error = xfs_bmap_add_extent(tp, ip, &lastx, &cur, &del,
5121 firstblock, flist, &logflags, 5120 firstblock, flist, &logflags,
5122 XFS_DATA_FORK); 5121 XFS_DATA_FORK);
5123 if (error) 5122 if (error)
@@ -5175,18 +5174,18 @@ xfs_bunmapi(
5175 } 5174 }
5176 prev.br_state = XFS_EXT_UNWRITTEN; 5175 prev.br_state = XFS_EXT_UNWRITTEN;
5177 lastx--; 5176 lastx--;
5178 error = xfs_bmap_add_extent(ip, &lastx, &cur, 5177 error = xfs_bmap_add_extent(tp, ip, &lastx,
5179 &prev, firstblock, flist, &logflags, 5178 &cur, &prev, firstblock, flist,
5180 XFS_DATA_FORK); 5179 &logflags, XFS_DATA_FORK);
5181 if (error) 5180 if (error)
5182 goto error0; 5181 goto error0;
5183 goto nodelete; 5182 goto nodelete;
5184 } else { 5183 } else {
5185 ASSERT(del.br_state == XFS_EXT_NORM); 5184 ASSERT(del.br_state == XFS_EXT_NORM);
5186 del.br_state = XFS_EXT_UNWRITTEN; 5185 del.br_state = XFS_EXT_UNWRITTEN;
5187 error = xfs_bmap_add_extent(ip, &lastx, &cur, 5186 error = xfs_bmap_add_extent(tp, ip, &lastx,
5188 &del, firstblock, flist, &logflags, 5187 &cur, &del, firstblock, flist,
5189 XFS_DATA_FORK); 5188 &logflags, XFS_DATA_FORK);
5190 if (error) 5189 if (error)
5191 goto error0; 5190 goto error0;
5192 goto nodelete; 5191 goto nodelete;
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 87d3c10b6954..e2f5d59cbeaf 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -33,7 +33,6 @@
33#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
34#include "xfs_alloc.h" 34#include "xfs_alloc.h"
35#include "xfs_btree.h" 35#include "xfs_btree.h"
36#include "xfs_btree_trace.h"
37#include "xfs_itable.h" 36#include "xfs_itable.h"
38#include "xfs_bmap.h" 37#include "xfs_bmap.h"
39#include "xfs_error.h" 38#include "xfs_error.h"
@@ -425,10 +424,10 @@ xfs_bmbt_to_bmdr(
425 xfs_bmbt_key_t *tkp; 424 xfs_bmbt_key_t *tkp;
426 __be64 *tpp; 425 __be64 *tpp;
427 426
428 ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC); 427 ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
429 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO); 428 ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
430 ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO); 429 ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
431 ASSERT(be16_to_cpu(rblock->bb_level) > 0); 430 ASSERT(rblock->bb_level != 0);
432 dblock->bb_level = rblock->bb_level; 431 dblock->bb_level = rblock->bb_level;
433 dblock->bb_numrecs = rblock->bb_numrecs; 432 dblock->bb_numrecs = rblock->bb_numrecs;
434 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); 433 dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
@@ -732,95 +731,6 @@ xfs_bmbt_recs_inorder(
732} 731}
733#endif /* DEBUG */ 732#endif /* DEBUG */
734 733
735#ifdef XFS_BTREE_TRACE
736ktrace_t *xfs_bmbt_trace_buf;
737
738STATIC void
739xfs_bmbt_trace_enter(
740 struct xfs_btree_cur *cur,
741 const char *func,
742 char *s,
743 int type,
744 int line,
745 __psunsigned_t a0,
746 __psunsigned_t a1,
747 __psunsigned_t a2,
748 __psunsigned_t a3,
749 __psunsigned_t a4,
750 __psunsigned_t a5,
751 __psunsigned_t a6,
752 __psunsigned_t a7,
753 __psunsigned_t a8,
754 __psunsigned_t a9,
755 __psunsigned_t a10)
756{
757 struct xfs_inode *ip = cur->bc_private.b.ip;
758 int whichfork = cur->bc_private.b.whichfork;
759
760 ktrace_enter(xfs_bmbt_trace_buf,
761 (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
762 (void *)func, (void *)s, (void *)ip, (void *)cur,
763 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
764 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
765 (void *)a8, (void *)a9, (void *)a10);
766}
767
768STATIC void
769xfs_bmbt_trace_cursor(
770 struct xfs_btree_cur *cur,
771 __uint32_t *s0,
772 __uint64_t *l0,
773 __uint64_t *l1)
774{
775 struct xfs_bmbt_rec_host r;
776
777 xfs_bmbt_set_all(&r, &cur->bc_rec.b);
778
779 *s0 = (cur->bc_nlevels << 24) |
780 (cur->bc_private.b.flags << 16) |
781 cur->bc_private.b.allocated;
782 *l0 = r.l0;
783 *l1 = r.l1;
784}
785
786STATIC void
787xfs_bmbt_trace_key(
788 struct xfs_btree_cur *cur,
789 union xfs_btree_key *key,
790 __uint64_t *l0,
791 __uint64_t *l1)
792{
793 *l0 = be64_to_cpu(key->bmbt.br_startoff);
794 *l1 = 0;
795}
796
797/* Endian flipping versions of the bmbt extraction functions */
798STATIC void
799xfs_bmbt_disk_get_all(
800 xfs_bmbt_rec_t *r,
801 xfs_bmbt_irec_t *s)
802{
803 __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
804 get_unaligned_be64(&r->l1), s);
805}
806
807STATIC void
808xfs_bmbt_trace_record(
809 struct xfs_btree_cur *cur,
810 union xfs_btree_rec *rec,
811 __uint64_t *l0,
812 __uint64_t *l1,
813 __uint64_t *l2)
814{
815 struct xfs_bmbt_irec irec;
816
817 xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
818 *l0 = irec.br_startoff;
819 *l1 = irec.br_startblock;
820 *l2 = irec.br_blockcount;
821}
822#endif /* XFS_BTREE_TRACE */
823
824static const struct xfs_btree_ops xfs_bmbt_ops = { 734static const struct xfs_btree_ops xfs_bmbt_ops = {
825 .rec_len = sizeof(xfs_bmbt_rec_t), 735 .rec_len = sizeof(xfs_bmbt_rec_t),
826 .key_len = sizeof(xfs_bmbt_key_t), 736 .key_len = sizeof(xfs_bmbt_key_t),
@@ -837,18 +747,10 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
837 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur, 747 .init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
838 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur, 748 .init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
839 .key_diff = xfs_bmbt_key_diff, 749 .key_diff = xfs_bmbt_key_diff,
840
841#ifdef DEBUG 750#ifdef DEBUG
842 .keys_inorder = xfs_bmbt_keys_inorder, 751 .keys_inorder = xfs_bmbt_keys_inorder,
843 .recs_inorder = xfs_bmbt_recs_inorder, 752 .recs_inorder = xfs_bmbt_recs_inorder,
844#endif 753#endif
845
846#ifdef XFS_BTREE_TRACE
847 .trace_enter = xfs_bmbt_trace_enter,
848 .trace_cursor = xfs_bmbt_trace_cursor,
849 .trace_key = xfs_bmbt_trace_key,
850 .trace_record = xfs_bmbt_trace_record,
851#endif
852}; 754};
853 755
854/* 756/*
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 2f9e97c128a0..cabf4b5604aa 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -32,7 +32,6 @@
32#include "xfs_inode.h" 32#include "xfs_inode.h"
33#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
34#include "xfs_btree.h" 34#include "xfs_btree.h"
35#include "xfs_btree_trace.h"
36#include "xfs_error.h" 35#include "xfs_error.h"
37#include "xfs_trace.h" 36#include "xfs_trace.h"
38 37
@@ -66,11 +65,11 @@ xfs_btree_check_lblock(
66 be16_to_cpu(block->bb_numrecs) <= 65 be16_to_cpu(block->bb_numrecs) <=
67 cur->bc_ops->get_maxrecs(cur, level) && 66 cur->bc_ops->get_maxrecs(cur, level) &&
68 block->bb_u.l.bb_leftsib && 67 block->bb_u.l.bb_leftsib &&
69 (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO || 68 (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
70 XFS_FSB_SANITY_CHECK(mp, 69 XFS_FSB_SANITY_CHECK(mp,
71 be64_to_cpu(block->bb_u.l.bb_leftsib))) && 70 be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
72 block->bb_u.l.bb_rightsib && 71 block->bb_u.l.bb_rightsib &&
73 (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO || 72 (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
74 XFS_FSB_SANITY_CHECK(mp, 73 XFS_FSB_SANITY_CHECK(mp,
75 be64_to_cpu(block->bb_u.l.bb_rightsib))); 74 be64_to_cpu(block->bb_u.l.bb_rightsib)));
76 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, 75 if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
@@ -105,10 +104,10 @@ xfs_btree_check_sblock(
105 be16_to_cpu(block->bb_level) == level && 104 be16_to_cpu(block->bb_level) == level &&
106 be16_to_cpu(block->bb_numrecs) <= 105 be16_to_cpu(block->bb_numrecs) <=
107 cur->bc_ops->get_maxrecs(cur, level) && 106 cur->bc_ops->get_maxrecs(cur, level) &&
108 (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK || 107 (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
109 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && 108 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
110 block->bb_u.s.bb_leftsib && 109 block->bb_u.s.bb_leftsib &&
111 (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK || 110 (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
112 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && 111 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
113 block->bb_u.s.bb_rightsib; 112 block->bb_u.s.bb_rightsib;
114 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, 113 if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
@@ -511,9 +510,9 @@ xfs_btree_islastblock(
511 block = xfs_btree_get_block(cur, level, &bp); 510 block = xfs_btree_get_block(cur, level, &bp);
512 xfs_btree_check_block(cur, block, level, bp); 511 xfs_btree_check_block(cur, block, level, bp);
513 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 512 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
514 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; 513 return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
515 else 514 else
516 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; 515 return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
517} 516}
518 517
519/* 518/*
@@ -777,14 +776,14 @@ xfs_btree_setbuf(
777 776
778 b = XFS_BUF_TO_BLOCK(bp); 777 b = XFS_BUF_TO_BLOCK(bp);
779 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 778 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
780 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) 779 if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
781 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 780 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
782 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) 781 if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
783 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 782 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
784 } else { 783 } else {
785 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK) 784 if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
786 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; 785 cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
787 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK) 786 if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
788 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; 787 cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
789 } 788 }
790} 789}
@@ -795,9 +794,9 @@ xfs_btree_ptr_is_null(
795 union xfs_btree_ptr *ptr) 794 union xfs_btree_ptr *ptr)
796{ 795{
797 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) 796 if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
798 return be64_to_cpu(ptr->l) == NULLDFSBNO; 797 return ptr->l == cpu_to_be64(NULLDFSBNO);
799 else 798 else
800 return be32_to_cpu(ptr->s) == NULLAGBLOCK; 799 return ptr->s == cpu_to_be32(NULLAGBLOCK);
801} 800}
802 801
803STATIC void 802STATIC void
@@ -923,12 +922,12 @@ xfs_btree_ptr_to_daddr(
923 union xfs_btree_ptr *ptr) 922 union xfs_btree_ptr *ptr)
924{ 923{
925 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { 924 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
926 ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO); 925 ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
927 926
928 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); 927 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
929 } else { 928 } else {
930 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); 929 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
931 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK); 930 ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
932 931
933 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, 932 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
934 be32_to_cpu(ptr->s)); 933 be32_to_cpu(ptr->s));
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 82fafc66bd1f..8d05a6a46ce3 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -199,25 +199,6 @@ struct xfs_btree_ops {
199 union xfs_btree_rec *r1, 199 union xfs_btree_rec *r1,
200 union xfs_btree_rec *r2); 200 union xfs_btree_rec *r2);
201#endif 201#endif
202
203 /* btree tracing */
204#ifdef XFS_BTREE_TRACE
205 void (*trace_enter)(struct xfs_btree_cur *, const char *,
206 char *, int, int, __psunsigned_t,
207 __psunsigned_t, __psunsigned_t,
208 __psunsigned_t, __psunsigned_t,
209 __psunsigned_t, __psunsigned_t,
210 __psunsigned_t, __psunsigned_t,
211 __psunsigned_t, __psunsigned_t);
212 void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
213 __uint64_t *, __uint64_t *);
214 void (*trace_key)(struct xfs_btree_cur *,
215 union xfs_btree_key *, __uint64_t *,
216 __uint64_t *);
217 void (*trace_record)(struct xfs_btree_cur *,
218 union xfs_btree_rec *, __uint64_t *,
219 __uint64_t *, __uint64_t *);
220#endif
221}; 202};
222 203
223/* 204/*
@@ -452,4 +433,23 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
452 (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \ 433 (XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
453 XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks) 434 XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
454 435
436/*
437 * Trace hooks. Currently not implemented as they need to be ported
438 * over to the generic tracing functionality, which is some effort.
439 *
440 * i,j = integer (32 bit)
441 * b = btree block buffer (xfs_buf_t)
442 * p = btree ptr
443 * r = btree record
444 * k = btree key
445 */
446#define XFS_BTREE_TRACE_ARGBI(c, b, i)
447#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
448#define XFS_BTREE_TRACE_ARGI(c, i)
449#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
450#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
451#define XFS_BTREE_TRACE_ARGIK(c, i, k)
452#define XFS_BTREE_TRACE_ARGR(c, r)
453#define XFS_BTREE_TRACE_CURSOR(c, t)
454
455#endif /* __XFS_BTREE_H__ */ 455#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c
deleted file mode 100644
index 44ff942a0fda..000000000000
--- a/fs/xfs/xfs_btree_trace.c
+++ /dev/null
@@ -1,249 +0,0 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#include "xfs.h"
19#include "xfs_types.h"
20#include "xfs_inum.h"
21#include "xfs_bmap_btree.h"
22#include "xfs_alloc_btree.h"
23#include "xfs_ialloc_btree.h"
24#include "xfs_inode.h"
25#include "xfs_btree.h"
26#include "xfs_btree_trace.h"
27
28STATIC void
29xfs_btree_trace_ptr(
30 struct xfs_btree_cur *cur,
31 union xfs_btree_ptr ptr,
32 __psunsigned_t *high,
33 __psunsigned_t *low)
34{
35 if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
36 __u64 val = be64_to_cpu(ptr.l);
37 *high = val >> 32;
38 *low = (int)val;
39 } else {
40 *high = 0;
41 *low = be32_to_cpu(ptr.s);
42 }
43}
44
45/*
46 * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
47 */
48void
49xfs_btree_trace_argbi(
50 const char *func,
51 struct xfs_btree_cur *cur,
52 struct xfs_buf *b,
53 int i,
54 int line)
55{
56 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI,
57 line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
58 0, 0, 0, 0);
59}
60
61/*
62 * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
63 */
64void
65xfs_btree_trace_argbii(
66 const char *func,
67 struct xfs_btree_cur *cur,
68 struct xfs_buf *b,
69 int i0,
70 int i1,
71 int line)
72{
73 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII,
74 line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
75 0, 0, 0, 0);
76}
77
78/*
79 * Add a trace buffer entry for arguments, for 3 block-length args
80 * and an integer arg.
81 */
82void
83xfs_btree_trace_argfffi(
84 const char *func,
85 struct xfs_btree_cur *cur,
86 xfs_dfiloff_t o,
87 xfs_dfsbno_t b,
88 xfs_dfilblks_t i,
89 int j,
90 int line)
91{
92 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI,
93 line,
94 o >> 32, (int)o,
95 b >> 32, (int)b,
96 i >> 32, (int)i,
97 (int)j, 0, 0, 0, 0);
98}
99
100/*
101 * Add a trace buffer entry for arguments, for one integer arg.
102 */
103void
104xfs_btree_trace_argi(
105 const char *func,
106 struct xfs_btree_cur *cur,
107 int i,
108 int line)
109{
110 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI,
111 line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
112}
113
114/*
115 * Add a trace buffer entry for arguments, for int, fsblock, key.
116 */
117void
118xfs_btree_trace_argipk(
119 const char *func,
120 struct xfs_btree_cur *cur,
121 int i,
122 union xfs_btree_ptr ptr,
123 union xfs_btree_key *key,
124 int line)
125{
126 __psunsigned_t high, low;
127 __uint64_t l0, l1;
128
129 xfs_btree_trace_ptr(cur, ptr, &high, &low);
130 cur->bc_ops->trace_key(cur, key, &l0, &l1);
131 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK,
132 line, i, high, low,
133 l0 >> 32, (int)l0,
134 l1 >> 32, (int)l1,
135 0, 0, 0, 0);
136}
137
138/*
139 * Add a trace buffer entry for arguments, for int, fsblock, rec.
140 */
141void
142xfs_btree_trace_argipr(
143 const char *func,
144 struct xfs_btree_cur *cur,
145 int i,
146 union xfs_btree_ptr ptr,
147 union xfs_btree_rec *rec,
148 int line)
149{
150 __psunsigned_t high, low;
151 __uint64_t l0, l1, l2;
152
153 xfs_btree_trace_ptr(cur, ptr, &high, &low);
154 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
155 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR,
156 line, i,
157 high, low,
158 l0 >> 32, (int)l0,
159 l1 >> 32, (int)l1,
160 l2 >> 32, (int)l2,
161 0, 0);
162}
163
164/*
165 * Add a trace buffer entry for arguments, for int, key.
166 */
167void
168xfs_btree_trace_argik(
169 const char *func,
170 struct xfs_btree_cur *cur,
171 int i,
172 union xfs_btree_key *key,
173 int line)
174{
175 __uint64_t l0, l1;
176
177 cur->bc_ops->trace_key(cur, key, &l0, &l1);
178 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK,
179 line, i,
180 l0 >> 32, (int)l0,
181 l1 >> 32, (int)l1,
182 0, 0, 0, 0, 0, 0);
183}
184
185/*
186 * Add a trace buffer entry for arguments, for record.
187 */
188void
189xfs_btree_trace_argr(
190 const char *func,
191 struct xfs_btree_cur *cur,
192 union xfs_btree_rec *rec,
193 int line)
194{
195 __uint64_t l0, l1, l2;
196
197 cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
198 cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR,
199 line,
200 l0 >> 32, (int)l0,
201 l1 >> 32, (int)l1,
202 l2 >> 32, (int)l2,
203 0, 0, 0, 0, 0);
204}
205
206/*
207 * Add a trace buffer entry for the cursor/operation.
208 */
209void
210xfs_btree_trace_cursor(
211 const char *func,
212 struct xfs_btree_cur *cur,
213 int type,
214 int line)
215{
216 __uint32_t s0;
217 __uint64_t l0, l1;
218 char *s;
219
220 switch (type) {
221 case XBT_ARGS:
222 s = "args";
223 break;
224 case XBT_ENTRY:
225 s = "entry";
226 break;
227 case XBT_ERROR:
228 s = "error";
229 break;
230 case XBT_EXIT:
231 s = "exit";
232 break;
233 default:
234 s = "unknown";
235 break;
236 }
237
238 cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
239 cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
240 s0,
241 l0 >> 32, (int)l0,
242 l1 >> 32, (int)l1,
243 (__psunsigned_t)cur->bc_bufs[0],
244 (__psunsigned_t)cur->bc_bufs[1],
245 (__psunsigned_t)cur->bc_bufs[2],
246 (__psunsigned_t)cur->bc_bufs[3],
247 (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
248 (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
249}
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
deleted file mode 100644
index 2d8a309873ea..000000000000
--- a/fs/xfs/xfs_btree_trace.h
+++ /dev/null
@@ -1,99 +0,0 @@
1/*
2 * Copyright (c) 2008 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_BTREE_TRACE_H__
19#define __XFS_BTREE_TRACE_H__
20
21struct xfs_btree_cur;
22struct xfs_buf;
23
24
25/*
26 * Trace hooks.
27 * i,j = integer (32 bit)
28 * b = btree block buffer (xfs_buf_t)
29 * p = btree ptr
30 * r = btree record
31 * k = btree key
32 */
33
34#ifdef XFS_BTREE_TRACE
35
36/*
37 * Trace buffer entry types.
38 */
39#define XFS_BTREE_KTRACE_ARGBI 1
40#define XFS_BTREE_KTRACE_ARGBII 2
41#define XFS_BTREE_KTRACE_ARGFFFI 3
42#define XFS_BTREE_KTRACE_ARGI 4
43#define XFS_BTREE_KTRACE_ARGIPK 5
44#define XFS_BTREE_KTRACE_ARGIPR 6
45#define XFS_BTREE_KTRACE_ARGIK 7
46#define XFS_BTREE_KTRACE_ARGR 8
47#define XFS_BTREE_KTRACE_CUR 9
48
49/*
50 * Sub-types for cursor traces.
51 */
52#define XBT_ARGS 0
53#define XBT_ENTRY 1
54#define XBT_ERROR 2
55#define XBT_EXIT 3
56
57void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
58 struct xfs_buf *, int, int);
59void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
60 struct xfs_buf *, int, int, int);
61void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
62void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
63 union xfs_btree_ptr, union xfs_btree_key *, int);
64void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
65 union xfs_btree_ptr, union xfs_btree_rec *, int);
66void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
67 union xfs_btree_key *, int);
68void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
69 union xfs_btree_rec *, int);
70void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
71
72#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
73 xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
74#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
75 xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
76#define XFS_BTREE_TRACE_ARGI(c, i) \
77 xfs_btree_trace_argi(__func__, c, i, __LINE__)
78#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
79 xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
80#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \
81 xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
82#define XFS_BTREE_TRACE_ARGIK(c, i, k) \
83 xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
84#define XFS_BTREE_TRACE_ARGR(c, r) \
85 xfs_btree_trace_argr(__func__, c, r, __LINE__)
86#define XFS_BTREE_TRACE_CURSOR(c, t) \
87 xfs_btree_trace_cursor(__func__, c, t, __LINE__)
88#else
89#define XFS_BTREE_TRACE_ARGBI(c, b, i)
90#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
91#define XFS_BTREE_TRACE_ARGI(c, i)
92#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
93#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
94#define XFS_BTREE_TRACE_ARGIK(c, i, k)
95#define XFS_BTREE_TRACE_ARGR(c, r)
96#define XFS_BTREE_TRACE_CURSOR(c, t)
97#endif /* XFS_BTREE_TRACE */
98
99#endif /* __XFS_BTREE_TRACE_H__ */
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7b7e005e3dcc..88492916c3dc 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -90,13 +90,11 @@ xfs_buf_item_flush_log_debug(
90 uint first, 90 uint first,
91 uint last) 91 uint last)
92{ 92{
93 xfs_buf_log_item_t *bip; 93 xfs_buf_log_item_t *bip = bp->b_fspriv;
94 uint nbytes; 94 uint nbytes;
95 95
96 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 96 if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
97 if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) {
98 return; 97 return;
99 }
100 98
101 ASSERT(bip->bli_logged != NULL); 99 ASSERT(bip->bli_logged != NULL);
102 nbytes = last - first + 1; 100 nbytes = last - first + 1;
@@ -408,7 +406,7 @@ xfs_buf_item_unpin(
408 int stale = bip->bli_flags & XFS_BLI_STALE; 406 int stale = bip->bli_flags & XFS_BLI_STALE;
409 int freed; 407 int freed;
410 408
411 ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip); 409 ASSERT(bp->b_fspriv == bip);
412 ASSERT(atomic_read(&bip->bli_refcount) > 0); 410 ASSERT(atomic_read(&bip->bli_refcount) > 0);
413 411
414 trace_xfs_buf_item_unpin(bip); 412 trace_xfs_buf_item_unpin(bip);
@@ -420,7 +418,7 @@ xfs_buf_item_unpin(
420 418
421 if (freed && stale) { 419 if (freed && stale) {
422 ASSERT(bip->bli_flags & XFS_BLI_STALE); 420 ASSERT(bip->bli_flags & XFS_BLI_STALE);
423 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 421 ASSERT(xfs_buf_islocked(bp));
424 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp))); 422 ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
425 ASSERT(XFS_BUF_ISSTALE(bp)); 423 ASSERT(XFS_BUF_ISSTALE(bp));
426 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL); 424 ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
@@ -443,7 +441,7 @@ xfs_buf_item_unpin(
443 * Since the transaction no longer refers to the buffer, 441 * Since the transaction no longer refers to the buffer,
444 * the buffer should no longer refer to the transaction. 442 * the buffer should no longer refer to the transaction.
445 */ 443 */
446 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 444 bp->b_transp = NULL;
447 } 445 }
448 446
449 /* 447 /*
@@ -454,13 +452,13 @@ xfs_buf_item_unpin(
454 */ 452 */
455 if (bip->bli_flags & XFS_BLI_STALE_INODE) { 453 if (bip->bli_flags & XFS_BLI_STALE_INODE) {
456 xfs_buf_do_callbacks(bp); 454 xfs_buf_do_callbacks(bp);
457 XFS_BUF_SET_FSPRIVATE(bp, NULL); 455 bp->b_fspriv = NULL;
458 XFS_BUF_CLR_IODONE_FUNC(bp); 456 bp->b_iodone = NULL;
459 } else { 457 } else {
460 spin_lock(&ailp->xa_lock); 458 spin_lock(&ailp->xa_lock);
461 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip); 459 xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
462 xfs_buf_item_relse(bp); 460 xfs_buf_item_relse(bp);
463 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL); 461 ASSERT(bp->b_fspriv == NULL);
464 } 462 }
465 xfs_buf_relse(bp); 463 xfs_buf_relse(bp);
466 } 464 }
@@ -483,7 +481,7 @@ xfs_buf_item_trylock(
483 481
484 if (XFS_BUF_ISPINNED(bp)) 482 if (XFS_BUF_ISPINNED(bp))
485 return XFS_ITEM_PINNED; 483 return XFS_ITEM_PINNED;
486 if (!XFS_BUF_CPSEMA(bp)) 484 if (!xfs_buf_trylock(bp))
487 return XFS_ITEM_LOCKED; 485 return XFS_ITEM_LOCKED;
488 486
489 /* take a reference to the buffer. */ 487 /* take a reference to the buffer. */
@@ -525,7 +523,7 @@ xfs_buf_item_unlock(
525 uint hold; 523 uint hold;
526 524
527 /* Clear the buffer's association with this transaction. */ 525 /* Clear the buffer's association with this transaction. */
528 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 526 bp->b_transp = NULL;
529 527
530 /* 528 /*
531 * If this is a transaction abort, don't return early. Instead, allow 529 * If this is a transaction abort, don't return early. Instead, allow
@@ -684,7 +682,7 @@ xfs_buf_item_init(
684 xfs_buf_t *bp, 682 xfs_buf_t *bp,
685 xfs_mount_t *mp) 683 xfs_mount_t *mp)
686{ 684{
687 xfs_log_item_t *lip; 685 xfs_log_item_t *lip = bp->b_fspriv;
688 xfs_buf_log_item_t *bip; 686 xfs_buf_log_item_t *bip;
689 int chunks; 687 int chunks;
690 int map_size; 688 int map_size;
@@ -696,12 +694,8 @@ xfs_buf_item_init(
696 * nothing to do here so return. 694 * nothing to do here so return.
697 */ 695 */
698 ASSERT(bp->b_target->bt_mount == mp); 696 ASSERT(bp->b_target->bt_mount == mp);
699 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 697 if (lip != NULL && lip->li_type == XFS_LI_BUF)
700 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 698 return;
701 if (lip->li_type == XFS_LI_BUF) {
702 return;
703 }
704 }
705 699
706 /* 700 /*
707 * chunks is the number of XFS_BLF_CHUNK size pieces 701 * chunks is the number of XFS_BLF_CHUNK size pieces
@@ -740,11 +734,9 @@ xfs_buf_item_init(
740 * Put the buf item into the list of items attached to the 734 * Put the buf item into the list of items attached to the
741 * buffer at the front. 735 * buffer at the front.
742 */ 736 */
743 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 737 if (bp->b_fspriv)
744 bip->bli_item.li_bio_list = 738 bip->bli_item.li_bio_list = bp->b_fspriv;
745 XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 739 bp->b_fspriv = bip;
746 }
747 XFS_BUF_SET_FSPRIVATE(bp, bip);
748} 740}
749 741
750 742
@@ -876,12 +868,11 @@ xfs_buf_item_relse(
876 868
877 trace_xfs_buf_item_relse(bp, _RET_IP_); 869 trace_xfs_buf_item_relse(bp, _RET_IP_);
878 870
879 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 871 bip = bp->b_fspriv;
880 XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list); 872 bp->b_fspriv = bip->bli_item.li_bio_list;
881 if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) && 873 if (bp->b_fspriv == NULL)
882 (XFS_BUF_IODONE_FUNC(bp) != NULL)) { 874 bp->b_iodone = NULL;
883 XFS_BUF_CLR_IODONE_FUNC(bp); 875
884 }
885 xfs_buf_rele(bp); 876 xfs_buf_rele(bp);
886 xfs_buf_item_free(bip); 877 xfs_buf_item_free(bip);
887} 878}
@@ -905,20 +896,20 @@ xfs_buf_attach_iodone(
905 xfs_log_item_t *head_lip; 896 xfs_log_item_t *head_lip;
906 897
907 ASSERT(XFS_BUF_ISBUSY(bp)); 898 ASSERT(XFS_BUF_ISBUSY(bp));
908 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 899 ASSERT(xfs_buf_islocked(bp));
909 900
910 lip->li_cb = cb; 901 lip->li_cb = cb;
911 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 902 head_lip = bp->b_fspriv;
912 head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 903 if (head_lip) {
913 lip->li_bio_list = head_lip->li_bio_list; 904 lip->li_bio_list = head_lip->li_bio_list;
914 head_lip->li_bio_list = lip; 905 head_lip->li_bio_list = lip;
915 } else { 906 } else {
916 XFS_BUF_SET_FSPRIVATE(bp, lip); 907 bp->b_fspriv = lip;
917 } 908 }
918 909
919 ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) || 910 ASSERT(bp->b_iodone == NULL ||
920 (XFS_BUF_IODONE_FUNC(bp) == NULL)); 911 bp->b_iodone == xfs_buf_iodone_callbacks);
921 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 912 bp->b_iodone = xfs_buf_iodone_callbacks;
922} 913}
923 914
924/* 915/*
@@ -939,8 +930,8 @@ xfs_buf_do_callbacks(
939{ 930{
940 struct xfs_log_item *lip; 931 struct xfs_log_item *lip;
941 932
942 while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) { 933 while ((lip = bp->b_fspriv) != NULL) {
943 XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list); 934 bp->b_fspriv = lip->li_bio_list;
944 ASSERT(lip->li_cb != NULL); 935 ASSERT(lip->li_cb != NULL);
945 /* 936 /*
946 * Clear the next pointer so we don't have any 937 * Clear the next pointer so we don't have any
@@ -1007,7 +998,7 @@ xfs_buf_iodone_callbacks(
1007 XFS_BUF_DONE(bp); 998 XFS_BUF_DONE(bp);
1008 XFS_BUF_SET_START(bp); 999 XFS_BUF_SET_START(bp);
1009 } 1000 }
1010 ASSERT(XFS_BUF_IODONE_FUNC(bp)); 1001 ASSERT(bp->b_iodone != NULL);
1011 trace_xfs_buf_item_iodone_async(bp, _RET_IP_); 1002 trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
1012 xfs_buf_relse(bp); 1003 xfs_buf_relse(bp);
1013 return; 1004 return;
@@ -1026,8 +1017,8 @@ xfs_buf_iodone_callbacks(
1026 1017
1027do_callbacks: 1018do_callbacks:
1028 xfs_buf_do_callbacks(bp); 1019 xfs_buf_do_callbacks(bp);
1029 XFS_BUF_SET_FSPRIVATE(bp, NULL); 1020 bp->b_fspriv = NULL;
1030 XFS_BUF_CLR_IODONE_FUNC(bp); 1021 bp->b_iodone = NULL;
1031 xfs_buf_ioend(bp, 0); 1022 xfs_buf_ioend(bp, 0);
1032} 1023}
1033 1024
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 6102ac6d1dff..2925726529f8 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -24,11 +24,12 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h" 30#include "xfs_dir2.h"
31#include "xfs_dir2_format.h"
32#include "xfs_dir2_priv.h"
32#include "xfs_dinode.h" 33#include "xfs_dinode.h"
33#include "xfs_inode.h" 34#include "xfs_inode.h"
34#include "xfs_inode_item.h" 35#include "xfs_inode_item.h"
@@ -36,10 +37,6 @@
36#include "xfs_bmap.h" 37#include "xfs_bmap.h"
37#include "xfs_attr.h" 38#include "xfs_attr.h"
38#include "xfs_attr_leaf.h" 39#include "xfs_attr_leaf.h"
39#include "xfs_dir2_data.h"
40#include "xfs_dir2_leaf.h"
41#include "xfs_dir2_block.h"
42#include "xfs_dir2_node.h"
43#include "xfs_error.h" 40#include "xfs_error.h"
44#include "xfs_trace.h" 41#include "xfs_trace.h"
45 42
@@ -89,7 +86,7 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
89 */ 86 */
90STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count); 87STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
91STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp); 88STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
92STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra); 89STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps);
93STATIC int xfs_da_blk_unlink(xfs_da_state_t *state, 90STATIC int xfs_da_blk_unlink(xfs_da_state_t *state,
94 xfs_da_state_blk_t *drop_blk, 91 xfs_da_state_blk_t *drop_blk,
95 xfs_da_state_blk_t *save_blk); 92 xfs_da_state_blk_t *save_blk);
@@ -321,11 +318,11 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
321 ASSERT(bp != NULL); 318 ASSERT(bp != NULL);
322 node = bp->data; 319 node = bp->data;
323 oldroot = blk1->bp->data; 320 oldroot = blk1->bp->data;
324 if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) { 321 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
325 size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] - 322 size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
326 (char *)oldroot); 323 (char *)oldroot);
327 } else { 324 } else {
328 ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 325 ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
329 leaf = (xfs_dir2_leaf_t *)oldroot; 326 leaf = (xfs_dir2_leaf_t *)oldroot;
330 size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] - 327 size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
331 (char *)leaf); 328 (char *)leaf);
@@ -352,7 +349,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
352 node->hdr.count = cpu_to_be16(2); 349 node->hdr.count = cpu_to_be16(2);
353 350
354#ifdef DEBUG 351#ifdef DEBUG
355 if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) { 352 if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
356 ASSERT(blk1->blkno >= mp->m_dirleafblk && 353 ASSERT(blk1->blkno >= mp->m_dirleafblk &&
357 blk1->blkno < mp->m_dirfreeblk); 354 blk1->blkno < mp->m_dirfreeblk);
358 ASSERT(blk2->blkno >= mp->m_dirleafblk && 355 ASSERT(blk2->blkno >= mp->m_dirleafblk &&
@@ -384,7 +381,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
384 int useextra; 381 int useextra;
385 382
386 node = oldblk->bp->data; 383 node = oldblk->bp->data;
387 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 384 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
388 385
389 /* 386 /*
390 * With V2 dirs the extra block is data or freespace. 387 * With V2 dirs the extra block is data or freespace.
@@ -483,8 +480,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
483 node1 = node2; 480 node1 = node2;
484 node2 = tmpnode; 481 node2 = tmpnode;
485 } 482 }
486 ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC); 483 ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
487 ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC); 484 ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
488 count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2; 485 count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
489 if (count == 0) 486 if (count == 0)
490 return; 487 return;
@@ -578,7 +575,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
578 int tmp; 575 int tmp;
579 576
580 node = oldblk->bp->data; 577 node = oldblk->bp->data;
581 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 578 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
582 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count))); 579 ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
583 ASSERT(newblk->blkno != 0); 580 ASSERT(newblk->blkno != 0);
584 if (state->args->whichfork == XFS_DATA_FORK) 581 if (state->args->whichfork == XFS_DATA_FORK)
@@ -714,7 +711,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
714 ASSERT(args != NULL); 711 ASSERT(args != NULL);
715 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC); 712 ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
716 oldroot = root_blk->bp->data; 713 oldroot = root_blk->bp->data;
717 ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC); 714 ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
718 ASSERT(!oldroot->hdr.info.forw); 715 ASSERT(!oldroot->hdr.info.forw);
719 ASSERT(!oldroot->hdr.info.back); 716 ASSERT(!oldroot->hdr.info.back);
720 717
@@ -737,10 +734,10 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
737 ASSERT(bp != NULL); 734 ASSERT(bp != NULL);
738 blkinfo = bp->data; 735 blkinfo = bp->data;
739 if (be16_to_cpu(oldroot->hdr.level) == 1) { 736 if (be16_to_cpu(oldroot->hdr.level) == 1) {
740 ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC || 737 ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
741 be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC); 738 blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
742 } else { 739 } else {
743 ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC); 740 ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
744 } 741 }
745 ASSERT(!blkinfo->forw); 742 ASSERT(!blkinfo->forw);
746 ASSERT(!blkinfo->back); 743 ASSERT(!blkinfo->back);
@@ -776,7 +773,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
776 */ 773 */
777 blk = &state->path.blk[ state->path.active-1 ]; 774 blk = &state->path.blk[ state->path.active-1 ];
778 info = blk->bp->data; 775 info = blk->bp->data;
779 ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC); 776 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
780 node = (xfs_da_intnode_t *)info; 777 node = (xfs_da_intnode_t *)info;
781 count = be16_to_cpu(node->hdr.count); 778 count = be16_to_cpu(node->hdr.count);
782 if (count > (state->node_ents >> 1)) { 779 if (count > (state->node_ents >> 1)) {
@@ -836,7 +833,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
836 count -= state->node_ents >> 2; 833 count -= state->node_ents >> 2;
837 count -= be16_to_cpu(node->hdr.count); 834 count -= be16_to_cpu(node->hdr.count);
838 node = bp->data; 835 node = bp->data;
839 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 836 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
840 count -= be16_to_cpu(node->hdr.count); 837 count -= be16_to_cpu(node->hdr.count);
841 xfs_da_brelse(state->args->trans, bp); 838 xfs_da_brelse(state->args->trans, bp);
842 if (count >= 0) 839 if (count >= 0)
@@ -911,7 +908,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
911 } 908 }
912 for (blk--, level--; level >= 0; blk--, level--) { 909 for (blk--, level--; level >= 0; blk--, level--) {
913 node = blk->bp->data; 910 node = blk->bp->data;
914 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 911 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
915 btree = &node->btree[ blk->index ]; 912 btree = &node->btree[ blk->index ];
916 if (be32_to_cpu(btree->hashval) == lasthash) 913 if (be32_to_cpu(btree->hashval) == lasthash)
917 break; 914 break;
@@ -979,8 +976,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
979 976
980 drop_node = drop_blk->bp->data; 977 drop_node = drop_blk->bp->data;
981 save_node = save_blk->bp->data; 978 save_node = save_blk->bp->data;
982 ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 979 ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
983 ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 980 ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
984 tp = state->args->trans; 981 tp = state->args->trans;
985 982
986 /* 983 /*
@@ -1278,8 +1275,8 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1278 1275
1279 node1 = node1_bp->data; 1276 node1 = node1_bp->data;
1280 node2 = node2_bp->data; 1277 node2 = node2_bp->data;
1281 ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) && 1278 ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
1282 (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC)); 1279 node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1283 if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) && 1280 if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
1284 ((be32_to_cpu(node2->btree[0].hashval) < 1281 ((be32_to_cpu(node2->btree[0].hashval) <
1285 be32_to_cpu(node1->btree[0].hashval)) || 1282 be32_to_cpu(node1->btree[0].hashval)) ||
@@ -1299,7 +1296,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1299 xfs_da_intnode_t *node; 1296 xfs_da_intnode_t *node;
1300 1297
1301 node = bp->data; 1298 node = bp->data;
1302 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 1299 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1303 if (count) 1300 if (count)
1304 *count = be16_to_cpu(node->hdr.count); 1301 *count = be16_to_cpu(node->hdr.count);
1305 if (!node->hdr.count) 1302 if (!node->hdr.count)
@@ -1412,7 +1409,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1412 for (blk = &path->blk[level]; level >= 0; blk--, level--) { 1409 for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1413 ASSERT(blk->bp != NULL); 1410 ASSERT(blk->bp != NULL);
1414 node = blk->bp->data; 1411 node = blk->bp->data;
1415 ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC); 1412 ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1416 if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) { 1413 if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
1417 blk->index++; 1414 blk->index++;
1418 blkno = be32_to_cpu(node->btree[blk->index].before); 1415 blkno = be32_to_cpu(node->btree[blk->index].before);
@@ -1451,9 +1448,9 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1451 return(error); 1448 return(error);
1452 ASSERT(blk->bp != NULL); 1449 ASSERT(blk->bp != NULL);
1453 info = blk->bp->data; 1450 info = blk->bp->data;
1454 ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC || 1451 ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1455 be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC || 1452 info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
1456 be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC); 1453 info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
1457 blk->magic = be16_to_cpu(info->magic); 1454 blk->magic = be16_to_cpu(info->magic);
1458 if (blk->magic == XFS_DA_NODE_MAGIC) { 1455 if (blk->magic == XFS_DA_NODE_MAGIC) {
1459 node = (xfs_da_intnode_t *)info; 1456 node = (xfs_da_intnode_t *)info;
@@ -1546,79 +1543,62 @@ const struct xfs_nameops xfs_default_nameops = {
1546 .compname = xfs_da_compname 1543 .compname = xfs_da_compname
1547}; 1544};
1548 1545
1549/*
1550 * Add a block to the btree ahead of the file.
1551 * Return the new block number to the caller.
1552 */
1553int 1546int
1554xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) 1547xfs_da_grow_inode_int(
1548 struct xfs_da_args *args,
1549 xfs_fileoff_t *bno,
1550 int count)
1555{ 1551{
1556 xfs_fileoff_t bno, b; 1552 struct xfs_trans *tp = args->trans;
1557 xfs_bmbt_irec_t map; 1553 struct xfs_inode *dp = args->dp;
1558 xfs_bmbt_irec_t *mapp; 1554 int w = args->whichfork;
1559 xfs_inode_t *dp; 1555 xfs_drfsbno_t nblks = dp->i_d.di_nblocks;
1560 int nmap, error, w, count, c, got, i, mapi; 1556 struct xfs_bmbt_irec map, *mapp;
1561 xfs_trans_t *tp; 1557 int nmap, error, got, i, mapi;
1562 xfs_mount_t *mp;
1563 xfs_drfsbno_t nblks;
1564 1558
1565 dp = args->dp;
1566 mp = dp->i_mount;
1567 w = args->whichfork;
1568 tp = args->trans;
1569 nblks = dp->i_d.di_nblocks;
1570
1571 /*
1572 * For new directories adjust the file offset and block count.
1573 */
1574 if (w == XFS_DATA_FORK) {
1575 bno = mp->m_dirleafblk;
1576 count = mp->m_dirblkfsbs;
1577 } else {
1578 bno = 0;
1579 count = 1;
1580 }
1581 /* 1559 /*
1582 * Find a spot in the file space to put the new block. 1560 * Find a spot in the file space to put the new block.
1583 */ 1561 */
1584 if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) 1562 error = xfs_bmap_first_unused(tp, dp, count, bno, w);
1563 if (error)
1585 return error; 1564 return error;
1586 if (w == XFS_DATA_FORK) 1565
1587 ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
1588 /* 1566 /*
1589 * Try mapping it in one filesystem block. 1567 * Try mapping it in one filesystem block.
1590 */ 1568 */
1591 nmap = 1; 1569 nmap = 1;
1592 ASSERT(args->firstblock != NULL); 1570 ASSERT(args->firstblock != NULL);
1593 if ((error = xfs_bmapi(tp, dp, bno, count, 1571 error = xfs_bmapi(tp, dp, *bno, count,
1594 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA| 1572 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1595 XFS_BMAPI_CONTIG, 1573 XFS_BMAPI_CONTIG,
1596 args->firstblock, args->total, &map, &nmap, 1574 args->firstblock, args->total, &map, &nmap,
1597 args->flist))) { 1575 args->flist);
1576 if (error)
1598 return error; 1577 return error;
1599 } 1578
1600 ASSERT(nmap <= 1); 1579 ASSERT(nmap <= 1);
1601 if (nmap == 1) { 1580 if (nmap == 1) {
1602 mapp = &map; 1581 mapp = &map;
1603 mapi = 1; 1582 mapi = 1;
1604 } 1583 } else if (nmap == 0 && count > 1) {
1605 /* 1584 xfs_fileoff_t b;
1606 * If we didn't get it and the block might work if fragmented, 1585 int c;
1607 * try without the CONTIG flag. Loop until we get it all. 1586
1608 */ 1587 /*
1609 else if (nmap == 0 && count > 1) { 1588 * If we didn't get it and the block might work if fragmented,
1589 * try without the CONTIG flag. Loop until we get it all.
1590 */
1610 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP); 1591 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
1611 for (b = bno, mapi = 0; b < bno + count; ) { 1592 for (b = *bno, mapi = 0; b < *bno + count; ) {
1612 nmap = MIN(XFS_BMAP_MAX_NMAP, count); 1593 nmap = MIN(XFS_BMAP_MAX_NMAP, count);
1613 c = (int)(bno + count - b); 1594 c = (int)(*bno + count - b);
1614 if ((error = xfs_bmapi(tp, dp, b, c, 1595 error = xfs_bmapi(tp, dp, b, c,
1615 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE| 1596 xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
1616 XFS_BMAPI_METADATA, 1597 XFS_BMAPI_METADATA,
1617 args->firstblock, args->total, 1598 args->firstblock, args->total,
1618 &mapp[mapi], &nmap, args->flist))) { 1599 &mapp[mapi], &nmap, args->flist);
1619 kmem_free(mapp); 1600 if (error)
1620 return error; 1601 goto out_free_map;
1621 }
1622 if (nmap < 1) 1602 if (nmap < 1)
1623 break; 1603 break;
1624 mapi += nmap; 1604 mapi += nmap;
@@ -1629,24 +1609,53 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1629 mapi = 0; 1609 mapi = 0;
1630 mapp = NULL; 1610 mapp = NULL;
1631 } 1611 }
1612
1632 /* 1613 /*
1633 * Count the blocks we got, make sure it matches the total. 1614 * Count the blocks we got, make sure it matches the total.
1634 */ 1615 */
1635 for (i = 0, got = 0; i < mapi; i++) 1616 for (i = 0, got = 0; i < mapi; i++)
1636 got += mapp[i].br_blockcount; 1617 got += mapp[i].br_blockcount;
1637 if (got != count || mapp[0].br_startoff != bno || 1618 if (got != count || mapp[0].br_startoff != *bno ||
1638 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != 1619 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1639 bno + count) { 1620 *bno + count) {
1640 if (mapp != &map) 1621 error = XFS_ERROR(ENOSPC);
1641 kmem_free(mapp); 1622 goto out_free_map;
1642 return XFS_ERROR(ENOSPC);
1643 } 1623 }
1644 if (mapp != &map) 1624
1645 kmem_free(mapp);
1646 /* account for newly allocated blocks in reserved blocks total */ 1625 /* account for newly allocated blocks in reserved blocks total */
1647 args->total -= dp->i_d.di_nblocks - nblks; 1626 args->total -= dp->i_d.di_nblocks - nblks;
1648 *new_blkno = (xfs_dablk_t)bno; 1627
1649 return 0; 1628out_free_map:
1629 if (mapp != &map)
1630 kmem_free(mapp);
1631 return error;
1632}
1633
1634/*
1635 * Add a block to the btree ahead of the file.
1636 * Return the new block number to the caller.
1637 */
1638int
1639xfs_da_grow_inode(
1640 struct xfs_da_args *args,
1641 xfs_dablk_t *new_blkno)
1642{
1643 xfs_fileoff_t bno;
1644 int count;
1645 int error;
1646
1647 if (args->whichfork == XFS_DATA_FORK) {
1648 bno = args->dp->i_mount->m_dirleafblk;
1649 count = args->dp->i_mount->m_dirblkfsbs;
1650 } else {
1651 bno = 0;
1652 count = 1;
1653 }
1654
1655 error = xfs_da_grow_inode_int(args, &bno, count);
1656 if (!error)
1657 *new_blkno = (xfs_dablk_t)bno;
1658 return error;
1650} 1659}
1651 1660
1652/* 1661/*
@@ -1704,12 +1713,12 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1704 /* 1713 /*
1705 * Get values from the moved block. 1714 * Get values from the moved block.
1706 */ 1715 */
1707 if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) { 1716 if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
1708 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; 1717 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1709 dead_level = 0; 1718 dead_level = 0;
1710 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval); 1719 dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
1711 } else { 1720 } else {
1712 ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC); 1721 ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
1713 dead_node = (xfs_da_intnode_t *)dead_info; 1722 dead_node = (xfs_da_intnode_t *)dead_info;
1714 dead_level = be16_to_cpu(dead_node->hdr.level); 1723 dead_level = be16_to_cpu(dead_node->hdr.level);
1715 dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval); 1724 dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
@@ -1768,8 +1777,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1768 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w))) 1777 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1769 goto done; 1778 goto done;
1770 par_node = par_buf->data; 1779 par_node = par_buf->data;
1771 if (unlikely( 1780 if (unlikely(par_node->hdr.info.magic !=
1772 be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC || 1781 cpu_to_be16(XFS_DA_NODE_MAGIC) ||
1773 (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) { 1782 (level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
1774 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)", 1783 XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1775 XFS_ERRLEVEL_LOW, mp); 1784 XFS_ERRLEVEL_LOW, mp);
@@ -1820,7 +1829,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1820 par_node = par_buf->data; 1829 par_node = par_buf->data;
1821 if (unlikely( 1830 if (unlikely(
1822 be16_to_cpu(par_node->hdr.level) != level || 1831 be16_to_cpu(par_node->hdr.level) != level ||
1823 be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) { 1832 par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
1824 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)", 1833 XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1825 XFS_ERRLEVEL_LOW, mp); 1834 XFS_ERRLEVEL_LOW, mp);
1826 error = XFS_ERROR(EFSCORRUPTED); 1835 error = XFS_ERROR(EFSCORRUPTED);
@@ -1930,8 +1939,7 @@ xfs_da_do_buf(
1930 xfs_daddr_t *mappedbnop, 1939 xfs_daddr_t *mappedbnop,
1931 xfs_dabuf_t **bpp, 1940 xfs_dabuf_t **bpp,
1932 int whichfork, 1941 int whichfork,
1933 int caller, 1942 int caller)
1934 inst_t *ra)
1935{ 1943{
1936 xfs_buf_t *bp = NULL; 1944 xfs_buf_t *bp = NULL;
1937 xfs_buf_t **bplist; 1945 xfs_buf_t **bplist;
@@ -2070,25 +2078,22 @@ xfs_da_do_buf(
2070 * Build a dabuf structure. 2078 * Build a dabuf structure.
2071 */ 2079 */
2072 if (bplist) { 2080 if (bplist) {
2073 rbp = xfs_da_buf_make(nbplist, bplist, ra); 2081 rbp = xfs_da_buf_make(nbplist, bplist);
2074 } else if (bp) 2082 } else if (bp)
2075 rbp = xfs_da_buf_make(1, &bp, ra); 2083 rbp = xfs_da_buf_make(1, &bp);
2076 else 2084 else
2077 rbp = NULL; 2085 rbp = NULL;
2078 /* 2086 /*
2079 * For read_buf, check the magic number. 2087 * For read_buf, check the magic number.
2080 */ 2088 */
2081 if (caller == 1) { 2089 if (caller == 1) {
2082 xfs_dir2_data_t *data; 2090 xfs_dir2_data_hdr_t *hdr = rbp->data;
2083 xfs_dir2_free_t *free; 2091 xfs_dir2_free_t *free = rbp->data;
2084 xfs_da_blkinfo_t *info; 2092 xfs_da_blkinfo_t *info = rbp->data;
2085 uint magic, magic1; 2093 uint magic, magic1;
2086 2094
2087 info = rbp->data;
2088 data = rbp->data;
2089 free = rbp->data;
2090 magic = be16_to_cpu(info->magic); 2095 magic = be16_to_cpu(info->magic);
2091 magic1 = be32_to_cpu(data->hdr.magic); 2096 magic1 = be32_to_cpu(hdr->magic);
2092 if (unlikely( 2097 if (unlikely(
2093 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && 2098 XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2094 (magic != XFS_ATTR_LEAF_MAGIC) && 2099 (magic != XFS_ATTR_LEAF_MAGIC) &&
@@ -2096,7 +2101,7 @@ xfs_da_do_buf(
2096 (magic != XFS_DIR2_LEAFN_MAGIC) && 2101 (magic != XFS_DIR2_LEAFN_MAGIC) &&
2097 (magic1 != XFS_DIR2_BLOCK_MAGIC) && 2102 (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2098 (magic1 != XFS_DIR2_DATA_MAGIC) && 2103 (magic1 != XFS_DIR2_DATA_MAGIC) &&
2099 (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC), 2104 (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
2100 mp, XFS_ERRTAG_DA_READ_BUF, 2105 mp, XFS_ERRTAG_DA_READ_BUF,
2101 XFS_RANDOM_DA_READ_BUF))) { 2106 XFS_RANDOM_DA_READ_BUF))) {
2102 trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_); 2107 trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
@@ -2143,8 +2148,7 @@ xfs_da_get_buf(
2143 xfs_dabuf_t **bpp, 2148 xfs_dabuf_t **bpp,
2144 int whichfork) 2149 int whichfork)
2145{ 2150{
2146 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0, 2151 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0);
2147 (inst_t *)__return_address);
2148} 2152}
2149 2153
2150/* 2154/*
@@ -2159,8 +2163,7 @@ xfs_da_read_buf(
2159 xfs_dabuf_t **bpp, 2163 xfs_dabuf_t **bpp,
2160 int whichfork) 2164 int whichfork)
2161{ 2165{
2162 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1, 2166 return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1);
2163 (inst_t *)__return_address);
2164} 2167}
2165 2168
2166/* 2169/*
@@ -2176,8 +2179,7 @@ xfs_da_reada_buf(
2176 xfs_daddr_t rval; 2179 xfs_daddr_t rval;
2177 2180
2178 rval = -1; 2181 rval = -1;
2179 if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3, 2182 if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3))
2180 (inst_t *)__return_address))
2181 return -1; 2183 return -1;
2182 else 2184 else
2183 return rval; 2185 return rval;
@@ -2235,17 +2237,12 @@ xfs_da_state_free(xfs_da_state_t *state)
2235 kmem_zone_free(xfs_da_state_zone, state); 2237 kmem_zone_free(xfs_da_state_zone, state);
2236} 2238}
2237 2239
2238#ifdef XFS_DABUF_DEBUG
2239xfs_dabuf_t *xfs_dabuf_global_list;
2240static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
2241#endif
2242
2243/* 2240/*
2244 * Create a dabuf. 2241 * Create a dabuf.
2245 */ 2242 */
2246/* ARGSUSED */ 2243/* ARGSUSED */
2247STATIC xfs_dabuf_t * 2244STATIC xfs_dabuf_t *
2248xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra) 2245xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
2249{ 2246{
2250 xfs_buf_t *bp; 2247 xfs_buf_t *bp;
2251 xfs_dabuf_t *dabuf; 2248 xfs_dabuf_t *dabuf;
@@ -2257,11 +2254,6 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2257 else 2254 else
2258 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS); 2255 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
2259 dabuf->dirty = 0; 2256 dabuf->dirty = 0;
2260#ifdef XFS_DABUF_DEBUG
2261 dabuf->ra = ra;
2262 dabuf->target = XFS_BUF_TARGET(bps[0]);
2263 dabuf->blkno = XFS_BUF_ADDR(bps[0]);
2264#endif
2265 if (nbuf == 1) { 2257 if (nbuf == 1) {
2266 dabuf->nbuf = 1; 2258 dabuf->nbuf = 1;
2267 bp = bps[0]; 2259 bp = bps[0];
@@ -2281,23 +2273,6 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2281 XFS_BUF_COUNT(bp)); 2273 XFS_BUF_COUNT(bp));
2282 } 2274 }
2283 } 2275 }
2284#ifdef XFS_DABUF_DEBUG
2285 {
2286 xfs_dabuf_t *p;
2287
2288 spin_lock(&xfs_dabuf_global_lock);
2289 for (p = xfs_dabuf_global_list; p; p = p->next) {
2290 ASSERT(p->blkno != dabuf->blkno ||
2291 p->target != dabuf->target);
2292 }
2293 dabuf->prev = NULL;
2294 if (xfs_dabuf_global_list)
2295 xfs_dabuf_global_list->prev = dabuf;
2296 dabuf->next = xfs_dabuf_global_list;
2297 xfs_dabuf_global_list = dabuf;
2298 spin_unlock(&xfs_dabuf_global_lock);
2299 }
2300#endif
2301 return dabuf; 2276 return dabuf;
2302} 2277}
2303 2278
@@ -2333,25 +2308,12 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
2333 ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]); 2308 ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2334 if (dabuf->dirty) 2309 if (dabuf->dirty)
2335 xfs_da_buf_clean(dabuf); 2310 xfs_da_buf_clean(dabuf);
2336 if (dabuf->nbuf > 1) 2311 if (dabuf->nbuf > 1) {
2337 kmem_free(dabuf->data); 2312 kmem_free(dabuf->data);
2338#ifdef XFS_DABUF_DEBUG
2339 {
2340 spin_lock(&xfs_dabuf_global_lock);
2341 if (dabuf->prev)
2342 dabuf->prev->next = dabuf->next;
2343 else
2344 xfs_dabuf_global_list = dabuf->next;
2345 if (dabuf->next)
2346 dabuf->next->prev = dabuf->prev;
2347 spin_unlock(&xfs_dabuf_global_lock);
2348 }
2349 memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
2350#endif
2351 if (dabuf->nbuf == 1)
2352 kmem_zone_free(xfs_dabuf_zone, dabuf);
2353 else
2354 kmem_free(dabuf); 2313 kmem_free(dabuf);
2314 } else {
2315 kmem_zone_free(xfs_dabuf_zone, dabuf);
2316 }
2355} 2317}
2356 2318
2357/* 2319/*
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index fe9f5a8c1d2a..dbf7c074ae73 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -145,22 +145,11 @@ typedef struct xfs_dabuf {
145 short dirty; /* data needs to be copied back */ 145 short dirty; /* data needs to be copied back */
146 short bbcount; /* how large is data in bbs */ 146 short bbcount; /* how large is data in bbs */
147 void *data; /* pointer for buffers' data */ 147 void *data; /* pointer for buffers' data */
148#ifdef XFS_DABUF_DEBUG
149 inst_t *ra; /* return address of caller to make */
150 struct xfs_dabuf *next; /* next in global chain */
151 struct xfs_dabuf *prev; /* previous in global chain */
152 struct xfs_buftarg *target; /* device for buffer */
153 xfs_daddr_t blkno; /* daddr first in bps[0] */
154#endif
155 struct xfs_buf *bps[1]; /* actually nbuf of these */ 148 struct xfs_buf *bps[1]; /* actually nbuf of these */
156} xfs_dabuf_t; 149} xfs_dabuf_t;
157#define XFS_DA_BUF_SIZE(n) \ 150#define XFS_DA_BUF_SIZE(n) \
158 (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1)) 151 (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
159 152
160#ifdef XFS_DABUF_DEBUG
161extern xfs_dabuf_t *xfs_dabuf_global_list;
162#endif
163
164/* 153/*
165 * Storage for holding state during Btree searches and split/join ops. 154 * Storage for holding state during Btree searches and split/join ops.
166 * 155 *
@@ -248,6 +237,8 @@ int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
248 * Utility routines. 237 * Utility routines.
249 */ 238 */
250int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno); 239int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
240int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
241 int count);
251int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp, 242int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
252 xfs_dablk_t bno, xfs_daddr_t mappedbno, 243 xfs_dablk_t bno, xfs_daddr_t mappedbno,
253 xfs_dabuf_t **bp, int whichfork); 244 xfs_dabuf_t **bp, int whichfork);
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index dba7a71cedf3..4580ce00aeb4 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -24,20 +24,17 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_alloc_btree.h" 30#include "xfs_alloc_btree.h"
32#include "xfs_dir2_sf.h"
33#include "xfs_dinode.h" 31#include "xfs_dinode.h"
34#include "xfs_inode.h" 32#include "xfs_inode.h"
35#include "xfs_inode_item.h" 33#include "xfs_inode_item.h"
36#include "xfs_bmap.h" 34#include "xfs_bmap.h"
37#include "xfs_dir2_data.h" 35#include "xfs_dir2.h"
38#include "xfs_dir2_leaf.h" 36#include "xfs_dir2_format.h"
39#include "xfs_dir2_block.h" 37#include "xfs_dir2_priv.h"
40#include "xfs_dir2_node.h"
41#include "xfs_error.h" 38#include "xfs_error.h"
42#include "xfs_vnodeops.h" 39#include "xfs_vnodeops.h"
43#include "xfs_trace.h" 40#include "xfs_trace.h"
@@ -122,15 +119,15 @@ int
122xfs_dir_isempty( 119xfs_dir_isempty(
123 xfs_inode_t *dp) 120 xfs_inode_t *dp)
124{ 121{
125 xfs_dir2_sf_t *sfp; 122 xfs_dir2_sf_hdr_t *sfp;
126 123
127 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); 124 ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
128 if (dp->i_d.di_size == 0) /* might happen during shutdown. */ 125 if (dp->i_d.di_size == 0) /* might happen during shutdown. */
129 return 1; 126 return 1;
130 if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp)) 127 if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
131 return 0; 128 return 0;
132 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 129 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
133 return !sfp->hdr.count; 130 return !sfp->count;
134} 131}
135 132
136/* 133/*
@@ -500,129 +497,34 @@ xfs_dir_canenter(
500 497
501/* 498/*
502 * Add a block to the directory. 499 * Add a block to the directory.
503 * This routine is for data and free blocks, not leaf/node blocks 500 *
504 * which are handled by xfs_da_grow_inode. 501 * This routine is for data and free blocks, not leaf/node blocks which are
502 * handled by xfs_da_grow_inode.
505 */ 503 */
506int 504int
507xfs_dir2_grow_inode( 505xfs_dir2_grow_inode(
508 xfs_da_args_t *args, 506 struct xfs_da_args *args,
509 int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */ 507 int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */
510 xfs_dir2_db_t *dbp) /* out: block number added */ 508 xfs_dir2_db_t *dbp) /* out: block number added */
511{ 509{
512 xfs_fileoff_t bno; /* directory offset of new block */ 510 struct xfs_inode *dp = args->dp;
513 int count; /* count of filesystem blocks */ 511 struct xfs_mount *mp = dp->i_mount;
514 xfs_inode_t *dp; /* incore directory inode */ 512 xfs_fileoff_t bno; /* directory offset of new block */
515 int error; 513 int count; /* count of filesystem blocks */
516 int got; /* blocks actually mapped */ 514 int error;
517 int i;
518 xfs_bmbt_irec_t map; /* single structure for bmap */
519 int mapi; /* mapping index */
520 xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */
521 xfs_mount_t *mp;
522 int nmap; /* number of bmap entries */
523 xfs_trans_t *tp;
524 xfs_drfsbno_t nblks;
525 515
526 trace_xfs_dir2_grow_inode(args, space); 516 trace_xfs_dir2_grow_inode(args, space);
527 517
528 dp = args->dp;
529 tp = args->trans;
530 mp = dp->i_mount;
531 nblks = dp->i_d.di_nblocks;
532 /* 518 /*
533 * Set lowest possible block in the space requested. 519 * Set lowest possible block in the space requested.
534 */ 520 */
535 bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); 521 bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
536 count = mp->m_dirblkfsbs; 522 count = mp->m_dirblkfsbs;
537 /*
538 * Find the first hole for our block.
539 */
540 if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)))
541 return error;
542 nmap = 1;
543 ASSERT(args->firstblock != NULL);
544 /*
545 * Try mapping the new block contiguously (one extent).
546 */
547 if ((error = xfs_bmapi(tp, dp, bno, count,
548 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
549 args->firstblock, args->total, &map, &nmap,
550 args->flist)))
551 return error;
552 ASSERT(nmap <= 1);
553 if (nmap == 1) {
554 mapp = &map;
555 mapi = 1;
556 }
557 /*
558 * Didn't work and this is a multiple-fsb directory block.
559 * Try again with contiguous flag turned on.
560 */
561 else if (nmap == 0 && count > 1) {
562 xfs_fileoff_t b; /* current file offset */
563 523
564 /* 524 error = xfs_da_grow_inode_int(args, &bno, count);
565 * Space for maximum number of mappings. 525 if (error)
566 */ 526 return error;
567 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
568 /*
569 * Iterate until we get to the end of our block.
570 */
571 for (b = bno, mapi = 0; b < bno + count; ) {
572 int c; /* current fsb count */
573
574 /*
575 * Can't map more than MAX_NMAP at once.
576 */
577 nmap = MIN(XFS_BMAP_MAX_NMAP, count);
578 c = (int)(bno + count - b);
579 if ((error = xfs_bmapi(tp, dp, b, c,
580 XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
581 args->firstblock, args->total,
582 &mapp[mapi], &nmap, args->flist))) {
583 kmem_free(mapp);
584 return error;
585 }
586 if (nmap < 1)
587 break;
588 /*
589 * Add this bunch into our table, go to the next offset.
590 */
591 mapi += nmap;
592 b = mapp[mapi - 1].br_startoff +
593 mapp[mapi - 1].br_blockcount;
594 }
595 }
596 /*
597 * Didn't work.
598 */
599 else {
600 mapi = 0;
601 mapp = NULL;
602 }
603 /*
604 * See how many fsb's we got.
605 */
606 for (i = 0, got = 0; i < mapi; i++)
607 got += mapp[i].br_blockcount;
608 /*
609 * Didn't get enough fsb's, or the first/last block's are wrong.
610 */
611 if (got != count || mapp[0].br_startoff != bno ||
612 mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
613 bno + count) {
614 if (mapp != &map)
615 kmem_free(mapp);
616 return XFS_ERROR(ENOSPC);
617 }
618 /*
619 * Done with the temporary mapping table.
620 */
621 if (mapp != &map)
622 kmem_free(mapp);
623 527
624 /* account for newly allocated blocks in reserved blocks total */
625 args->total -= dp->i_d.di_nblocks - nblks;
626 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); 528 *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
627 529
628 /* 530 /*
@@ -634,7 +536,7 @@ xfs_dir2_grow_inode(
634 size = XFS_FSB_TO_B(mp, bno + count); 536 size = XFS_FSB_TO_B(mp, bno + count);
635 if (size > dp->i_d.di_size) { 537 if (size > dp->i_d.di_size) {
636 dp->i_d.di_size = size; 538 dp->i_d.di_size = size;
637 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 539 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
638 } 540 }
639 } 541 }
640 return 0; 542 return 0;
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 74a3b1057685..e937d9991c18 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -16,49 +16,14 @@
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */ 17 */
18#ifndef __XFS_DIR2_H__ 18#ifndef __XFS_DIR2_H__
19#define __XFS_DIR2_H__ 19#define __XFS_DIR2_H__
20 20
21struct uio;
22struct xfs_dabuf;
23struct xfs_da_args;
24struct xfs_dir2_put_args;
25struct xfs_bmap_free; 21struct xfs_bmap_free;
22struct xfs_da_args;
26struct xfs_inode; 23struct xfs_inode;
27struct xfs_mount; 24struct xfs_mount;
28struct xfs_trans; 25struct xfs_trans;
29 26
30/*
31 * Directory version 2.
32 * There are 4 possible formats:
33 * shortform
34 * single block - data with embedded leaf at the end
35 * multiple data blocks, single leaf+freeindex block
36 * data blocks, node&leaf blocks (btree), freeindex blocks
37 *
38 * The shortform format is in xfs_dir2_sf.h.
39 * The single block format is in xfs_dir2_block.h.
40 * The data block format is in xfs_dir2_data.h.
41 * The leaf and freeindex block formats are in xfs_dir2_leaf.h.
42 * Node blocks are the same as the other version, in xfs_da_btree.h.
43 */
44
45/*
46 * Byte offset in data block and shortform entry.
47 */
48typedef __uint16_t xfs_dir2_data_off_t;
49#define NULLDATAOFF 0xffffU
50typedef uint xfs_dir2_data_aoff_t; /* argument form */
51
52/*
53 * Directory block number (logical dirblk in file)
54 */
55typedef __uint32_t xfs_dir2_db_t;
56
57/*
58 * Byte offset in a directory.
59 */
60typedef xfs_off_t xfs_dir2_off_t;
61
62extern struct xfs_name xfs_name_dotdot; 27extern struct xfs_name xfs_name_dotdot;
63 28
64/* 29/*
@@ -86,21 +51,10 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
86 struct xfs_bmap_free *flist, xfs_extlen_t tot); 51 struct xfs_bmap_free *flist, xfs_extlen_t tot);
87extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp, 52extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
88 struct xfs_name *name, uint resblks); 53 struct xfs_name *name, uint resblks);
89extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
90 54
91/* 55/*
92 * Utility routines for v2 directories. 56 * Direct call from the bmap code, bypassing the generic directory layer.
93 */ 57 */
94extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, 58extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
95 xfs_dir2_db_t *dbp);
96extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp,
97 int *vp);
98extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
99 int *vp);
100extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
101 struct xfs_dabuf *bp);
102
103extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
104 const unsigned char *name, int len);
105 59
106#endif /* __XFS_DIR2_H__ */ 60#endif /* __XFS_DIR2_H__ */
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 580d99cef9e7..9245e029b8ea 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -23,17 +23,14 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_dinode.h" 29#include "xfs_dinode.h"
32#include "xfs_inode.h" 30#include "xfs_inode.h"
33#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
34#include "xfs_dir2_data.h" 32#include "xfs_dir2_format.h"
35#include "xfs_dir2_leaf.h" 33#include "xfs_dir2_priv.h"
36#include "xfs_dir2_block.h"
37#include "xfs_error.h" 34#include "xfs_error.h"
38#include "xfs_trace.h" 35#include "xfs_trace.h"
39 36
@@ -67,7 +64,7 @@ xfs_dir2_block_addname(
67 xfs_da_args_t *args) /* directory op arguments */ 64 xfs_da_args_t *args) /* directory op arguments */
68{ 65{
69 xfs_dir2_data_free_t *bf; /* bestfree table in block */ 66 xfs_dir2_data_free_t *bf; /* bestfree table in block */
70 xfs_dir2_block_t *block; /* directory block structure */ 67 xfs_dir2_data_hdr_t *hdr; /* block header */
71 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 68 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
72 xfs_dabuf_t *bp; /* buffer for block */ 69 xfs_dabuf_t *bp; /* buffer for block */
73 xfs_dir2_block_tail_t *btp; /* block tail */ 70 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -105,13 +102,13 @@ xfs_dir2_block_addname(
105 return error; 102 return error;
106 } 103 }
107 ASSERT(bp != NULL); 104 ASSERT(bp != NULL);
108 block = bp->data; 105 hdr = bp->data;
109 /* 106 /*
110 * Check the magic number, corrupted if wrong. 107 * Check the magic number, corrupted if wrong.
111 */ 108 */
112 if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) { 109 if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) {
113 XFS_CORRUPTION_ERROR("xfs_dir2_block_addname", 110 XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
114 XFS_ERRLEVEL_LOW, mp, block); 111 XFS_ERRLEVEL_LOW, mp, hdr);
115 xfs_da_brelse(tp, bp); 112 xfs_da_brelse(tp, bp);
116 return XFS_ERROR(EFSCORRUPTED); 113 return XFS_ERROR(EFSCORRUPTED);
117 } 114 }
@@ -119,8 +116,8 @@ xfs_dir2_block_addname(
119 /* 116 /*
120 * Set up pointers to parts of the block. 117 * Set up pointers to parts of the block.
121 */ 118 */
122 bf = block->hdr.bestfree; 119 bf = hdr->bestfree;
123 btp = xfs_dir2_block_tail_p(mp, block); 120 btp = xfs_dir2_block_tail_p(mp, hdr);
124 blp = xfs_dir2_block_leaf_p(btp); 121 blp = xfs_dir2_block_leaf_p(btp);
125 /* 122 /*
126 * No stale entries? Need space for entry and new leaf. 123 * No stale entries? Need space for entry and new leaf.
@@ -133,7 +130,7 @@ xfs_dir2_block_addname(
133 /* 130 /*
134 * Data object just before the first leaf entry. 131 * Data object just before the first leaf entry.
135 */ 132 */
136 enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); 133 enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
137 /* 134 /*
138 * If it's not free then can't do this add without cleaning up: 135 * If it's not free then can't do this add without cleaning up:
139 * the space before the first leaf entry needs to be free so it 136 * the space before the first leaf entry needs to be free so it
@@ -146,7 +143,7 @@ xfs_dir2_block_addname(
146 */ 143 */
147 else { 144 else {
148 dup = (xfs_dir2_data_unused_t *) 145 dup = (xfs_dir2_data_unused_t *)
149 ((char *)block + be16_to_cpu(bf[0].offset)); 146 ((char *)hdr + be16_to_cpu(bf[0].offset));
150 if (dup == enddup) { 147 if (dup == enddup) {
151 /* 148 /*
152 * It is the biggest freespace, is it too small 149 * It is the biggest freespace, is it too small
@@ -159,7 +156,7 @@ xfs_dir2_block_addname(
159 */ 156 */
160 if (be16_to_cpu(bf[1].length) >= len) 157 if (be16_to_cpu(bf[1].length) >= len)
161 dup = (xfs_dir2_data_unused_t *) 158 dup = (xfs_dir2_data_unused_t *)
162 ((char *)block + 159 ((char *)hdr +
163 be16_to_cpu(bf[1].offset)); 160 be16_to_cpu(bf[1].offset));
164 else 161 else
165 dup = NULL; 162 dup = NULL;
@@ -182,7 +179,7 @@ xfs_dir2_block_addname(
182 */ 179 */
183 else if (be16_to_cpu(bf[0].length) >= len) { 180 else if (be16_to_cpu(bf[0].length) >= len) {
184 dup = (xfs_dir2_data_unused_t *) 181 dup = (xfs_dir2_data_unused_t *)
185 ((char *)block + be16_to_cpu(bf[0].offset)); 182 ((char *)hdr + be16_to_cpu(bf[0].offset));
186 compact = 0; 183 compact = 0;
187 } 184 }
188 /* 185 /*
@@ -196,7 +193,7 @@ xfs_dir2_block_addname(
196 /* 193 /*
197 * Data object just before the first leaf entry. 194 * Data object just before the first leaf entry.
198 */ 195 */
199 dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); 196 dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
200 /* 197 /*
201 * If it's not free then the data will go where the 198 * If it's not free then the data will go where the
202 * leaf data starts now, if it works at all. 199 * leaf data starts now, if it works at all.
@@ -255,7 +252,8 @@ xfs_dir2_block_addname(
255 highstale = lfloghigh = -1; 252 highstale = lfloghigh = -1;
256 fromidx >= 0; 253 fromidx >= 0;
257 fromidx--) { 254 fromidx--) {
258 if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) { 255 if (blp[fromidx].address ==
256 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
259 if (highstale == -1) 257 if (highstale == -1)
260 highstale = toidx; 258 highstale = toidx;
261 else { 259 else {
@@ -272,7 +270,7 @@ xfs_dir2_block_addname(
272 lfloghigh -= be32_to_cpu(btp->stale) - 1; 270 lfloghigh -= be32_to_cpu(btp->stale) - 1;
273 be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); 271 be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
274 xfs_dir2_data_make_free(tp, bp, 272 xfs_dir2_data_make_free(tp, bp,
275 (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), 273 (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
276 (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)), 274 (xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
277 &needlog, &needscan); 275 &needlog, &needscan);
278 blp += be32_to_cpu(btp->stale) - 1; 276 blp += be32_to_cpu(btp->stale) - 1;
@@ -282,7 +280,7 @@ xfs_dir2_block_addname(
282 * This needs to happen before the next call to use_free. 280 * This needs to happen before the next call to use_free.
283 */ 281 */
284 if (needscan) { 282 if (needscan) {
285 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 283 xfs_dir2_data_freescan(mp, hdr, &needlog);
286 needscan = 0; 284 needscan = 0;
287 } 285 }
288 } 286 }
@@ -318,7 +316,7 @@ xfs_dir2_block_addname(
318 */ 316 */
319 xfs_dir2_data_use_free(tp, bp, enddup, 317 xfs_dir2_data_use_free(tp, bp, enddup,
320 (xfs_dir2_data_aoff_t) 318 (xfs_dir2_data_aoff_t)
321 ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) - 319 ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
322 sizeof(*blp)), 320 sizeof(*blp)),
323 (xfs_dir2_data_aoff_t)sizeof(*blp), 321 (xfs_dir2_data_aoff_t)sizeof(*blp),
324 &needlog, &needscan); 322 &needlog, &needscan);
@@ -331,8 +329,7 @@ xfs_dir2_block_addname(
331 * This needs to happen before the next call to use_free. 329 * This needs to happen before the next call to use_free.
332 */ 330 */
333 if (needscan) { 331 if (needscan) {
334 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, 332 xfs_dir2_data_freescan(mp, hdr, &needlog);
335 &needlog);
336 needscan = 0; 333 needscan = 0;
337 } 334 }
338 /* 335 /*
@@ -353,12 +350,14 @@ xfs_dir2_block_addname(
353 else { 350 else {
354 for (lowstale = mid; 351 for (lowstale = mid;
355 lowstale >= 0 && 352 lowstale >= 0 &&
356 be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR; 353 blp[lowstale].address !=
354 cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
357 lowstale--) 355 lowstale--)
358 continue; 356 continue;
359 for (highstale = mid + 1; 357 for (highstale = mid + 1;
360 highstale < be32_to_cpu(btp->count) && 358 highstale < be32_to_cpu(btp->count) &&
361 be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR && 359 blp[highstale].address !=
360 cpu_to_be32(XFS_DIR2_NULL_DATAPTR) &&
362 (lowstale < 0 || mid - lowstale > highstale - mid); 361 (lowstale < 0 || mid - lowstale > highstale - mid);
363 highstale++) 362 highstale++)
364 continue; 363 continue;
@@ -397,13 +396,13 @@ xfs_dir2_block_addname(
397 */ 396 */
398 blp[mid].hashval = cpu_to_be32(args->hashval); 397 blp[mid].hashval = cpu_to_be32(args->hashval);
399 blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 398 blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
400 (char *)dep - (char *)block)); 399 (char *)dep - (char *)hdr));
401 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh); 400 xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
402 /* 401 /*
403 * Mark space for the data entry used. 402 * Mark space for the data entry used.
404 */ 403 */
405 xfs_dir2_data_use_free(tp, bp, dup, 404 xfs_dir2_data_use_free(tp, bp, dup,
406 (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), 405 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
407 (xfs_dir2_data_aoff_t)len, &needlog, &needscan); 406 (xfs_dir2_data_aoff_t)len, &needlog, &needscan);
408 /* 407 /*
409 * Create the new data entry. 408 * Create the new data entry.
@@ -412,12 +411,12 @@ xfs_dir2_block_addname(
412 dep->namelen = args->namelen; 411 dep->namelen = args->namelen;
413 memcpy(dep->name, args->name, args->namelen); 412 memcpy(dep->name, args->name, args->namelen);
414 tagp = xfs_dir2_data_entry_tag_p(dep); 413 tagp = xfs_dir2_data_entry_tag_p(dep);
415 *tagp = cpu_to_be16((char *)dep - (char *)block); 414 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
416 /* 415 /*
417 * Clean up the bestfree array and log the header, tail, and entry. 416 * Clean up the bestfree array and log the header, tail, and entry.
418 */ 417 */
419 if (needscan) 418 if (needscan)
420 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 419 xfs_dir2_data_freescan(mp, hdr, &needlog);
421 if (needlog) 420 if (needlog)
422 xfs_dir2_data_log_header(tp, bp); 421 xfs_dir2_data_log_header(tp, bp);
423 xfs_dir2_block_log_tail(tp, bp); 422 xfs_dir2_block_log_tail(tp, bp);
@@ -437,7 +436,7 @@ xfs_dir2_block_getdents(
437 xfs_off_t *offset, 436 xfs_off_t *offset,
438 filldir_t filldir) 437 filldir_t filldir)
439{ 438{
440 xfs_dir2_block_t *block; /* directory block structure */ 439 xfs_dir2_data_hdr_t *hdr; /* block header */
441 xfs_dabuf_t *bp; /* buffer for block */ 440 xfs_dabuf_t *bp; /* buffer for block */
442 xfs_dir2_block_tail_t *btp; /* block tail */ 441 xfs_dir2_block_tail_t *btp; /* block tail */
443 xfs_dir2_data_entry_t *dep; /* block data entry */ 442 xfs_dir2_data_entry_t *dep; /* block data entry */
@@ -470,13 +469,13 @@ xfs_dir2_block_getdents(
470 * We'll skip entries before this. 469 * We'll skip entries before this.
471 */ 470 */
472 wantoff = xfs_dir2_dataptr_to_off(mp, *offset); 471 wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
473 block = bp->data; 472 hdr = bp->data;
474 xfs_dir2_data_check(dp, bp); 473 xfs_dir2_data_check(dp, bp);
475 /* 474 /*
476 * Set up values for the loop. 475 * Set up values for the loop.
477 */ 476 */
478 btp = xfs_dir2_block_tail_p(mp, block); 477 btp = xfs_dir2_block_tail_p(mp, hdr);
479 ptr = (char *)block->u; 478 ptr = (char *)(hdr + 1);
480 endptr = (char *)xfs_dir2_block_leaf_p(btp); 479 endptr = (char *)xfs_dir2_block_leaf_p(btp);
481 480
482 /* 481 /*
@@ -502,11 +501,11 @@ xfs_dir2_block_getdents(
502 /* 501 /*
503 * The entry is before the desired starting point, skip it. 502 * The entry is before the desired starting point, skip it.
504 */ 503 */
505 if ((char *)dep - (char *)block < wantoff) 504 if ((char *)dep - (char *)hdr < wantoff)
506 continue; 505 continue;
507 506
508 cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 507 cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
509 (char *)dep - (char *)block); 508 (char *)dep - (char *)hdr);
510 509
511 /* 510 /*
512 * If it didn't fit, set the final offset to here & return. 511 * If it didn't fit, set the final offset to here & return.
@@ -540,17 +539,14 @@ xfs_dir2_block_log_leaf(
540 int first, /* index of first logged leaf */ 539 int first, /* index of first logged leaf */
541 int last) /* index of last logged leaf */ 540 int last) /* index of last logged leaf */
542{ 541{
543 xfs_dir2_block_t *block; /* directory block structure */ 542 xfs_dir2_data_hdr_t *hdr = bp->data;
544 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 543 xfs_dir2_leaf_entry_t *blp;
545 xfs_dir2_block_tail_t *btp; /* block tail */ 544 xfs_dir2_block_tail_t *btp;
546 xfs_mount_t *mp; /* filesystem mount point */
547 545
548 mp = tp->t_mountp; 546 btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
549 block = bp->data;
550 btp = xfs_dir2_block_tail_p(mp, block);
551 blp = xfs_dir2_block_leaf_p(btp); 547 blp = xfs_dir2_block_leaf_p(btp);
552 xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block), 548 xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
553 (uint)((char *)&blp[last + 1] - (char *)block - 1)); 549 (uint)((char *)&blp[last + 1] - (char *)hdr - 1));
554} 550}
555 551
556/* 552/*
@@ -561,15 +557,12 @@ xfs_dir2_block_log_tail(
561 xfs_trans_t *tp, /* transaction structure */ 557 xfs_trans_t *tp, /* transaction structure */
562 xfs_dabuf_t *bp) /* block buffer */ 558 xfs_dabuf_t *bp) /* block buffer */
563{ 559{
564 xfs_dir2_block_t *block; /* directory block structure */ 560 xfs_dir2_data_hdr_t *hdr = bp->data;
565 xfs_dir2_block_tail_t *btp; /* block tail */ 561 xfs_dir2_block_tail_t *btp;
566 xfs_mount_t *mp; /* filesystem mount point */
567 562
568 mp = tp->t_mountp; 563 btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
569 block = bp->data; 564 xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
570 btp = xfs_dir2_block_tail_p(mp, block); 565 (uint)((char *)(btp + 1) - (char *)hdr - 1));
571 xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
572 (uint)((char *)(btp + 1) - (char *)block - 1));
573} 566}
574 567
575/* 568/*
@@ -580,7 +573,7 @@ int /* error */
580xfs_dir2_block_lookup( 573xfs_dir2_block_lookup(
581 xfs_da_args_t *args) /* dir lookup arguments */ 574 xfs_da_args_t *args) /* dir lookup arguments */
582{ 575{
583 xfs_dir2_block_t *block; /* block structure */ 576 xfs_dir2_data_hdr_t *hdr; /* block header */
584 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 577 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
585 xfs_dabuf_t *bp; /* block buffer */ 578 xfs_dabuf_t *bp; /* block buffer */
586 xfs_dir2_block_tail_t *btp; /* block tail */ 579 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -600,14 +593,14 @@ xfs_dir2_block_lookup(
600 return error; 593 return error;
601 dp = args->dp; 594 dp = args->dp;
602 mp = dp->i_mount; 595 mp = dp->i_mount;
603 block = bp->data; 596 hdr = bp->data;
604 xfs_dir2_data_check(dp, bp); 597 xfs_dir2_data_check(dp, bp);
605 btp = xfs_dir2_block_tail_p(mp, block); 598 btp = xfs_dir2_block_tail_p(mp, hdr);
606 blp = xfs_dir2_block_leaf_p(btp); 599 blp = xfs_dir2_block_leaf_p(btp);
607 /* 600 /*
608 * Get the offset from the leaf entry, to point to the data. 601 * Get the offset from the leaf entry, to point to the data.
609 */ 602 */
610 dep = (xfs_dir2_data_entry_t *)((char *)block + 603 dep = (xfs_dir2_data_entry_t *)((char *)hdr +
611 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); 604 xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
612 /* 605 /*
613 * Fill in inode number, CI name if appropriate, release the block. 606 * Fill in inode number, CI name if appropriate, release the block.
@@ -628,7 +621,7 @@ xfs_dir2_block_lookup_int(
628 int *entno) /* returned entry number */ 621 int *entno) /* returned entry number */
629{ 622{
630 xfs_dir2_dataptr_t addr; /* data entry address */ 623 xfs_dir2_dataptr_t addr; /* data entry address */
631 xfs_dir2_block_t *block; /* block structure */ 624 xfs_dir2_data_hdr_t *hdr; /* block header */
632 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 625 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
633 xfs_dabuf_t *bp; /* block buffer */ 626 xfs_dabuf_t *bp; /* block buffer */
634 xfs_dir2_block_tail_t *btp; /* block tail */ 627 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -654,9 +647,9 @@ xfs_dir2_block_lookup_int(
654 return error; 647 return error;
655 } 648 }
656 ASSERT(bp != NULL); 649 ASSERT(bp != NULL);
657 block = bp->data; 650 hdr = bp->data;
658 xfs_dir2_data_check(dp, bp); 651 xfs_dir2_data_check(dp, bp);
659 btp = xfs_dir2_block_tail_p(mp, block); 652 btp = xfs_dir2_block_tail_p(mp, hdr);
660 blp = xfs_dir2_block_leaf_p(btp); 653 blp = xfs_dir2_block_leaf_p(btp);
661 /* 654 /*
662 * Loop doing a binary search for our hash value. 655 * Loop doing a binary search for our hash value.
@@ -694,7 +687,7 @@ xfs_dir2_block_lookup_int(
694 * Get pointer to the entry from the leaf. 687 * Get pointer to the entry from the leaf.
695 */ 688 */
696 dep = (xfs_dir2_data_entry_t *) 689 dep = (xfs_dir2_data_entry_t *)
697 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); 690 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
698 /* 691 /*
699 * Compare name and if it's an exact match, return the index 692 * Compare name and if it's an exact match, return the index
700 * and buffer. If it's the first case-insensitive match, store 693 * and buffer. If it's the first case-insensitive match, store
@@ -733,7 +726,7 @@ int /* error */
733xfs_dir2_block_removename( 726xfs_dir2_block_removename(
734 xfs_da_args_t *args) /* directory operation args */ 727 xfs_da_args_t *args) /* directory operation args */
735{ 728{
736 xfs_dir2_block_t *block; /* block structure */ 729 xfs_dir2_data_hdr_t *hdr; /* block header */
737 xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */ 730 xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */
738 xfs_dabuf_t *bp; /* block buffer */ 731 xfs_dabuf_t *bp; /* block buffer */
739 xfs_dir2_block_tail_t *btp; /* block tail */ 732 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -760,20 +753,20 @@ xfs_dir2_block_removename(
760 dp = args->dp; 753 dp = args->dp;
761 tp = args->trans; 754 tp = args->trans;
762 mp = dp->i_mount; 755 mp = dp->i_mount;
763 block = bp->data; 756 hdr = bp->data;
764 btp = xfs_dir2_block_tail_p(mp, block); 757 btp = xfs_dir2_block_tail_p(mp, hdr);
765 blp = xfs_dir2_block_leaf_p(btp); 758 blp = xfs_dir2_block_leaf_p(btp);
766 /* 759 /*
767 * Point to the data entry using the leaf entry. 760 * Point to the data entry using the leaf entry.
768 */ 761 */
769 dep = (xfs_dir2_data_entry_t *) 762 dep = (xfs_dir2_data_entry_t *)
770 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); 763 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
771 /* 764 /*
772 * Mark the data entry's space free. 765 * Mark the data entry's space free.
773 */ 766 */
774 needlog = needscan = 0; 767 needlog = needscan = 0;
775 xfs_dir2_data_make_free(tp, bp, 768 xfs_dir2_data_make_free(tp, bp,
776 (xfs_dir2_data_aoff_t)((char *)dep - (char *)block), 769 (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
777 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); 770 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
778 /* 771 /*
779 * Fix up the block tail. 772 * Fix up the block tail.
@@ -789,15 +782,15 @@ xfs_dir2_block_removename(
789 * Fix up bestfree, log the header if necessary. 782 * Fix up bestfree, log the header if necessary.
790 */ 783 */
791 if (needscan) 784 if (needscan)
792 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 785 xfs_dir2_data_freescan(mp, hdr, &needlog);
793 if (needlog) 786 if (needlog)
794 xfs_dir2_data_log_header(tp, bp); 787 xfs_dir2_data_log_header(tp, bp);
795 xfs_dir2_data_check(dp, bp); 788 xfs_dir2_data_check(dp, bp);
796 /* 789 /*
797 * See if the size as a shortform is good enough. 790 * See if the size as a shortform is good enough.
798 */ 791 */
799 if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > 792 size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
800 XFS_IFORK_DSIZE(dp)) { 793 if (size > XFS_IFORK_DSIZE(dp)) {
801 xfs_da_buf_done(bp); 794 xfs_da_buf_done(bp);
802 return 0; 795 return 0;
803 } 796 }
@@ -815,7 +808,7 @@ int /* error */
815xfs_dir2_block_replace( 808xfs_dir2_block_replace(
816 xfs_da_args_t *args) /* directory operation args */ 809 xfs_da_args_t *args) /* directory operation args */
817{ 810{
818 xfs_dir2_block_t *block; /* block structure */ 811 xfs_dir2_data_hdr_t *hdr; /* block header */
819 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 812 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
820 xfs_dabuf_t *bp; /* block buffer */ 813 xfs_dabuf_t *bp; /* block buffer */
821 xfs_dir2_block_tail_t *btp; /* block tail */ 814 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -836,14 +829,14 @@ xfs_dir2_block_replace(
836 } 829 }
837 dp = args->dp; 830 dp = args->dp;
838 mp = dp->i_mount; 831 mp = dp->i_mount;
839 block = bp->data; 832 hdr = bp->data;
840 btp = xfs_dir2_block_tail_p(mp, block); 833 btp = xfs_dir2_block_tail_p(mp, hdr);
841 blp = xfs_dir2_block_leaf_p(btp); 834 blp = xfs_dir2_block_leaf_p(btp);
842 /* 835 /*
843 * Point to the data entry we need to change. 836 * Point to the data entry we need to change.
844 */ 837 */
845 dep = (xfs_dir2_data_entry_t *) 838 dep = (xfs_dir2_data_entry_t *)
846 ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); 839 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
847 ASSERT(be64_to_cpu(dep->inumber) != args->inumber); 840 ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
848 /* 841 /*
849 * Change the inode number to the new value. 842 * Change the inode number to the new value.
@@ -882,7 +875,7 @@ xfs_dir2_leaf_to_block(
882 xfs_dabuf_t *dbp) /* data buffer */ 875 xfs_dabuf_t *dbp) /* data buffer */
883{ 876{
884 __be16 *bestsp; /* leaf bests table */ 877 __be16 *bestsp; /* leaf bests table */
885 xfs_dir2_block_t *block; /* block structure */ 878 xfs_dir2_data_hdr_t *hdr; /* block header */
886 xfs_dir2_block_tail_t *btp; /* block tail */ 879 xfs_dir2_block_tail_t *btp; /* block tail */
887 xfs_inode_t *dp; /* incore directory inode */ 880 xfs_inode_t *dp; /* incore directory inode */
888 xfs_dir2_data_unused_t *dup; /* unused data entry */ 881 xfs_dir2_data_unused_t *dup; /* unused data entry */
@@ -906,7 +899,7 @@ xfs_dir2_leaf_to_block(
906 tp = args->trans; 899 tp = args->trans;
907 mp = dp->i_mount; 900 mp = dp->i_mount;
908 leaf = lbp->data; 901 leaf = lbp->data;
909 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 902 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
910 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 903 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
911 /* 904 /*
912 * If there are data blocks other than the first one, take this 905 * If there are data blocks other than the first one, take this
@@ -917,7 +910,7 @@ xfs_dir2_leaf_to_block(
917 while (dp->i_d.di_size > mp->m_dirblksize) { 910 while (dp->i_d.di_size > mp->m_dirblksize) {
918 bestsp = xfs_dir2_leaf_bests_p(ltp); 911 bestsp = xfs_dir2_leaf_bests_p(ltp);
919 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == 912 if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
920 mp->m_dirblksize - (uint)sizeof(block->hdr)) { 913 mp->m_dirblksize - (uint)sizeof(*hdr)) {
921 if ((error = 914 if ((error =
922 xfs_dir2_leaf_trim_data(args, lbp, 915 xfs_dir2_leaf_trim_data(args, lbp,
923 (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) 916 (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
@@ -935,18 +928,18 @@ xfs_dir2_leaf_to_block(
935 XFS_DATA_FORK))) { 928 XFS_DATA_FORK))) {
936 goto out; 929 goto out;
937 } 930 }
938 block = dbp->data; 931 hdr = dbp->data;
939 ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC); 932 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
940 /* 933 /*
941 * Size of the "leaf" area in the block. 934 * Size of the "leaf" area in the block.
942 */ 935 */
943 size = (uint)sizeof(block->tail) + 936 size = (uint)sizeof(xfs_dir2_block_tail_t) +
944 (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 937 (uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
945 /* 938 /*
946 * Look at the last data entry. 939 * Look at the last data entry.
947 */ 940 */
948 tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1; 941 tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1;
949 dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp)); 942 dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
950 /* 943 /*
951 * If it's not free or is too short we can't do it. 944 * If it's not free or is too short we can't do it.
952 */ 945 */
@@ -958,7 +951,7 @@ xfs_dir2_leaf_to_block(
958 /* 951 /*
959 * Start converting it to block form. 952 * Start converting it to block form.
960 */ 953 */
961 block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 954 hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
962 needlog = 1; 955 needlog = 1;
963 needscan = 0; 956 needscan = 0;
964 /* 957 /*
@@ -969,7 +962,7 @@ xfs_dir2_leaf_to_block(
969 /* 962 /*
970 * Initialize the block tail. 963 * Initialize the block tail.
971 */ 964 */
972 btp = xfs_dir2_block_tail_p(mp, block); 965 btp = xfs_dir2_block_tail_p(mp, hdr);
973 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)); 966 btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
974 btp->stale = 0; 967 btp->stale = 0;
975 xfs_dir2_block_log_tail(tp, dbp); 968 xfs_dir2_block_log_tail(tp, dbp);
@@ -978,7 +971,8 @@ xfs_dir2_leaf_to_block(
978 */ 971 */
979 lep = xfs_dir2_block_leaf_p(btp); 972 lep = xfs_dir2_block_leaf_p(btp);
980 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) { 973 for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
981 if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) 974 if (leaf->ents[from].address ==
975 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
982 continue; 976 continue;
983 lep[to++] = leaf->ents[from]; 977 lep[to++] = leaf->ents[from];
984 } 978 }
@@ -988,7 +982,7 @@ xfs_dir2_leaf_to_block(
988 * Scan the bestfree if we need it and log the data block header. 982 * Scan the bestfree if we need it and log the data block header.
989 */ 983 */
990 if (needscan) 984 if (needscan)
991 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 985 xfs_dir2_data_freescan(mp, hdr, &needlog);
992 if (needlog) 986 if (needlog)
993 xfs_dir2_data_log_header(tp, dbp); 987 xfs_dir2_data_log_header(tp, dbp);
994 /* 988 /*
@@ -1002,8 +996,8 @@ xfs_dir2_leaf_to_block(
1002 /* 996 /*
1003 * Now see if the resulting block can be shrunken to shortform. 997 * Now see if the resulting block can be shrunken to shortform.
1004 */ 998 */
1005 if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) > 999 size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
1006 XFS_IFORK_DSIZE(dp)) { 1000 if (size > XFS_IFORK_DSIZE(dp)) {
1007 error = 0; 1001 error = 0;
1008 goto out; 1002 goto out;
1009 } 1003 }
@@ -1024,12 +1018,10 @@ xfs_dir2_sf_to_block(
1024 xfs_da_args_t *args) /* operation arguments */ 1018 xfs_da_args_t *args) /* operation arguments */
1025{ 1019{
1026 xfs_dir2_db_t blkno; /* dir-relative block # (0) */ 1020 xfs_dir2_db_t blkno; /* dir-relative block # (0) */
1027 xfs_dir2_block_t *block; /* block structure */ 1021 xfs_dir2_data_hdr_t *hdr; /* block header */
1028 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */ 1022 xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
1029 xfs_dabuf_t *bp; /* block buffer */ 1023 xfs_dabuf_t *bp; /* block buffer */
1030 xfs_dir2_block_tail_t *btp; /* block tail pointer */ 1024 xfs_dir2_block_tail_t *btp; /* block tail pointer */
1031 char *buf; /* sf buffer */
1032 int buf_len;
1033 xfs_dir2_data_entry_t *dep; /* data entry pointer */ 1025 xfs_dir2_data_entry_t *dep; /* data entry pointer */
1034 xfs_inode_t *dp; /* incore directory inode */ 1026 xfs_inode_t *dp; /* incore directory inode */
1035 int dummy; /* trash */ 1027 int dummy; /* trash */
@@ -1043,7 +1035,8 @@ xfs_dir2_sf_to_block(
1043 int newoffset; /* offset from current entry */ 1035 int newoffset; /* offset from current entry */
1044 int offset; /* target block offset */ 1036 int offset; /* target block offset */
1045 xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */ 1037 xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */
1046 xfs_dir2_sf_t *sfp; /* shortform structure */ 1038 xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */
1039 xfs_dir2_sf_hdr_t *sfp; /* shortform header */
1047 __be16 *tagp; /* end of data entry */ 1040 __be16 *tagp; /* end of data entry */
1048 xfs_trans_t *tp; /* transaction pointer */ 1041 xfs_trans_t *tp; /* transaction pointer */
1049 struct xfs_name name; 1042 struct xfs_name name;
@@ -1061,32 +1054,30 @@ xfs_dir2_sf_to_block(
1061 ASSERT(XFS_FORCED_SHUTDOWN(mp)); 1054 ASSERT(XFS_FORCED_SHUTDOWN(mp));
1062 return XFS_ERROR(EIO); 1055 return XFS_ERROR(EIO);
1063 } 1056 }
1057
1058 oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1059
1064 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 1060 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
1065 ASSERT(dp->i_df.if_u1.if_data != NULL); 1061 ASSERT(dp->i_df.if_u1.if_data != NULL);
1066 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1062 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
1067 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 1063
1068 /* 1064 /*
1069 * Copy the directory into the stack buffer. 1065 * Copy the directory into a temporary buffer.
1070 * Then pitch the incore inode data so we can make extents. 1066 * Then pitch the incore inode data so we can make extents.
1071 */ 1067 */
1068 sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
1069 memcpy(sfp, oldsfp, dp->i_df.if_bytes);
1072 1070
1073 buf_len = dp->i_df.if_bytes; 1071 xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
1074 buf = kmem_alloc(buf_len, KM_SLEEP);
1075
1076 memcpy(buf, sfp, buf_len);
1077 xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
1078 dp->i_d.di_size = 0; 1072 dp->i_d.di_size = 0;
1079 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); 1073 xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1080 /* 1074
1081 * Reset pointer - old sfp is gone.
1082 */
1083 sfp = (xfs_dir2_sf_t *)buf;
1084 /* 1075 /*
1085 * Add block 0 to the inode. 1076 * Add block 0 to the inode.
1086 */ 1077 */
1087 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); 1078 error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
1088 if (error) { 1079 if (error) {
1089 kmem_free(buf); 1080 kmem_free(sfp);
1090 return error; 1081 return error;
1091 } 1082 }
1092 /* 1083 /*
@@ -1094,21 +1085,21 @@ xfs_dir2_sf_to_block(
1094 */ 1085 */
1095 error = xfs_dir2_data_init(args, blkno, &bp); 1086 error = xfs_dir2_data_init(args, blkno, &bp);
1096 if (error) { 1087 if (error) {
1097 kmem_free(buf); 1088 kmem_free(sfp);
1098 return error; 1089 return error;
1099 } 1090 }
1100 block = bp->data; 1091 hdr = bp->data;
1101 block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC); 1092 hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
1102 /* 1093 /*
1103 * Compute size of block "tail" area. 1094 * Compute size of block "tail" area.
1104 */ 1095 */
1105 i = (uint)sizeof(*btp) + 1096 i = (uint)sizeof(*btp) +
1106 (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t); 1097 (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
1107 /* 1098 /*
1108 * The whole thing is initialized to free by the init routine. 1099 * The whole thing is initialized to free by the init routine.
1109 * Say we're using the leaf and tail area. 1100 * Say we're using the leaf and tail area.
1110 */ 1101 */
1111 dup = (xfs_dir2_data_unused_t *)block->u; 1102 dup = (xfs_dir2_data_unused_t *)(hdr + 1);
1112 needlog = needscan = 0; 1103 needlog = needscan = 0;
1113 xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, 1104 xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
1114 &needscan); 1105 &needscan);
@@ -1116,50 +1107,51 @@ xfs_dir2_sf_to_block(
1116 /* 1107 /*
1117 * Fill in the tail. 1108 * Fill in the tail.
1118 */ 1109 */
1119 btp = xfs_dir2_block_tail_p(mp, block); 1110 btp = xfs_dir2_block_tail_p(mp, hdr);
1120 btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */ 1111 btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */
1121 btp->stale = 0; 1112 btp->stale = 0;
1122 blp = xfs_dir2_block_leaf_p(btp); 1113 blp = xfs_dir2_block_leaf_p(btp);
1123 endoffset = (uint)((char *)blp - (char *)block); 1114 endoffset = (uint)((char *)blp - (char *)hdr);
1124 /* 1115 /*
1125 * Remove the freespace, we'll manage it. 1116 * Remove the freespace, we'll manage it.
1126 */ 1117 */
1127 xfs_dir2_data_use_free(tp, bp, dup, 1118 xfs_dir2_data_use_free(tp, bp, dup,
1128 (xfs_dir2_data_aoff_t)((char *)dup - (char *)block), 1119 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
1129 be16_to_cpu(dup->length), &needlog, &needscan); 1120 be16_to_cpu(dup->length), &needlog, &needscan);
1130 /* 1121 /*
1131 * Create entry for . 1122 * Create entry for .
1132 */ 1123 */
1133 dep = (xfs_dir2_data_entry_t *) 1124 dep = (xfs_dir2_data_entry_t *)
1134 ((char *)block + XFS_DIR2_DATA_DOT_OFFSET); 1125 ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET);
1135 dep->inumber = cpu_to_be64(dp->i_ino); 1126 dep->inumber = cpu_to_be64(dp->i_ino);
1136 dep->namelen = 1; 1127 dep->namelen = 1;
1137 dep->name[0] = '.'; 1128 dep->name[0] = '.';
1138 tagp = xfs_dir2_data_entry_tag_p(dep); 1129 tagp = xfs_dir2_data_entry_tag_p(dep);
1139 *tagp = cpu_to_be16((char *)dep - (char *)block); 1130 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1140 xfs_dir2_data_log_entry(tp, bp, dep); 1131 xfs_dir2_data_log_entry(tp, bp, dep);
1141 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); 1132 blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
1142 blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1133 blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1143 (char *)dep - (char *)block)); 1134 (char *)dep - (char *)hdr));
1144 /* 1135 /*
1145 * Create entry for .. 1136 * Create entry for ..
1146 */ 1137 */
1147 dep = (xfs_dir2_data_entry_t *) 1138 dep = (xfs_dir2_data_entry_t *)
1148 ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET); 1139 ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET);
1149 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); 1140 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
1150 dep->namelen = 2; 1141 dep->namelen = 2;
1151 dep->name[0] = dep->name[1] = '.'; 1142 dep->name[0] = dep->name[1] = '.';
1152 tagp = xfs_dir2_data_entry_tag_p(dep); 1143 tagp = xfs_dir2_data_entry_tag_p(dep);
1153 *tagp = cpu_to_be16((char *)dep - (char *)block); 1144 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1154 xfs_dir2_data_log_entry(tp, bp, dep); 1145 xfs_dir2_data_log_entry(tp, bp, dep);
1155 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); 1146 blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
1156 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1147 blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1157 (char *)dep - (char *)block)); 1148 (char *)dep - (char *)hdr));
1158 offset = XFS_DIR2_DATA_FIRST_OFFSET; 1149 offset = XFS_DIR2_DATA_FIRST_OFFSET;
1159 /* 1150 /*
1160 * Loop over existing entries, stuff them in. 1151 * Loop over existing entries, stuff them in.
1161 */ 1152 */
1162 if ((i = 0) == sfp->hdr.count) 1153 i = 0;
1154 if (!sfp->count)
1163 sfep = NULL; 1155 sfep = NULL;
1164 else 1156 else
1165 sfep = xfs_dir2_sf_firstentry(sfp); 1157 sfep = xfs_dir2_sf_firstentry(sfp);
@@ -1179,43 +1171,40 @@ xfs_dir2_sf_to_block(
1179 * There should be a hole here, make one. 1171 * There should be a hole here, make one.
1180 */ 1172 */
1181 if (offset < newoffset) { 1173 if (offset < newoffset) {
1182 dup = (xfs_dir2_data_unused_t *) 1174 dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
1183 ((char *)block + offset);
1184 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 1175 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
1185 dup->length = cpu_to_be16(newoffset - offset); 1176 dup->length = cpu_to_be16(newoffset - offset);
1186 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16( 1177 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
1187 ((char *)dup - (char *)block)); 1178 ((char *)dup - (char *)hdr));
1188 xfs_dir2_data_log_unused(tp, bp, dup); 1179 xfs_dir2_data_log_unused(tp, bp, dup);
1189 (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block, 1180 xfs_dir2_data_freeinsert(hdr, dup, &dummy);
1190 dup, &dummy);
1191 offset += be16_to_cpu(dup->length); 1181 offset += be16_to_cpu(dup->length);
1192 continue; 1182 continue;
1193 } 1183 }
1194 /* 1184 /*
1195 * Copy a real entry. 1185 * Copy a real entry.
1196 */ 1186 */
1197 dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset); 1187 dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
1198 dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, 1188 dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep));
1199 xfs_dir2_sf_inumberp(sfep)));
1200 dep->namelen = sfep->namelen; 1189 dep->namelen = sfep->namelen;
1201 memcpy(dep->name, sfep->name, dep->namelen); 1190 memcpy(dep->name, sfep->name, dep->namelen);
1202 tagp = xfs_dir2_data_entry_tag_p(dep); 1191 tagp = xfs_dir2_data_entry_tag_p(dep);
1203 *tagp = cpu_to_be16((char *)dep - (char *)block); 1192 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1204 xfs_dir2_data_log_entry(tp, bp, dep); 1193 xfs_dir2_data_log_entry(tp, bp, dep);
1205 name.name = sfep->name; 1194 name.name = sfep->name;
1206 name.len = sfep->namelen; 1195 name.len = sfep->namelen;
1207 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> 1196 blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
1208 hashname(&name)); 1197 hashname(&name));
1209 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, 1198 blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
1210 (char *)dep - (char *)block)); 1199 (char *)dep - (char *)hdr));
1211 offset = (int)((char *)(tagp + 1) - (char *)block); 1200 offset = (int)((char *)(tagp + 1) - (char *)hdr);
1212 if (++i == sfp->hdr.count) 1201 if (++i == sfp->count)
1213 sfep = NULL; 1202 sfep = NULL;
1214 else 1203 else
1215 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 1204 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
1216 } 1205 }
1217 /* Done with the temporary buffer */ 1206 /* Done with the temporary buffer */
1218 kmem_free(buf); 1207 kmem_free(sfp);
1219 /* 1208 /*
1220 * Sort the leaf entries by hash value. 1209 * Sort the leaf entries by hash value.
1221 */ 1210 */
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
deleted file mode 100644
index 10e689676382..000000000000
--- a/fs/xfs/xfs_dir2_block.h
+++ /dev/null
@@ -1,92 +0,0 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_BLOCK_H__
19#define __XFS_DIR2_BLOCK_H__
20
21/*
22 * xfs_dir2_block.h
23 * Directory version 2, single block format structures
24 */
25
26struct uio;
27struct xfs_dabuf;
28struct xfs_da_args;
29struct xfs_dir2_data_hdr;
30struct xfs_dir2_leaf_entry;
31struct xfs_inode;
32struct xfs_mount;
33struct xfs_trans;
34
35/*
36 * The single block format is as follows:
37 * xfs_dir2_data_hdr_t structure
38 * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures
39 * xfs_dir2_leaf_entry_t structures
40 * xfs_dir2_block_tail_t structure
41 */
42
43#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */
44
45typedef struct xfs_dir2_block_tail {
46 __be32 count; /* count of leaf entries */
47 __be32 stale; /* count of stale lf entries */
48} xfs_dir2_block_tail_t;
49
50/*
51 * Generic single-block structure, for xfs_db.
52 */
53typedef struct xfs_dir2_block {
54 xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */
55 xfs_dir2_data_union_t u[1];
56 xfs_dir2_leaf_entry_t leaf[1];
57 xfs_dir2_block_tail_t tail;
58} xfs_dir2_block_t;
59
60/*
61 * Pointer to the leaf header embedded in a data block (1-block format)
62 */
63static inline xfs_dir2_block_tail_t *
64xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
65{
66 return (((xfs_dir2_block_tail_t *)
67 ((char *)(block) + (mp)->m_dirblksize)) - 1);
68}
69
70/*
71 * Pointer to the leaf entries embedded in a data block (1-block format)
72 */
73static inline struct xfs_dir2_leaf_entry *
74xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
75{
76 return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
77}
78
79/*
80 * Function declarations.
81 */
82extern int xfs_dir2_block_addname(struct xfs_da_args *args);
83extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
84 xfs_off_t *offset, filldir_t filldir);
85extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
86extern int xfs_dir2_block_removename(struct xfs_da_args *args);
87extern int xfs_dir2_block_replace(struct xfs_da_args *args);
88extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
89 struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
90extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
91
92#endif /* __XFS_DIR2_BLOCK_H__ */
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 921595b84f5b..5bbe2a8a023f 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -23,18 +23,18 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_dinode.h" 29#include "xfs_dinode.h"
32#include "xfs_inode.h" 30#include "xfs_inode.h"
33#include "xfs_dir2_data.h" 31#include "xfs_dir2_format.h"
34#include "xfs_dir2_leaf.h" 32#include "xfs_dir2_priv.h"
35#include "xfs_dir2_block.h"
36#include "xfs_error.h" 33#include "xfs_error.h"
37 34
35STATIC xfs_dir2_data_free_t *
36xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
37
38#ifdef DEBUG 38#ifdef DEBUG
39/* 39/*
40 * Check the consistency of the data block. 40 * Check the consistency of the data block.
@@ -50,7 +50,7 @@ xfs_dir2_data_check(
50 xfs_dir2_data_free_t *bf; /* bestfree table */ 50 xfs_dir2_data_free_t *bf; /* bestfree table */
51 xfs_dir2_block_tail_t *btp=NULL; /* block tail */ 51 xfs_dir2_block_tail_t *btp=NULL; /* block tail */
52 int count; /* count of entries found */ 52 int count; /* count of entries found */
53 xfs_dir2_data_t *d; /* data block pointer */ 53 xfs_dir2_data_hdr_t *hdr; /* data block header */
54 xfs_dir2_data_entry_t *dep; /* data entry */ 54 xfs_dir2_data_entry_t *dep; /* data entry */
55 xfs_dir2_data_free_t *dfp; /* bestfree entry */ 55 xfs_dir2_data_free_t *dfp; /* bestfree entry */
56 xfs_dir2_data_unused_t *dup; /* unused entry */ 56 xfs_dir2_data_unused_t *dup; /* unused entry */
@@ -66,17 +66,19 @@ xfs_dir2_data_check(
66 struct xfs_name name; 66 struct xfs_name name;
67 67
68 mp = dp->i_mount; 68 mp = dp->i_mount;
69 d = bp->data; 69 hdr = bp->data;
70 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 70 bf = hdr->bestfree;
71 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 71 p = (char *)(hdr + 1);
72 bf = d->hdr.bestfree; 72
73 p = (char *)d->u; 73 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
74 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 74 btp = xfs_dir2_block_tail_p(mp, hdr);
75 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
76 lep = xfs_dir2_block_leaf_p(btp); 75 lep = xfs_dir2_block_leaf_p(btp);
77 endp = (char *)lep; 76 endp = (char *)lep;
78 } else 77 } else {
79 endp = (char *)d + mp->m_dirblksize; 78 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
79 endp = (char *)hdr + mp->m_dirblksize;
80 }
81
80 count = lastfree = freeseen = 0; 82 count = lastfree = freeseen = 0;
81 /* 83 /*
82 * Account for zero bestfree entries. 84 * Account for zero bestfree entries.
@@ -108,8 +110,8 @@ xfs_dir2_data_check(
108 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 110 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
109 ASSERT(lastfree == 0); 111 ASSERT(lastfree == 0);
110 ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) == 112 ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
111 (char *)dup - (char *)d); 113 (char *)dup - (char *)hdr);
112 dfp = xfs_dir2_data_freefind(d, dup); 114 dfp = xfs_dir2_data_freefind(hdr, dup);
113 if (dfp) { 115 if (dfp) {
114 i = (int)(dfp - bf); 116 i = (int)(dfp - bf);
115 ASSERT((freeseen & (1 << i)) == 0); 117 ASSERT((freeseen & (1 << i)) == 0);
@@ -132,13 +134,13 @@ xfs_dir2_data_check(
132 ASSERT(dep->namelen != 0); 134 ASSERT(dep->namelen != 0);
133 ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0); 135 ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
134 ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) == 136 ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
135 (char *)dep - (char *)d); 137 (char *)dep - (char *)hdr);
136 count++; 138 count++;
137 lastfree = 0; 139 lastfree = 0;
138 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 140 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
139 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 141 addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
140 (xfs_dir2_data_aoff_t) 142 (xfs_dir2_data_aoff_t)
141 ((char *)dep - (char *)d)); 143 ((char *)dep - (char *)hdr));
142 name.name = dep->name; 144 name.name = dep->name;
143 name.len = dep->namelen; 145 name.len = dep->namelen;
144 hash = mp->m_dirnameops->hashname(&name); 146 hash = mp->m_dirnameops->hashname(&name);
@@ -155,9 +157,10 @@ xfs_dir2_data_check(
155 * Need to have seen all the entries and all the bestfree slots. 157 * Need to have seen all the entries and all the bestfree slots.
156 */ 158 */
157 ASSERT(freeseen == 7); 159 ASSERT(freeseen == 7);
158 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 160 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
159 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) { 161 for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
160 if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR) 162 if (lep[i].address ==
163 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
161 stale++; 164 stale++;
162 if (i > 0) 165 if (i > 0)
163 ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval)); 166 ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval));
@@ -172,9 +175,9 @@ xfs_dir2_data_check(
172 * Given a data block and an unused entry from that block, 175 * Given a data block and an unused entry from that block,
173 * return the bestfree entry if any that corresponds to it. 176 * return the bestfree entry if any that corresponds to it.
174 */ 177 */
175xfs_dir2_data_free_t * 178STATIC xfs_dir2_data_free_t *
176xfs_dir2_data_freefind( 179xfs_dir2_data_freefind(
177 xfs_dir2_data_t *d, /* data block */ 180 xfs_dir2_data_hdr_t *hdr, /* data block */
178 xfs_dir2_data_unused_t *dup) /* data unused entry */ 181 xfs_dir2_data_unused_t *dup) /* data unused entry */
179{ 182{
180 xfs_dir2_data_free_t *dfp; /* bestfree entry */ 183 xfs_dir2_data_free_t *dfp; /* bestfree entry */
@@ -184,17 +187,17 @@ xfs_dir2_data_freefind(
184 int seenzero; /* saw a 0 bestfree entry */ 187 int seenzero; /* saw a 0 bestfree entry */
185#endif 188#endif
186 189
187 off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d); 190 off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
188#if defined(DEBUG) && defined(__KERNEL__) 191#if defined(DEBUG) && defined(__KERNEL__)
189 /* 192 /*
190 * Validate some consistency in the bestfree table. 193 * Validate some consistency in the bestfree table.
191 * Check order, non-overlapping entries, and if we find the 194 * Check order, non-overlapping entries, and if we find the
192 * one we're looking for it has to be exact. 195 * one we're looking for it has to be exact.
193 */ 196 */
194 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 197 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
195 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 198 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
196 for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0; 199 for (dfp = &hdr->bestfree[0], seenzero = matched = 0;
197 dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; 200 dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
198 dfp++) { 201 dfp++) {
199 if (!dfp->offset) { 202 if (!dfp->offset) {
200 ASSERT(!dfp->length); 203 ASSERT(!dfp->length);
@@ -210,7 +213,7 @@ xfs_dir2_data_freefind(
210 else 213 else
211 ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off); 214 ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
212 ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length)); 215 ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
213 if (dfp > &d->hdr.bestfree[0]) 216 if (dfp > &hdr->bestfree[0])
214 ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length)); 217 ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
215 } 218 }
216#endif 219#endif
@@ -219,13 +222,13 @@ xfs_dir2_data_freefind(
219 * it can't be there since they're sorted. 222 * it can't be there since they're sorted.
220 */ 223 */
221 if (be16_to_cpu(dup->length) < 224 if (be16_to_cpu(dup->length) <
222 be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length)) 225 be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
223 return NULL; 226 return NULL;
224 /* 227 /*
225 * Look at the three bestfree entries for our guy. 228 * Look at the three bestfree entries for our guy.
226 */ 229 */
227 for (dfp = &d->hdr.bestfree[0]; 230 for (dfp = &hdr->bestfree[0];
228 dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT]; 231 dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
229 dfp++) { 232 dfp++) {
230 if (!dfp->offset) 233 if (!dfp->offset)
231 return NULL; 234 return NULL;
@@ -243,7 +246,7 @@ xfs_dir2_data_freefind(
243 */ 246 */
244xfs_dir2_data_free_t * /* entry inserted */ 247xfs_dir2_data_free_t * /* entry inserted */
245xfs_dir2_data_freeinsert( 248xfs_dir2_data_freeinsert(
246 xfs_dir2_data_t *d, /* data block pointer */ 249 xfs_dir2_data_hdr_t *hdr, /* data block pointer */
247 xfs_dir2_data_unused_t *dup, /* unused space */ 250 xfs_dir2_data_unused_t *dup, /* unused space */
248 int *loghead) /* log the data header (out) */ 251 int *loghead) /* log the data header (out) */
249{ 252{
@@ -251,12 +254,13 @@ xfs_dir2_data_freeinsert(
251 xfs_dir2_data_free_t new; /* new bestfree entry */ 254 xfs_dir2_data_free_t new; /* new bestfree entry */
252 255
253#ifdef __KERNEL__ 256#ifdef __KERNEL__
254 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 257 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
255 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 258 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
256#endif 259#endif
257 dfp = d->hdr.bestfree; 260 dfp = hdr->bestfree;
258 new.length = dup->length; 261 new.length = dup->length;
259 new.offset = cpu_to_be16((char *)dup - (char *)d); 262 new.offset = cpu_to_be16((char *)dup - (char *)hdr);
263
260 /* 264 /*
261 * Insert at position 0, 1, or 2; or not at all. 265 * Insert at position 0, 1, or 2; or not at all.
262 */ 266 */
@@ -286,36 +290,36 @@ xfs_dir2_data_freeinsert(
286 */ 290 */
287STATIC void 291STATIC void
288xfs_dir2_data_freeremove( 292xfs_dir2_data_freeremove(
289 xfs_dir2_data_t *d, /* data block pointer */ 293 xfs_dir2_data_hdr_t *hdr, /* data block header */
290 xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */ 294 xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */
291 int *loghead) /* out: log data header */ 295 int *loghead) /* out: log data header */
292{ 296{
293#ifdef __KERNEL__ 297#ifdef __KERNEL__
294 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 298 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
295 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 299 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
296#endif 300#endif
297 /* 301 /*
298 * It's the first entry, slide the next 2 up. 302 * It's the first entry, slide the next 2 up.
299 */ 303 */
300 if (dfp == &d->hdr.bestfree[0]) { 304 if (dfp == &hdr->bestfree[0]) {
301 d->hdr.bestfree[0] = d->hdr.bestfree[1]; 305 hdr->bestfree[0] = hdr->bestfree[1];
302 d->hdr.bestfree[1] = d->hdr.bestfree[2]; 306 hdr->bestfree[1] = hdr->bestfree[2];
303 } 307 }
304 /* 308 /*
305 * It's the second entry, slide the 3rd entry up. 309 * It's the second entry, slide the 3rd entry up.
306 */ 310 */
307 else if (dfp == &d->hdr.bestfree[1]) 311 else if (dfp == &hdr->bestfree[1])
308 d->hdr.bestfree[1] = d->hdr.bestfree[2]; 312 hdr->bestfree[1] = hdr->bestfree[2];
309 /* 313 /*
310 * Must be the last entry. 314 * Must be the last entry.
311 */ 315 */
312 else 316 else
313 ASSERT(dfp == &d->hdr.bestfree[2]); 317 ASSERT(dfp == &hdr->bestfree[2]);
314 /* 318 /*
315 * Clear the 3rd entry, must be zero now. 319 * Clear the 3rd entry, must be zero now.
316 */ 320 */
317 d->hdr.bestfree[2].length = 0; 321 hdr->bestfree[2].length = 0;
318 d->hdr.bestfree[2].offset = 0; 322 hdr->bestfree[2].offset = 0;
319 *loghead = 1; 323 *loghead = 1;
320} 324}
321 325
@@ -325,7 +329,7 @@ xfs_dir2_data_freeremove(
325void 329void
326xfs_dir2_data_freescan( 330xfs_dir2_data_freescan(
327 xfs_mount_t *mp, /* filesystem mount point */ 331 xfs_mount_t *mp, /* filesystem mount point */
328 xfs_dir2_data_t *d, /* data block pointer */ 332 xfs_dir2_data_hdr_t *hdr, /* data block header */
329 int *loghead) /* out: log data header */ 333 int *loghead) /* out: log data header */
330{ 334{
331 xfs_dir2_block_tail_t *btp; /* block tail */ 335 xfs_dir2_block_tail_t *btp; /* block tail */
@@ -335,23 +339,23 @@ xfs_dir2_data_freescan(
335 char *p; /* current entry pointer */ 339 char *p; /* current entry pointer */
336 340
337#ifdef __KERNEL__ 341#ifdef __KERNEL__
338 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 342 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
339 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 343 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
340#endif 344#endif
341 /* 345 /*
342 * Start by clearing the table. 346 * Start by clearing the table.
343 */ 347 */
344 memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree)); 348 memset(hdr->bestfree, 0, sizeof(hdr->bestfree));
345 *loghead = 1; 349 *loghead = 1;
346 /* 350 /*
347 * Set up pointers. 351 * Set up pointers.
348 */ 352 */
349 p = (char *)d->u; 353 p = (char *)(hdr + 1);
350 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) { 354 if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
351 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); 355 btp = xfs_dir2_block_tail_p(mp, hdr);
352 endp = (char *)xfs_dir2_block_leaf_p(btp); 356 endp = (char *)xfs_dir2_block_leaf_p(btp);
353 } else 357 } else
354 endp = (char *)d + mp->m_dirblksize; 358 endp = (char *)hdr + mp->m_dirblksize;
355 /* 359 /*
356 * Loop over the block's entries. 360 * Loop over the block's entries.
357 */ 361 */
@@ -361,9 +365,9 @@ xfs_dir2_data_freescan(
361 * If it's a free entry, insert it. 365 * If it's a free entry, insert it.
362 */ 366 */
363 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { 367 if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
364 ASSERT((char *)dup - (char *)d == 368 ASSERT((char *)dup - (char *)hdr ==
365 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); 369 be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
366 xfs_dir2_data_freeinsert(d, dup, loghead); 370 xfs_dir2_data_freeinsert(hdr, dup, loghead);
367 p += be16_to_cpu(dup->length); 371 p += be16_to_cpu(dup->length);
368 } 372 }
369 /* 373 /*
@@ -371,7 +375,7 @@ xfs_dir2_data_freescan(
371 */ 375 */
372 else { 376 else {
373 dep = (xfs_dir2_data_entry_t *)p; 377 dep = (xfs_dir2_data_entry_t *)p;
374 ASSERT((char *)dep - (char *)d == 378 ASSERT((char *)dep - (char *)hdr ==
375 be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep))); 379 be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
376 p += xfs_dir2_data_entsize(dep->namelen); 380 p += xfs_dir2_data_entsize(dep->namelen);
377 } 381 }
@@ -389,7 +393,7 @@ xfs_dir2_data_init(
389 xfs_dabuf_t **bpp) /* output block buffer */ 393 xfs_dabuf_t **bpp) /* output block buffer */
390{ 394{
391 xfs_dabuf_t *bp; /* block buffer */ 395 xfs_dabuf_t *bp; /* block buffer */
392 xfs_dir2_data_t *d; /* pointer to block */ 396 xfs_dir2_data_hdr_t *hdr; /* data block header */
393 xfs_inode_t *dp; /* incore directory inode */ 397 xfs_inode_t *dp; /* incore directory inode */
394 xfs_dir2_data_unused_t *dup; /* unused entry pointer */ 398 xfs_dir2_data_unused_t *dup; /* unused entry pointer */
395 int error; /* error return value */ 399 int error; /* error return value */
@@ -410,26 +414,28 @@ xfs_dir2_data_init(
410 return error; 414 return error;
411 } 415 }
412 ASSERT(bp != NULL); 416 ASSERT(bp != NULL);
417
413 /* 418 /*
414 * Initialize the header. 419 * Initialize the header.
415 */ 420 */
416 d = bp->data; 421 hdr = bp->data;
417 d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); 422 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
418 d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr)); 423 hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr));
419 for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) { 424 for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
420 d->hdr.bestfree[i].length = 0; 425 hdr->bestfree[i].length = 0;
421 d->hdr.bestfree[i].offset = 0; 426 hdr->bestfree[i].offset = 0;
422 } 427 }
428
423 /* 429 /*
424 * Set up an unused entry for the block's body. 430 * Set up an unused entry for the block's body.
425 */ 431 */
426 dup = &d->u[0].unused; 432 dup = (xfs_dir2_data_unused_t *)(hdr + 1);
427 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 433 dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
428 434
429 t=mp->m_dirblksize - (uint)sizeof(d->hdr); 435 t = mp->m_dirblksize - (uint)sizeof(*hdr);
430 d->hdr.bestfree[0].length = cpu_to_be16(t); 436 hdr->bestfree[0].length = cpu_to_be16(t);
431 dup->length = cpu_to_be16(t); 437 dup->length = cpu_to_be16(t);
432 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d); 438 *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
433 /* 439 /*
434 * Log it and return it. 440 * Log it and return it.
435 */ 441 */
@@ -448,14 +454,14 @@ xfs_dir2_data_log_entry(
448 xfs_dabuf_t *bp, /* block buffer */ 454 xfs_dabuf_t *bp, /* block buffer */
449 xfs_dir2_data_entry_t *dep) /* data entry pointer */ 455 xfs_dir2_data_entry_t *dep) /* data entry pointer */
450{ 456{
451 xfs_dir2_data_t *d; /* data block pointer */ 457 xfs_dir2_data_hdr_t *hdr = bp->data;
458
459 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
460 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
452 461
453 d = bp->data; 462 xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
454 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
455 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
456 xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
457 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) - 463 (uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
458 (char *)d - 1)); 464 (char *)hdr - 1));
459} 465}
460 466
461/* 467/*
@@ -466,13 +472,12 @@ xfs_dir2_data_log_header(
466 xfs_trans_t *tp, /* transaction pointer */ 472 xfs_trans_t *tp, /* transaction pointer */
467 xfs_dabuf_t *bp) /* block buffer */ 473 xfs_dabuf_t *bp) /* block buffer */
468{ 474{
469 xfs_dir2_data_t *d; /* data block pointer */ 475 xfs_dir2_data_hdr_t *hdr = bp->data;
470 476
471 d = bp->data; 477 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
472 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 478 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
473 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 479
474 xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d), 480 xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
475 (uint)(sizeof(d->hdr) - 1));
476} 481}
477 482
478/* 483/*
@@ -484,23 +489,23 @@ xfs_dir2_data_log_unused(
484 xfs_dabuf_t *bp, /* block buffer */ 489 xfs_dabuf_t *bp, /* block buffer */
485 xfs_dir2_data_unused_t *dup) /* data unused pointer */ 490 xfs_dir2_data_unused_t *dup) /* data unused pointer */
486{ 491{
487 xfs_dir2_data_t *d; /* data block pointer */ 492 xfs_dir2_data_hdr_t *hdr = bp->data;
493
494 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
495 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
488 496
489 d = bp->data;
490 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
491 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
492 /* 497 /*
493 * Log the first part of the unused entry. 498 * Log the first part of the unused entry.
494 */ 499 */
495 xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d), 500 xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
496 (uint)((char *)&dup->length + sizeof(dup->length) - 501 (uint)((char *)&dup->length + sizeof(dup->length) -
497 1 - (char *)d)); 502 1 - (char *)hdr));
498 /* 503 /*
499 * Log the end (tag) of the unused entry. 504 * Log the end (tag) of the unused entry.
500 */ 505 */
501 xfs_da_log_buf(tp, bp, 506 xfs_da_log_buf(tp, bp,
502 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d), 507 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
503 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d + 508 (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
504 sizeof(xfs_dir2_data_off_t) - 1)); 509 sizeof(xfs_dir2_data_off_t) - 1));
505} 510}
506 511
@@ -517,7 +522,7 @@ xfs_dir2_data_make_free(
517 int *needlogp, /* out: log header */ 522 int *needlogp, /* out: log header */
518 int *needscanp) /* out: regen bestfree */ 523 int *needscanp) /* out: regen bestfree */
519{ 524{
520 xfs_dir2_data_t *d; /* data block pointer */ 525 xfs_dir2_data_hdr_t *hdr; /* data block pointer */
521 xfs_dir2_data_free_t *dfp; /* bestfree pointer */ 526 xfs_dir2_data_free_t *dfp; /* bestfree pointer */
522 char *endptr; /* end of data area */ 527 char *endptr; /* end of data area */
523 xfs_mount_t *mp; /* filesystem mount point */ 528 xfs_mount_t *mp; /* filesystem mount point */
@@ -527,28 +532,29 @@ xfs_dir2_data_make_free(
527 xfs_dir2_data_unused_t *prevdup; /* unused entry before us */ 532 xfs_dir2_data_unused_t *prevdup; /* unused entry before us */
528 533
529 mp = tp->t_mountp; 534 mp = tp->t_mountp;
530 d = bp->data; 535 hdr = bp->data;
536
531 /* 537 /*
532 * Figure out where the end of the data area is. 538 * Figure out where the end of the data area is.
533 */ 539 */
534 if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC) 540 if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC))
535 endptr = (char *)d + mp->m_dirblksize; 541 endptr = (char *)hdr + mp->m_dirblksize;
536 else { 542 else {
537 xfs_dir2_block_tail_t *btp; /* block tail */ 543 xfs_dir2_block_tail_t *btp; /* block tail */
538 544
539 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 545 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
540 btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d); 546 btp = xfs_dir2_block_tail_p(mp, hdr);
541 endptr = (char *)xfs_dir2_block_leaf_p(btp); 547 endptr = (char *)xfs_dir2_block_leaf_p(btp);
542 } 548 }
543 /* 549 /*
544 * If this isn't the start of the block, then back up to 550 * If this isn't the start of the block, then back up to
545 * the previous entry and see if it's free. 551 * the previous entry and see if it's free.
546 */ 552 */
547 if (offset > sizeof(d->hdr)) { 553 if (offset > sizeof(*hdr)) {
548 __be16 *tagp; /* tag just before us */ 554 __be16 *tagp; /* tag just before us */
549 555
550 tagp = (__be16 *)((char *)d + offset) - 1; 556 tagp = (__be16 *)((char *)hdr + offset) - 1;
551 prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp)); 557 prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
552 if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG) 558 if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
553 prevdup = NULL; 559 prevdup = NULL;
554 } else 560 } else
@@ -557,9 +563,9 @@ xfs_dir2_data_make_free(
557 * If this isn't the end of the block, see if the entry after 563 * If this isn't the end of the block, see if the entry after
558 * us is free. 564 * us is free.
559 */ 565 */
560 if ((char *)d + offset + len < endptr) { 566 if ((char *)hdr + offset + len < endptr) {
561 postdup = 567 postdup =
562 (xfs_dir2_data_unused_t *)((char *)d + offset + len); 568 (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
563 if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG) 569 if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
564 postdup = NULL; 570 postdup = NULL;
565 } else 571 } else
@@ -576,21 +582,21 @@ xfs_dir2_data_make_free(
576 /* 582 /*
577 * See if prevdup and/or postdup are in bestfree table. 583 * See if prevdup and/or postdup are in bestfree table.
578 */ 584 */
579 dfp = xfs_dir2_data_freefind(d, prevdup); 585 dfp = xfs_dir2_data_freefind(hdr, prevdup);
580 dfp2 = xfs_dir2_data_freefind(d, postdup); 586 dfp2 = xfs_dir2_data_freefind(hdr, postdup);
581 /* 587 /*
582 * We need a rescan unless there are exactly 2 free entries 588 * We need a rescan unless there are exactly 2 free entries
583 * namely our two. Then we know what's happening, otherwise 589 * namely our two. Then we know what's happening, otherwise
584 * since the third bestfree is there, there might be more 590 * since the third bestfree is there, there might be more
585 * entries. 591 * entries.
586 */ 592 */
587 needscan = (d->hdr.bestfree[2].length != 0); 593 needscan = (hdr->bestfree[2].length != 0);
588 /* 594 /*
589 * Fix up the new big freespace. 595 * Fix up the new big freespace.
590 */ 596 */
591 be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length)); 597 be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
592 *xfs_dir2_data_unused_tag_p(prevdup) = 598 *xfs_dir2_data_unused_tag_p(prevdup) =
593 cpu_to_be16((char *)prevdup - (char *)d); 599 cpu_to_be16((char *)prevdup - (char *)hdr);
594 xfs_dir2_data_log_unused(tp, bp, prevdup); 600 xfs_dir2_data_log_unused(tp, bp, prevdup);
595 if (!needscan) { 601 if (!needscan) {
596 /* 602 /*
@@ -600,18 +606,18 @@ xfs_dir2_data_make_free(
600 * Remove entry 1 first then entry 0. 606 * Remove entry 1 first then entry 0.
601 */ 607 */
602 ASSERT(dfp && dfp2); 608 ASSERT(dfp && dfp2);
603 if (dfp == &d->hdr.bestfree[1]) { 609 if (dfp == &hdr->bestfree[1]) {
604 dfp = &d->hdr.bestfree[0]; 610 dfp = &hdr->bestfree[0];
605 ASSERT(dfp2 == dfp); 611 ASSERT(dfp2 == dfp);
606 dfp2 = &d->hdr.bestfree[1]; 612 dfp2 = &hdr->bestfree[1];
607 } 613 }
608 xfs_dir2_data_freeremove(d, dfp2, needlogp); 614 xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
609 xfs_dir2_data_freeremove(d, dfp, needlogp); 615 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
610 /* 616 /*
611 * Now insert the new entry. 617 * Now insert the new entry.
612 */ 618 */
613 dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp); 619 dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
614 ASSERT(dfp == &d->hdr.bestfree[0]); 620 ASSERT(dfp == &hdr->bestfree[0]);
615 ASSERT(dfp->length == prevdup->length); 621 ASSERT(dfp->length == prevdup->length);
616 ASSERT(!dfp[1].length); 622 ASSERT(!dfp[1].length);
617 ASSERT(!dfp[2].length); 623 ASSERT(!dfp[2].length);
@@ -621,10 +627,10 @@ xfs_dir2_data_make_free(
621 * The entry before us is free, merge with it. 627 * The entry before us is free, merge with it.
622 */ 628 */
623 else if (prevdup) { 629 else if (prevdup) {
624 dfp = xfs_dir2_data_freefind(d, prevdup); 630 dfp = xfs_dir2_data_freefind(hdr, prevdup);
625 be16_add_cpu(&prevdup->length, len); 631 be16_add_cpu(&prevdup->length, len);
626 *xfs_dir2_data_unused_tag_p(prevdup) = 632 *xfs_dir2_data_unused_tag_p(prevdup) =
627 cpu_to_be16((char *)prevdup - (char *)d); 633 cpu_to_be16((char *)prevdup - (char *)hdr);
628 xfs_dir2_data_log_unused(tp, bp, prevdup); 634 xfs_dir2_data_log_unused(tp, bp, prevdup);
629 /* 635 /*
630 * If the previous entry was in the table, the new entry 636 * If the previous entry was in the table, the new entry
@@ -632,27 +638,27 @@ xfs_dir2_data_make_free(
632 * the old one and add the new one. 638 * the old one and add the new one.
633 */ 639 */
634 if (dfp) { 640 if (dfp) {
635 xfs_dir2_data_freeremove(d, dfp, needlogp); 641 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
636 (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp); 642 xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
637 } 643 }
638 /* 644 /*
639 * Otherwise we need a scan if the new entry is big enough. 645 * Otherwise we need a scan if the new entry is big enough.
640 */ 646 */
641 else { 647 else {
642 needscan = be16_to_cpu(prevdup->length) > 648 needscan = be16_to_cpu(prevdup->length) >
643 be16_to_cpu(d->hdr.bestfree[2].length); 649 be16_to_cpu(hdr->bestfree[2].length);
644 } 650 }
645 } 651 }
646 /* 652 /*
647 * The following entry is free, merge with it. 653 * The following entry is free, merge with it.
648 */ 654 */
649 else if (postdup) { 655 else if (postdup) {
650 dfp = xfs_dir2_data_freefind(d, postdup); 656 dfp = xfs_dir2_data_freefind(hdr, postdup);
651 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); 657 newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
652 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 658 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
653 newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length)); 659 newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
654 *xfs_dir2_data_unused_tag_p(newdup) = 660 *xfs_dir2_data_unused_tag_p(newdup) =
655 cpu_to_be16((char *)newdup - (char *)d); 661 cpu_to_be16((char *)newdup - (char *)hdr);
656 xfs_dir2_data_log_unused(tp, bp, newdup); 662 xfs_dir2_data_log_unused(tp, bp, newdup);
657 /* 663 /*
658 * If the following entry was in the table, the new entry 664 * If the following entry was in the table, the new entry
@@ -660,28 +666,28 @@ xfs_dir2_data_make_free(
660 * the old one and add the new one. 666 * the old one and add the new one.
661 */ 667 */
662 if (dfp) { 668 if (dfp) {
663 xfs_dir2_data_freeremove(d, dfp, needlogp); 669 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
664 (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); 670 xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
665 } 671 }
666 /* 672 /*
667 * Otherwise we need a scan if the new entry is big enough. 673 * Otherwise we need a scan if the new entry is big enough.
668 */ 674 */
669 else { 675 else {
670 needscan = be16_to_cpu(newdup->length) > 676 needscan = be16_to_cpu(newdup->length) >
671 be16_to_cpu(d->hdr.bestfree[2].length); 677 be16_to_cpu(hdr->bestfree[2].length);
672 } 678 }
673 } 679 }
674 /* 680 /*
675 * Neither neighbor is free. Make a new entry. 681 * Neither neighbor is free. Make a new entry.
676 */ 682 */
677 else { 683 else {
678 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset); 684 newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
679 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 685 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
680 newdup->length = cpu_to_be16(len); 686 newdup->length = cpu_to_be16(len);
681 *xfs_dir2_data_unused_tag_p(newdup) = 687 *xfs_dir2_data_unused_tag_p(newdup) =
682 cpu_to_be16((char *)newdup - (char *)d); 688 cpu_to_be16((char *)newdup - (char *)hdr);
683 xfs_dir2_data_log_unused(tp, bp, newdup); 689 xfs_dir2_data_log_unused(tp, bp, newdup);
684 (void)xfs_dir2_data_freeinsert(d, newdup, needlogp); 690 xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
685 } 691 }
686 *needscanp = needscan; 692 *needscanp = needscan;
687} 693}
@@ -699,7 +705,7 @@ xfs_dir2_data_use_free(
699 int *needlogp, /* out: need to log header */ 705 int *needlogp, /* out: need to log header */
700 int *needscanp) /* out: need regen bestfree */ 706 int *needscanp) /* out: need regen bestfree */
701{ 707{
702 xfs_dir2_data_t *d; /* data block */ 708 xfs_dir2_data_hdr_t *hdr; /* data block header */
703 xfs_dir2_data_free_t *dfp; /* bestfree pointer */ 709 xfs_dir2_data_free_t *dfp; /* bestfree pointer */
704 int matchback; /* matches end of freespace */ 710 int matchback; /* matches end of freespace */
705 int matchfront; /* matches start of freespace */ 711 int matchfront; /* matches start of freespace */
@@ -708,24 +714,24 @@ xfs_dir2_data_use_free(
708 xfs_dir2_data_unused_t *newdup2; /* another new unused entry */ 714 xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
709 int oldlen; /* old unused entry's length */ 715 int oldlen; /* old unused entry's length */
710 716
711 d = bp->data; 717 hdr = bp->data;
712 ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC || 718 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
713 be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC); 719 hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
714 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG); 720 ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
715 ASSERT(offset >= (char *)dup - (char *)d); 721 ASSERT(offset >= (char *)dup - (char *)hdr);
716 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d); 722 ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
717 ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); 723 ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
718 /* 724 /*
719 * Look up the entry in the bestfree table. 725 * Look up the entry in the bestfree table.
720 */ 726 */
721 dfp = xfs_dir2_data_freefind(d, dup); 727 dfp = xfs_dir2_data_freefind(hdr, dup);
722 oldlen = be16_to_cpu(dup->length); 728 oldlen = be16_to_cpu(dup->length);
723 ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length)); 729 ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length));
724 /* 730 /*
725 * Check for alignment with front and back of the entry. 731 * Check for alignment with front and back of the entry.
726 */ 732 */
727 matchfront = (char *)dup - (char *)d == offset; 733 matchfront = (char *)dup - (char *)hdr == offset;
728 matchback = (char *)dup + oldlen - (char *)d == offset + len; 734 matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
729 ASSERT(*needscanp == 0); 735 ASSERT(*needscanp == 0);
730 needscan = 0; 736 needscan = 0;
731 /* 737 /*
@@ -734,9 +740,9 @@ xfs_dir2_data_use_free(
734 */ 740 */
735 if (matchfront && matchback) { 741 if (matchfront && matchback) {
736 if (dfp) { 742 if (dfp) {
737 needscan = (d->hdr.bestfree[2].offset != 0); 743 needscan = (hdr->bestfree[2].offset != 0);
738 if (!needscan) 744 if (!needscan)
739 xfs_dir2_data_freeremove(d, dfp, needlogp); 745 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
740 } 746 }
741 } 747 }
742 /* 748 /*
@@ -744,27 +750,27 @@ xfs_dir2_data_use_free(
744 * Make a new entry with the remaining freespace. 750 * Make a new entry with the remaining freespace.
745 */ 751 */
746 else if (matchfront) { 752 else if (matchfront) {
747 newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len); 753 newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
748 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 754 newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
749 newdup->length = cpu_to_be16(oldlen - len); 755 newdup->length = cpu_to_be16(oldlen - len);
750 *xfs_dir2_data_unused_tag_p(newdup) = 756 *xfs_dir2_data_unused_tag_p(newdup) =
751 cpu_to_be16((char *)newdup - (char *)d); 757 cpu_to_be16((char *)newdup - (char *)hdr);
752 xfs_dir2_data_log_unused(tp, bp, newdup); 758 xfs_dir2_data_log_unused(tp, bp, newdup);
753 /* 759 /*
754 * If it was in the table, remove it and add the new one. 760 * If it was in the table, remove it and add the new one.
755 */ 761 */
756 if (dfp) { 762 if (dfp) {
757 xfs_dir2_data_freeremove(d, dfp, needlogp); 763 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
758 dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); 764 dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
759 ASSERT(dfp != NULL); 765 ASSERT(dfp != NULL);
760 ASSERT(dfp->length == newdup->length); 766 ASSERT(dfp->length == newdup->length);
761 ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); 767 ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
762 /* 768 /*
763 * If we got inserted at the last slot, 769 * If we got inserted at the last slot,
764 * that means we don't know if there was a better 770 * that means we don't know if there was a better
765 * choice for the last slot, or not. Rescan. 771 * choice for the last slot, or not. Rescan.
766 */ 772 */
767 needscan = dfp == &d->hdr.bestfree[2]; 773 needscan = dfp == &hdr->bestfree[2];
768 } 774 }
769 } 775 }
770 /* 776 /*
@@ -773,25 +779,25 @@ xfs_dir2_data_use_free(
773 */ 779 */
774 else if (matchback) { 780 else if (matchback) {
775 newdup = dup; 781 newdup = dup;
776 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); 782 newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
777 *xfs_dir2_data_unused_tag_p(newdup) = 783 *xfs_dir2_data_unused_tag_p(newdup) =
778 cpu_to_be16((char *)newdup - (char *)d); 784 cpu_to_be16((char *)newdup - (char *)hdr);
779 xfs_dir2_data_log_unused(tp, bp, newdup); 785 xfs_dir2_data_log_unused(tp, bp, newdup);
780 /* 786 /*
781 * If it was in the table, remove it and add the new one. 787 * If it was in the table, remove it and add the new one.
782 */ 788 */
783 if (dfp) { 789 if (dfp) {
784 xfs_dir2_data_freeremove(d, dfp, needlogp); 790 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
785 dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp); 791 dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
786 ASSERT(dfp != NULL); 792 ASSERT(dfp != NULL);
787 ASSERT(dfp->length == newdup->length); 793 ASSERT(dfp->length == newdup->length);
788 ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d); 794 ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
789 /* 795 /*
790 * If we got inserted at the last slot, 796 * If we got inserted at the last slot,
791 * that means we don't know if there was a better 797 * that means we don't know if there was a better
792 * choice for the last slot, or not. Rescan. 798 * choice for the last slot, or not. Rescan.
793 */ 799 */
794 needscan = dfp == &d->hdr.bestfree[2]; 800 needscan = dfp == &hdr->bestfree[2];
795 } 801 }
796 } 802 }
797 /* 803 /*
@@ -800,15 +806,15 @@ xfs_dir2_data_use_free(
800 */ 806 */
801 else { 807 else {
802 newdup = dup; 808 newdup = dup;
803 newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup); 809 newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
804 *xfs_dir2_data_unused_tag_p(newdup) = 810 *xfs_dir2_data_unused_tag_p(newdup) =
805 cpu_to_be16((char *)newdup - (char *)d); 811 cpu_to_be16((char *)newdup - (char *)hdr);
806 xfs_dir2_data_log_unused(tp, bp, newdup); 812 xfs_dir2_data_log_unused(tp, bp, newdup);
807 newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len); 813 newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
808 newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); 814 newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
809 newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length)); 815 newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
810 *xfs_dir2_data_unused_tag_p(newdup2) = 816 *xfs_dir2_data_unused_tag_p(newdup2) =
811 cpu_to_be16((char *)newdup2 - (char *)d); 817 cpu_to_be16((char *)newdup2 - (char *)hdr);
812 xfs_dir2_data_log_unused(tp, bp, newdup2); 818 xfs_dir2_data_log_unused(tp, bp, newdup2);
813 /* 819 /*
814 * If the old entry was in the table, we need to scan 820 * If the old entry was in the table, we need to scan
@@ -819,13 +825,12 @@ xfs_dir2_data_use_free(
819 * the 2 new will work. 825 * the 2 new will work.
820 */ 826 */
821 if (dfp) { 827 if (dfp) {
822 needscan = (d->hdr.bestfree[2].length != 0); 828 needscan = (hdr->bestfree[2].length != 0);
823 if (!needscan) { 829 if (!needscan) {
824 xfs_dir2_data_freeremove(d, dfp, needlogp); 830 xfs_dir2_data_freeremove(hdr, dfp, needlogp);
825 (void)xfs_dir2_data_freeinsert(d, newdup, 831 xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
826 needlogp); 832 xfs_dir2_data_freeinsert(hdr, newdup2,
827 (void)xfs_dir2_data_freeinsert(d, newdup2, 833 needlogp);
828 needlogp);
829 } 834 }
830 } 835 }
831 } 836 }
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
deleted file mode 100644
index efbc290c7fec..000000000000
--- a/fs/xfs/xfs_dir2_data.h
+++ /dev/null
@@ -1,184 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_DATA_H__
19#define __XFS_DIR2_DATA_H__
20
21/*
22 * Directory format 2, data block structures.
23 */
24
25struct xfs_dabuf;
26struct xfs_da_args;
27struct xfs_inode;
28struct xfs_trans;
29
30/*
31 * Constants.
32 */
33#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: for multiblock dirs */
34#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */
35#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG)
36#define XFS_DIR2_DATA_FREE_TAG 0xffff
37#define XFS_DIR2_DATA_FD_COUNT 3
38
39/*
40 * Directory address space divided into sections,
41 * spaces separated by 32GB.
42 */
43#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
44#define XFS_DIR2_DATA_SPACE 0
45#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
46#define XFS_DIR2_DATA_FIRSTDB(mp) \
47 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
48
49/*
50 * Offsets of . and .. in data space (always block 0)
51 */
52#define XFS_DIR2_DATA_DOT_OFFSET \
53 ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
54#define XFS_DIR2_DATA_DOTDOT_OFFSET \
55 (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
56#define XFS_DIR2_DATA_FIRST_OFFSET \
57 (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
58
59/*
60 * Structures.
61 */
62
63/*
64 * Describe a free area in the data block.
65 * The freespace will be formatted as a xfs_dir2_data_unused_t.
66 */
67typedef struct xfs_dir2_data_free {
68 __be16 offset; /* start of freespace */
69 __be16 length; /* length of freespace */
70} xfs_dir2_data_free_t;
71
72/*
73 * Header for the data blocks.
74 * Always at the beginning of a directory-sized block.
75 * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
76 */
77typedef struct xfs_dir2_data_hdr {
78 __be32 magic; /* XFS_DIR2_DATA_MAGIC */
79 /* or XFS_DIR2_BLOCK_MAGIC */
80 xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT];
81} xfs_dir2_data_hdr_t;
82
83/*
84 * Active entry in a data block. Aligned to 8 bytes.
85 * Tag appears as the last 2 bytes.
86 */
87typedef struct xfs_dir2_data_entry {
88 __be64 inumber; /* inode number */
89 __u8 namelen; /* name length */
90 __u8 name[1]; /* name bytes, no null */
91 /* variable offset */
92 __be16 tag; /* starting offset of us */
93} xfs_dir2_data_entry_t;
94
95/*
96 * Unused entry in a data block. Aligned to 8 bytes.
97 * Tag appears as the last 2 bytes.
98 */
99typedef struct xfs_dir2_data_unused {
100 __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */
101 __be16 length; /* total free length */
102 /* variable offset */
103 __be16 tag; /* starting offset of us */
104} xfs_dir2_data_unused_t;
105
106typedef union {
107 xfs_dir2_data_entry_t entry;
108 xfs_dir2_data_unused_t unused;
109} xfs_dir2_data_union_t;
110
111/*
112 * Generic data block structure, for xfs_db.
113 */
114typedef struct xfs_dir2_data {
115 xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */
116 xfs_dir2_data_union_t u[1];
117} xfs_dir2_data_t;
118
119/*
120 * Macros.
121 */
122
123/*
124 * Size of a data entry.
125 */
126static inline int xfs_dir2_data_entsize(int n)
127{
128 return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
129 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
130}
131
132/*
133 * Pointer to an entry's tag word.
134 */
135static inline __be16 *
136xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
137{
138 return (__be16 *)((char *)dep +
139 xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
140}
141
142/*
143 * Pointer to a freespace's tag word.
144 */
145static inline __be16 *
146xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
147{
148 return (__be16 *)((char *)dup +
149 be16_to_cpu(dup->length) - sizeof(__be16));
150}
151
152/*
153 * Function declarations.
154 */
155#ifdef DEBUG
156extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
157#else
158#define xfs_dir2_data_check(dp,bp)
159#endif
160extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d,
161 xfs_dir2_data_unused_t *dup);
162extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
163 xfs_dir2_data_unused_t *dup, int *loghead);
164extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
165 int *loghead);
166extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
167 struct xfs_dabuf **bpp);
168extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
169 xfs_dir2_data_entry_t *dep);
170extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
171 struct xfs_dabuf *bp);
172extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
173 xfs_dir2_data_unused_t *dup);
174extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
175 xfs_dir2_data_aoff_t offset,
176 xfs_dir2_data_aoff_t len, int *needlogp,
177 int *needscanp);
178extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
179 xfs_dir2_data_unused_t *dup,
180 xfs_dir2_data_aoff_t offset,
181 xfs_dir2_data_aoff_t len, int *needlogp,
182 int *needscanp);
183
184#endif /* __XFS_DIR2_DATA_H__ */
diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_dir2_format.h
new file mode 100644
index 000000000000..07270981f48f
--- /dev/null
+++ b/fs/xfs/xfs_dir2_format.h
@@ -0,0 +1,597 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_FORMAT_H__
19#define __XFS_DIR2_FORMAT_H__
20
21/*
22 * Directory version 2.
23 *
24 * There are 4 possible formats:
25 * - shortform - embedded into the inode
26 * - single block - data with embedded leaf at the end
27 * - multiple data blocks, single leaf+freeindex block
28 * - data blocks, node and leaf blocks (btree), freeindex blocks
29 *
30 * Note: many node block structures and constants are shared with the attr
31 * code and defined in xfs_da_btree.h.
32 */
33
34#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: single block dirs */
35#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: multiblock dirs */
36#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F: free index blocks */
37
38/*
39 * Byte offset in data block and shortform entry.
40 */
41typedef __uint16_t xfs_dir2_data_off_t;
42#define NULLDATAOFF 0xffffU
43typedef uint xfs_dir2_data_aoff_t; /* argument form */
44
45/*
46 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
47 * Only need 16 bits, this is the byte offset into the single block form.
48 */
49typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
50
51/*
52 * Offset in data space of a data entry.
53 */
54typedef __uint32_t xfs_dir2_dataptr_t;
55#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
56#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
57
58/*
59 * Byte offset in a directory.
60 */
61typedef xfs_off_t xfs_dir2_off_t;
62
63/*
64 * Directory block number (logical dirblk in file)
65 */
66typedef __uint32_t xfs_dir2_db_t;
67
68/*
69 * Inode number stored as 8 8-bit values.
70 */
71typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
72
73/*
74 * Inode number stored as 4 8-bit values.
75 * Works a lot of the time, when all the inode numbers in a directory
76 * fit in 32 bits.
77 */
78typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
79
80typedef union {
81 xfs_dir2_ino8_t i8;
82 xfs_dir2_ino4_t i4;
83} xfs_dir2_inou_t;
84#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
85
86/*
87 * Directory layout when stored internal to an inode.
88 *
89 * Small directories are packed as tightly as possible so as to fit into the
90 * literal area of the inode. These "shortform" directories consist of a
91 * single xfs_dir2_sf_hdr header followed by zero or more xfs_dir2_sf_entry
92 * structures. Due to the different inode number storage size and the variable
93 * length name field in the xfs_dir2_sf_entry all these structures are
94 * variable length, and the accessors in this file should be used to iterate
95 * over them.
96 */
97typedef struct xfs_dir2_sf_hdr {
98 __uint8_t count; /* count of entries */
99 __uint8_t i8count; /* count of 8-byte inode #s */
100 xfs_dir2_inou_t parent; /* parent dir inode number */
101} __arch_pack xfs_dir2_sf_hdr_t;
102
103typedef struct xfs_dir2_sf_entry {
104 __u8 namelen; /* actual name length */
105 xfs_dir2_sf_off_t offset; /* saved offset */
106 __u8 name[]; /* name, variable size */
107 /*
108 * A xfs_dir2_ino8_t or xfs_dir2_ino4_t follows here, at a
109 * variable offset after the name.
110 */
111} __arch_pack xfs_dir2_sf_entry_t;
112
113static inline int xfs_dir2_sf_hdr_size(int i8count)
114{
115 return sizeof(struct xfs_dir2_sf_hdr) -
116 (i8count == 0) *
117 (sizeof(xfs_dir2_ino8_t) - sizeof(xfs_dir2_ino4_t));
118}
119
120static inline xfs_dir2_data_aoff_t
121xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
122{
123 return get_unaligned_be16(&sfep->offset.i);
124}
125
126static inline void
127xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
128{
129 put_unaligned_be16(off, &sfep->offset.i);
130}
131
132static inline int
133xfs_dir2_sf_entsize(struct xfs_dir2_sf_hdr *hdr, int len)
134{
135 return sizeof(struct xfs_dir2_sf_entry) + /* namelen + offset */
136 len + /* name */
137 (hdr->i8count ? /* ino */
138 sizeof(xfs_dir2_ino8_t) :
139 sizeof(xfs_dir2_ino4_t));
140}
141
142static inline struct xfs_dir2_sf_entry *
143xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)
144{
145 return (struct xfs_dir2_sf_entry *)
146 ((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));
147}
148
149static inline struct xfs_dir2_sf_entry *
150xfs_dir2_sf_nextentry(struct xfs_dir2_sf_hdr *hdr,
151 struct xfs_dir2_sf_entry *sfep)
152{
153 return (struct xfs_dir2_sf_entry *)
154 ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
155}
156
157
158/*
159 * Data block structures.
160 *
161 * A pure data block looks like the following drawing on disk:
162 *
163 * +-------------------------------------------------+
164 * | xfs_dir2_data_hdr_t |
165 * +-------------------------------------------------+
166 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
167 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
168 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
169 * | ... |
170 * +-------------------------------------------------+
171 * | unused space |
172 * +-------------------------------------------------+
173 *
174 * As all the entries are variable size structures the accessors below should
175 * be used to iterate over them.
176 *
177 * In addition to the pure data blocks for the data and node formats,
178 * most structures are also used for the combined data/freespace "block"
179 * format below.
180 */
181
182#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */
183#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG)
184#define XFS_DIR2_DATA_FREE_TAG 0xffff
185#define XFS_DIR2_DATA_FD_COUNT 3
186
187/*
188 * Directory address space divided into sections,
189 * spaces separated by 32GB.
190 */
191#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
192#define XFS_DIR2_DATA_SPACE 0
193#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
194#define XFS_DIR2_DATA_FIRSTDB(mp) \
195 xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
196
197/*
198 * Offsets of . and .. in data space (always block 0)
199 */
200#define XFS_DIR2_DATA_DOT_OFFSET \
201 ((xfs_dir2_data_aoff_t)sizeof(struct xfs_dir2_data_hdr))
202#define XFS_DIR2_DATA_DOTDOT_OFFSET \
203 (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
204#define XFS_DIR2_DATA_FIRST_OFFSET \
205 (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
206
207/*
208 * Describe a free area in the data block.
209 *
210 * The freespace will be formatted as a xfs_dir2_data_unused_t.
211 */
212typedef struct xfs_dir2_data_free {
213 __be16 offset; /* start of freespace */
214 __be16 length; /* length of freespace */
215} xfs_dir2_data_free_t;
216
217/*
218 * Header for the data blocks.
219 *
220 * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
221 */
222typedef struct xfs_dir2_data_hdr {
223 __be32 magic; /* XFS_DIR2_DATA_MAGIC or */
224 /* XFS_DIR2_BLOCK_MAGIC */
225 xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT];
226} xfs_dir2_data_hdr_t;
227
228/*
229 * Active entry in a data block.
230 *
231 * Aligned to 8 bytes. After the variable length name field there is a
232 * 2 byte tag field, which can be accessed using xfs_dir2_data_entry_tag_p.
233 */
234typedef struct xfs_dir2_data_entry {
235 __be64 inumber; /* inode number */
236 __u8 namelen; /* name length */
237 __u8 name[]; /* name bytes, no null */
238 /* __be16 tag; */ /* starting offset of us */
239} xfs_dir2_data_entry_t;
240
241/*
242 * Unused entry in a data block.
243 *
244 * Aligned to 8 bytes. Tag appears as the last 2 bytes and must be accessed
245 * using xfs_dir2_data_unused_tag_p.
246 */
247typedef struct xfs_dir2_data_unused {
248 __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */
249 __be16 length; /* total free length */
250 /* variable offset */
251 __be16 tag; /* starting offset of us */
252} xfs_dir2_data_unused_t;
253
254/*
255 * Size of a data entry.
256 */
257static inline int xfs_dir2_data_entsize(int n)
258{
259 return (int)roundup(offsetof(struct xfs_dir2_data_entry, name[0]) + n +
260 (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
261}
262
263/*
264 * Pointer to an entry's tag word.
265 */
266static inline __be16 *
267xfs_dir2_data_entry_tag_p(struct xfs_dir2_data_entry *dep)
268{
269 return (__be16 *)((char *)dep +
270 xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
271}
272
273/*
274 * Pointer to a freespace's tag word.
275 */
276static inline __be16 *
277xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)
278{
279 return (__be16 *)((char *)dup +
280 be16_to_cpu(dup->length) - sizeof(__be16));
281}
282
283/*
284 * Leaf block structures.
285 *
286 * A pure leaf block looks like the following drawing on disk:
287 *
288 * +---------------------------+
289 * | xfs_dir2_leaf_hdr_t |
290 * +---------------------------+
291 * | xfs_dir2_leaf_entry_t |
292 * | xfs_dir2_leaf_entry_t |
293 * | xfs_dir2_leaf_entry_t |
294 * | xfs_dir2_leaf_entry_t |
295 * | ... |
296 * +---------------------------+
297 * | xfs_dir2_data_off_t |
298 * | xfs_dir2_data_off_t |
299 * | xfs_dir2_data_off_t |
300 * | ... |
301 * +---------------------------+
302 * | xfs_dir2_leaf_tail_t |
303 * +---------------------------+
304 *
305 * The xfs_dir2_data_off_t members (bests) and tail are at the end of the block
306 * for single-leaf (magic = XFS_DIR2_LEAF1_MAGIC) blocks only, but not present
307 * for directories with separate leaf nodes and free space blocks
308 * (magic = XFS_DIR2_LEAFN_MAGIC).
309 *
310 * As all the entries are variable size structures the accessors below should
311 * be used to iterate over them.
312 */
313
314/*
315 * Offset of the leaf/node space. First block in this space
316 * is the btree root.
317 */
318#define XFS_DIR2_LEAF_SPACE 1
319#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
320#define XFS_DIR2_LEAF_FIRSTDB(mp) \
321 xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
322
323/*
324 * Leaf block header.
325 */
326typedef struct xfs_dir2_leaf_hdr {
327 xfs_da_blkinfo_t info; /* header for da routines */
328 __be16 count; /* count of entries */
329 __be16 stale; /* count of stale entries */
330} xfs_dir2_leaf_hdr_t;
331
332/*
333 * Leaf block entry.
334 */
335typedef struct xfs_dir2_leaf_entry {
336 __be32 hashval; /* hash value of name */
337 __be32 address; /* address of data entry */
338} xfs_dir2_leaf_entry_t;
339
340/*
341 * Leaf block tail.
342 */
343typedef struct xfs_dir2_leaf_tail {
344 __be32 bestcount;
345} xfs_dir2_leaf_tail_t;
346
347/*
348 * Leaf block.
349 */
350typedef struct xfs_dir2_leaf {
351 xfs_dir2_leaf_hdr_t hdr; /* leaf header */
352 xfs_dir2_leaf_entry_t ents[]; /* entries */
353} xfs_dir2_leaf_t;
354
355/*
356 * DB blocks here are logical directory block numbers, not filesystem blocks.
357 */
358
359static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
360{
361 return (mp->m_dirblksize - (uint)sizeof(struct xfs_dir2_leaf_hdr)) /
362 (uint)sizeof(struct xfs_dir2_leaf_entry);
363}
364
365/*
366 * Get address of the bestcount field in the single-leaf block.
367 */
368static inline struct xfs_dir2_leaf_tail *
369xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp)
370{
371 return (struct xfs_dir2_leaf_tail *)
372 ((char *)lp + mp->m_dirblksize -
373 sizeof(struct xfs_dir2_leaf_tail));
374}
375
376/*
377 * Get address of the bests array in the single-leaf block.
378 */
379static inline __be16 *
380xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
381{
382 return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
383}
384
385/*
386 * Convert dataptr to byte in file space
387 */
388static inline xfs_dir2_off_t
389xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
390{
391 return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG;
392}
393
394/*
395 * Convert byte in file space to dataptr. It had better be aligned.
396 */
397static inline xfs_dir2_dataptr_t
398xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
399{
400 return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG);
401}
402
403/*
404 * Convert byte in space to (DB) block
405 */
406static inline xfs_dir2_db_t
407xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
408{
409 return (xfs_dir2_db_t)
410 (by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog));
411}
412
413/*
414 * Convert dataptr to a block number
415 */
416static inline xfs_dir2_db_t
417xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
418{
419 return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
420}
421
422/*
423 * Convert byte in space to offset in a block
424 */
425static inline xfs_dir2_data_aoff_t
426xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
427{
428 return (xfs_dir2_data_aoff_t)(by &
429 ((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1));
430}
431
432/*
433 * Convert dataptr to a byte offset in a block
434 */
435static inline xfs_dir2_data_aoff_t
436xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
437{
438 return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
439}
440
441/*
442 * Convert block and offset to byte in space
443 */
444static inline xfs_dir2_off_t
445xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
446 xfs_dir2_data_aoff_t o)
447{
448 return ((xfs_dir2_off_t)db <<
449 (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o;
450}
451
452/*
453 * Convert block (DB) to block (dablk)
454 */
455static inline xfs_dablk_t
456xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
457{
458 return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog);
459}
460
461/*
462 * Convert byte in space to (DA) block
463 */
464static inline xfs_dablk_t
465xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
466{
467 return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
468}
469
470/*
471 * Convert block and offset to dataptr
472 */
473static inline xfs_dir2_dataptr_t
474xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
475 xfs_dir2_data_aoff_t o)
476{
477 return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
478}
479
480/*
481 * Convert block (dablk) to block (DB)
482 */
483static inline xfs_dir2_db_t
484xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
485{
486 return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog);
487}
488
489/*
490 * Convert block (dablk) to byte offset in space
491 */
492static inline xfs_dir2_off_t
493xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
494{
495 return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
496}
497
498/*
499 * Free space block definitions for the node format.
500 */
501
502/*
503 * Offset of the freespace index.
504 */
505#define XFS_DIR2_FREE_SPACE 2
506#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
507#define XFS_DIR2_FREE_FIRSTDB(mp) \
508 xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
509
510typedef struct xfs_dir2_free_hdr {
511 __be32 magic; /* XFS_DIR2_FREE_MAGIC */
512 __be32 firstdb; /* db of first entry */
513 __be32 nvalid; /* count of valid entries */
514 __be32 nused; /* count of used entries */
515} xfs_dir2_free_hdr_t;
516
517typedef struct xfs_dir2_free {
518 xfs_dir2_free_hdr_t hdr; /* block header */
519 __be16 bests[]; /* best free counts */
520 /* unused entries are -1 */
521} xfs_dir2_free_t;
522
523static inline int xfs_dir2_free_max_bests(struct xfs_mount *mp)
524{
525 return (mp->m_dirblksize - sizeof(struct xfs_dir2_free_hdr)) /
526 sizeof(xfs_dir2_data_off_t);
527}
528
529/*
530 * Convert data space db to the corresponding free db.
531 */
532static inline xfs_dir2_db_t
533xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
534{
535 return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir2_free_max_bests(mp);
536}
537
538/*
539 * Convert data space db to the corresponding index in a free db.
540 */
541static inline int
542xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
543{
544 return db % xfs_dir2_free_max_bests(mp);
545}
546
547/*
548 * Single block format.
549 *
550 * The single block format looks like the following drawing on disk:
551 *
552 * +-------------------------------------------------+
553 * | xfs_dir2_data_hdr_t |
554 * +-------------------------------------------------+
555 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
556 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
557 * | xfs_dir2_data_entry_t OR xfs_dir2_data_unused_t |
558 * | ... |
559 * +-------------------------------------------------+
560 * | unused space |
561 * +-------------------------------------------------+
562 * | ... |
563 * | xfs_dir2_leaf_entry_t |
564 * | xfs_dir2_leaf_entry_t |
565 * +-------------------------------------------------+
566 * | xfs_dir2_block_tail_t |
567 * +-------------------------------------------------+
568 *
569 * As all the entries are variable size structures the accessors below should
570 * be used to iterate over them.
571 */
572
573typedef struct xfs_dir2_block_tail {
574 __be32 count; /* count of leaf entries */
575 __be32 stale; /* count of stale lf entries */
576} xfs_dir2_block_tail_t;
577
578/*
579 * Pointer to the block tail embedded in a data block (1-block format)
580 */
581static inline struct xfs_dir2_block_tail *
582xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr)
583{
584 return ((struct xfs_dir2_block_tail *)
585 ((char *)hdr + mp->m_dirblksize)) - 1;
586}
587
588/*
589 * Pointer to the leaf entries embedded in a data block (1-block format)
590 */
591static inline struct xfs_dir2_leaf_entry *
592xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
593{
594 return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
595}
596
597#endif /* __XFS_DIR2_FORMAT_H__ */
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ae891223be90..ca2386d82cdf 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -24,18 +24,14 @@
24#include "xfs_trans.h" 24#include "xfs_trans.h"
25#include "xfs_sb.h" 25#include "xfs_sb.h"
26#include "xfs_ag.h" 26#include "xfs_ag.h"
27#include "xfs_dir2.h"
28#include "xfs_mount.h" 27#include "xfs_mount.h"
29#include "xfs_da_btree.h" 28#include "xfs_da_btree.h"
30#include "xfs_bmap_btree.h" 29#include "xfs_bmap_btree.h"
31#include "xfs_dir2_sf.h"
32#include "xfs_dinode.h" 30#include "xfs_dinode.h"
33#include "xfs_inode.h" 31#include "xfs_inode.h"
34#include "xfs_bmap.h" 32#include "xfs_bmap.h"
35#include "xfs_dir2_data.h" 33#include "xfs_dir2_format.h"
36#include "xfs_dir2_leaf.h" 34#include "xfs_dir2_priv.h"
37#include "xfs_dir2_block.h"
38#include "xfs_dir2_node.h"
39#include "xfs_error.h" 35#include "xfs_error.h"
40#include "xfs_trace.h" 36#include "xfs_trace.h"
41 37
@@ -64,7 +60,7 @@ xfs_dir2_block_to_leaf(
64{ 60{
65 __be16 *bestsp; /* leaf's bestsp entries */ 61 __be16 *bestsp; /* leaf's bestsp entries */
66 xfs_dablk_t blkno; /* leaf block's bno */ 62 xfs_dablk_t blkno; /* leaf block's bno */
67 xfs_dir2_block_t *block; /* block structure */ 63 xfs_dir2_data_hdr_t *hdr; /* block header */
68 xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */ 64 xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */
69 xfs_dir2_block_tail_t *btp; /* block's tail */ 65 xfs_dir2_block_tail_t *btp; /* block's tail */
70 xfs_inode_t *dp; /* incore directory inode */ 66 xfs_inode_t *dp; /* incore directory inode */
@@ -101,9 +97,9 @@ xfs_dir2_block_to_leaf(
101 } 97 }
102 ASSERT(lbp != NULL); 98 ASSERT(lbp != NULL);
103 leaf = lbp->data; 99 leaf = lbp->data;
104 block = dbp->data; 100 hdr = dbp->data;
105 xfs_dir2_data_check(dp, dbp); 101 xfs_dir2_data_check(dp, dbp);
106 btp = xfs_dir2_block_tail_p(mp, block); 102 btp = xfs_dir2_block_tail_p(mp, hdr);
107 blp = xfs_dir2_block_leaf_p(btp); 103 blp = xfs_dir2_block_leaf_p(btp);
108 /* 104 /*
109 * Set the counts in the leaf header. 105 * Set the counts in the leaf header.
@@ -123,23 +119,23 @@ xfs_dir2_block_to_leaf(
123 * tail be free. 119 * tail be free.
124 */ 120 */
125 xfs_dir2_data_make_free(tp, dbp, 121 xfs_dir2_data_make_free(tp, dbp,
126 (xfs_dir2_data_aoff_t)((char *)blp - (char *)block), 122 (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
127 (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize - 123 (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
128 (char *)blp), 124 (char *)blp),
129 &needlog, &needscan); 125 &needlog, &needscan);
130 /* 126 /*
131 * Fix up the block header, make it a data block. 127 * Fix up the block header, make it a data block.
132 */ 128 */
133 block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); 129 hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
134 if (needscan) 130 if (needscan)
135 xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog); 131 xfs_dir2_data_freescan(mp, hdr, &needlog);
136 /* 132 /*
137 * Set up leaf tail and bests table. 133 * Set up leaf tail and bests table.
138 */ 134 */
139 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 135 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
140 ltp->bestcount = cpu_to_be32(1); 136 ltp->bestcount = cpu_to_be32(1);
141 bestsp = xfs_dir2_leaf_bests_p(ltp); 137 bestsp = xfs_dir2_leaf_bests_p(ltp);
142 bestsp[0] = block->hdr.bestfree[0].length; 138 bestsp[0] = hdr->bestfree[0].length;
143 /* 139 /*
144 * Log the data header and leaf bests table. 140 * Log the data header and leaf bests table.
145 */ 141 */
@@ -152,6 +148,131 @@ xfs_dir2_block_to_leaf(
152 return 0; 148 return 0;
153} 149}
154 150
151STATIC void
152xfs_dir2_leaf_find_stale(
153 struct xfs_dir2_leaf *leaf,
154 int index,
155 int *lowstale,
156 int *highstale)
157{
158 /*
159 * Find the first stale entry before our index, if any.
160 */
161 for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
162 if (leaf->ents[*lowstale].address ==
163 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
164 break;
165 }
166
167 /*
168 * Find the first stale entry at or after our index, if any.
169 * Stop if the result would require moving more entries than using
170 * lowstale.
171 */
172 for (*highstale = index;
173 *highstale < be16_to_cpu(leaf->hdr.count);
174 ++*highstale) {
175 if (leaf->ents[*highstale].address ==
176 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
177 break;
178 if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
179 break;
180 }
181}
182
183struct xfs_dir2_leaf_entry *
184xfs_dir2_leaf_find_entry(
185 xfs_dir2_leaf_t *leaf, /* leaf structure */
186 int index, /* leaf table position */
187 int compact, /* need to compact leaves */
188 int lowstale, /* index of prev stale leaf */
189 int highstale, /* index of next stale leaf */
190 int *lfloglow, /* low leaf logging index */
191 int *lfloghigh) /* high leaf logging index */
192{
193 if (!leaf->hdr.stale) {
194 xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */
195
196 /*
197 * Now we need to make room to insert the leaf entry.
198 *
199 * If there are no stale entries, just insert a hole at index.
200 */
201 lep = &leaf->ents[index];
202 if (index < be16_to_cpu(leaf->hdr.count))
203 memmove(lep + 1, lep,
204 (be16_to_cpu(leaf->hdr.count) - index) *
205 sizeof(*lep));
206
207 /*
208 * Record low and high logging indices for the leaf.
209 */
210 *lfloglow = index;
211 *lfloghigh = be16_to_cpu(leaf->hdr.count);
212 be16_add_cpu(&leaf->hdr.count, 1);
213 return lep;
214 }
215
216 /*
217 * There are stale entries.
218 *
219 * We will use one of them for the new entry. It's probably not at
220 * the right location, so we'll have to shift some up or down first.
221 *
222 * If we didn't compact before, we need to find the nearest stale
223 * entries before and after our insertion point.
224 */
225 if (compact == 0)
226 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
227
228 /*
229 * If the low one is better, use it.
230 */
231 if (lowstale >= 0 &&
232 (highstale == be16_to_cpu(leaf->hdr.count) ||
233 index - lowstale - 1 < highstale - index)) {
234 ASSERT(index - lowstale - 1 >= 0);
235 ASSERT(leaf->ents[lowstale].address ==
236 cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
237
238 /*
239 * Copy entries up to cover the stale entry and make room
240 * for the new entry.
241 */
242 if (index - lowstale - 1 > 0) {
243 memmove(&leaf->ents[lowstale],
244 &leaf->ents[lowstale + 1],
245 (index - lowstale - 1) *
246 sizeof(xfs_dir2_leaf_entry_t));
247 }
248 *lfloglow = MIN(lowstale, *lfloglow);
249 *lfloghigh = MAX(index - 1, *lfloghigh);
250 be16_add_cpu(&leaf->hdr.stale, -1);
251 return &leaf->ents[index - 1];
252 }
253
254 /*
255 * The high one is better, so use that one.
256 */
257 ASSERT(highstale - index >= 0);
258 ASSERT(leaf->ents[highstale].address ==
259 cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
260
261 /*
262 * Copy entries down to cover the stale entry and make room for the
263 * new entry.
264 */
265 if (highstale - index > 0) {
266 memmove(&leaf->ents[index + 1],
267 &leaf->ents[index],
268 (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
269 }
270 *lfloglow = MIN(index, *lfloglow);
271 *lfloghigh = MAX(highstale, *lfloghigh);
272 be16_add_cpu(&leaf->hdr.stale, -1);
273 return &leaf->ents[index];
274}
275
155/* 276/*
156 * Add an entry to a leaf form directory. 277 * Add an entry to a leaf form directory.
157 */ 278 */
@@ -161,7 +282,7 @@ xfs_dir2_leaf_addname(
161{ 282{
162 __be16 *bestsp; /* freespace table in leaf */ 283 __be16 *bestsp; /* freespace table in leaf */
163 int compact; /* need to compact leaves */ 284 int compact; /* need to compact leaves */
164 xfs_dir2_data_t *data; /* data block structure */ 285 xfs_dir2_data_hdr_t *hdr; /* data block header */
165 xfs_dabuf_t *dbp; /* data block buffer */ 286 xfs_dabuf_t *dbp; /* data block buffer */
166 xfs_dir2_data_entry_t *dep; /* data block entry */ 287 xfs_dir2_data_entry_t *dep; /* data block entry */
167 xfs_inode_t *dp; /* incore directory inode */ 288 xfs_inode_t *dp; /* incore directory inode */
@@ -225,7 +346,7 @@ xfs_dir2_leaf_addname(
225 continue; 346 continue;
226 i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); 347 i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
227 ASSERT(i < be32_to_cpu(ltp->bestcount)); 348 ASSERT(i < be32_to_cpu(ltp->bestcount));
228 ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF); 349 ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
229 if (be16_to_cpu(bestsp[i]) >= length) { 350 if (be16_to_cpu(bestsp[i]) >= length) {
230 use_block = i; 351 use_block = i;
231 break; 352 break;
@@ -239,7 +360,8 @@ xfs_dir2_leaf_addname(
239 /* 360 /*
240 * Remember a block we see that's missing. 361 * Remember a block we see that's missing.
241 */ 362 */
242 if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1) 363 if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
364 use_block == -1)
243 use_block = i; 365 use_block = i;
244 else if (be16_to_cpu(bestsp[i]) >= length) { 366 else if (be16_to_cpu(bestsp[i]) >= length) {
245 use_block = i; 367 use_block = i;
@@ -250,14 +372,17 @@ xfs_dir2_leaf_addname(
250 /* 372 /*
251 * How many bytes do we need in the leaf block? 373 * How many bytes do we need in the leaf block?
252 */ 374 */
253 needbytes = 375 needbytes = 0;
254 (leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) + 376 if (!leaf->hdr.stale)
255 (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0])); 377 needbytes += sizeof(xfs_dir2_leaf_entry_t);
378 if (use_block == -1)
379 needbytes += sizeof(xfs_dir2_data_off_t);
380
256 /* 381 /*
257 * Now kill use_block if it refers to a missing block, so we 382 * Now kill use_block if it refers to a missing block, so we
258 * can use it as an indication of allocation needed. 383 * can use it as an indication of allocation needed.
259 */ 384 */
260 if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF) 385 if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
261 use_block = -1; 386 use_block = -1;
262 /* 387 /*
263 * If we don't have enough free bytes but we can make enough 388 * If we don't have enough free bytes but we can make enough
@@ -369,8 +494,8 @@ xfs_dir2_leaf_addname(
369 */ 494 */
370 else 495 else
371 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); 496 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
372 data = dbp->data; 497 hdr = dbp->data;
373 bestsp[use_block] = data->hdr.bestfree[0].length; 498 bestsp[use_block] = hdr->bestfree[0].length;
374 grown = 1; 499 grown = 1;
375 } 500 }
376 /* 501 /*
@@ -384,7 +509,7 @@ xfs_dir2_leaf_addname(
384 xfs_da_brelse(tp, lbp); 509 xfs_da_brelse(tp, lbp);
385 return error; 510 return error;
386 } 511 }
387 data = dbp->data; 512 hdr = dbp->data;
388 grown = 0; 513 grown = 0;
389 } 514 }
390 xfs_dir2_data_check(dp, dbp); 515 xfs_dir2_data_check(dp, dbp);
@@ -392,14 +517,14 @@ xfs_dir2_leaf_addname(
392 * Point to the biggest freespace in our data block. 517 * Point to the biggest freespace in our data block.
393 */ 518 */
394 dup = (xfs_dir2_data_unused_t *) 519 dup = (xfs_dir2_data_unused_t *)
395 ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); 520 ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
396 ASSERT(be16_to_cpu(dup->length) >= length); 521 ASSERT(be16_to_cpu(dup->length) >= length);
397 needscan = needlog = 0; 522 needscan = needlog = 0;
398 /* 523 /*
399 * Mark the initial part of our freespace in use for the new entry. 524 * Mark the initial part of our freespace in use for the new entry.
400 */ 525 */
401 xfs_dir2_data_use_free(tp, dbp, dup, 526 xfs_dir2_data_use_free(tp, dbp, dup,
402 (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, 527 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
403 &needlog, &needscan); 528 &needlog, &needscan);
404 /* 529 /*
405 * Initialize our new entry (at last). 530 * Initialize our new entry (at last).
@@ -409,12 +534,12 @@ xfs_dir2_leaf_addname(
409 dep->namelen = args->namelen; 534 dep->namelen = args->namelen;
410 memcpy(dep->name, args->name, dep->namelen); 535 memcpy(dep->name, args->name, dep->namelen);
411 tagp = xfs_dir2_data_entry_tag_p(dep); 536 tagp = xfs_dir2_data_entry_tag_p(dep);
412 *tagp = cpu_to_be16((char *)dep - (char *)data); 537 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
413 /* 538 /*
414 * Need to scan fix up the bestfree table. 539 * Need to scan fix up the bestfree table.
415 */ 540 */
416 if (needscan) 541 if (needscan)
417 xfs_dir2_data_freescan(mp, data, &needlog); 542 xfs_dir2_data_freescan(mp, hdr, &needlog);
418 /* 543 /*
419 * Need to log the data block's header. 544 * Need to log the data block's header.
420 */ 545 */
@@ -425,107 +550,15 @@ xfs_dir2_leaf_addname(
425 * If the bests table needs to be changed, do it. 550 * If the bests table needs to be changed, do it.
426 * Log the change unless we've already done that. 551 * Log the change unless we've already done that.
427 */ 552 */
428 if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) { 553 if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) {
429 bestsp[use_block] = data->hdr.bestfree[0].length; 554 bestsp[use_block] = hdr->bestfree[0].length;
430 if (!grown) 555 if (!grown)
431 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block); 556 xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
432 } 557 }
433 /* 558
434 * Now we need to make room to insert the leaf entry. 559 lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
435 * If there are no stale entries, we just insert a hole at index. 560 highstale, &lfloglow, &lfloghigh);
436 */ 561
437 if (!leaf->hdr.stale) {
438 /*
439 * lep is still good as the index leaf entry.
440 */
441 if (index < be16_to_cpu(leaf->hdr.count))
442 memmove(lep + 1, lep,
443 (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
444 /*
445 * Record low and high logging indices for the leaf.
446 */
447 lfloglow = index;
448 lfloghigh = be16_to_cpu(leaf->hdr.count);
449 be16_add_cpu(&leaf->hdr.count, 1);
450 }
451 /*
452 * There are stale entries.
453 * We will use one of them for the new entry.
454 * It's probably not at the right location, so we'll have to
455 * shift some up or down first.
456 */
457 else {
458 /*
459 * If we didn't compact before, we need to find the nearest
460 * stale entries before and after our insertion point.
461 */
462 if (compact == 0) {
463 /*
464 * Find the first stale entry before the insertion
465 * point, if any.
466 */
467 for (lowstale = index - 1;
468 lowstale >= 0 &&
469 be32_to_cpu(leaf->ents[lowstale].address) !=
470 XFS_DIR2_NULL_DATAPTR;
471 lowstale--)
472 continue;
473 /*
474 * Find the next stale entry at or after the insertion
475 * point, if any. Stop if we go so far that the
476 * lowstale entry would be better.
477 */
478 for (highstale = index;
479 highstale < be16_to_cpu(leaf->hdr.count) &&
480 be32_to_cpu(leaf->ents[highstale].address) !=
481 XFS_DIR2_NULL_DATAPTR &&
482 (lowstale < 0 ||
483 index - lowstale - 1 >= highstale - index);
484 highstale++)
485 continue;
486 }
487 /*
488 * If the low one is better, use it.
489 */
490 if (lowstale >= 0 &&
491 (highstale == be16_to_cpu(leaf->hdr.count) ||
492 index - lowstale - 1 < highstale - index)) {
493 ASSERT(index - lowstale - 1 >= 0);
494 ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
495 XFS_DIR2_NULL_DATAPTR);
496 /*
497 * Copy entries up to cover the stale entry
498 * and make room for the new entry.
499 */
500 if (index - lowstale - 1 > 0)
501 memmove(&leaf->ents[lowstale],
502 &leaf->ents[lowstale + 1],
503 (index - lowstale - 1) * sizeof(*lep));
504 lep = &leaf->ents[index - 1];
505 lfloglow = MIN(lowstale, lfloglow);
506 lfloghigh = MAX(index - 1, lfloghigh);
507 }
508 /*
509 * The high one is better, so use that one.
510 */
511 else {
512 ASSERT(highstale - index >= 0);
513 ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
514 XFS_DIR2_NULL_DATAPTR);
515 /*
516 * Copy entries down to cover the stale entry
517 * and make room for the new entry.
518 */
519 if (highstale - index > 0)
520 memmove(&leaf->ents[index + 1],
521 &leaf->ents[index],
522 (highstale - index) * sizeof(*lep));
523 lep = &leaf->ents[index];
524 lfloglow = MIN(index, lfloglow);
525 lfloghigh = MAX(highstale, lfloghigh);
526 }
527 be16_add_cpu(&leaf->hdr.stale, -1);
528 }
529 /* 562 /*
530 * Fill in the new leaf entry. 563 * Fill in the new leaf entry.
531 */ 564 */
@@ -562,7 +595,7 @@ xfs_dir2_leaf_check(
562 595
563 leaf = bp->data; 596 leaf = bp->data;
564 mp = dp->i_mount; 597 mp = dp->i_mount;
565 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 598 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
566 /* 599 /*
567 * This value is not restrictive enough. 600 * This value is not restrictive enough.
568 * Should factor in the size of the bests table as well. 601 * Should factor in the size of the bests table as well.
@@ -582,7 +615,7 @@ xfs_dir2_leaf_check(
582 if (i + 1 < be16_to_cpu(leaf->hdr.count)) 615 if (i + 1 < be16_to_cpu(leaf->hdr.count))
583 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= 616 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
584 be32_to_cpu(leaf->ents[i + 1].hashval)); 617 be32_to_cpu(leaf->ents[i + 1].hashval));
585 if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) 618 if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
586 stale++; 619 stale++;
587 } 620 }
588 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); 621 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -611,7 +644,8 @@ xfs_dir2_leaf_compact(
611 * Compress out the stale entries in place. 644 * Compress out the stale entries in place.
612 */ 645 */
613 for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) { 646 for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
614 if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) 647 if (leaf->ents[from].address ==
648 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
615 continue; 649 continue;
616 /* 650 /*
617 * Only actually copy the entries that are different. 651 * Only actually copy the entries that are different.
@@ -663,24 +697,9 @@ xfs_dir2_leaf_compact_x1(
663 leaf = bp->data; 697 leaf = bp->data;
664 ASSERT(be16_to_cpu(leaf->hdr.stale) > 1); 698 ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
665 index = *indexp; 699 index = *indexp;
666 /* 700
667 * Find the first stale entry before our index, if any. 701 xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
668 */ 702
669 for (lowstale = index - 1;
670 lowstale >= 0 &&
671 be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
672 lowstale--)
673 continue;
674 /*
675 * Find the first stale entry at or after our index, if any.
676 * Stop if the answer would be worse than lowstale.
677 */
678 for (highstale = index;
679 highstale < be16_to_cpu(leaf->hdr.count) &&
680 be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
681 (lowstale < 0 || index - lowstale > highstale - index);
682 highstale++)
683 continue;
684 /* 703 /*
685 * Pick the better of lowstale and highstale. 704 * Pick the better of lowstale and highstale.
686 */ 705 */
@@ -701,7 +720,8 @@ xfs_dir2_leaf_compact_x1(
701 if (index == from) 720 if (index == from)
702 newindex = to; 721 newindex = to;
703 if (from != keepstale && 722 if (from != keepstale &&
704 be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) { 723 leaf->ents[from].address ==
724 cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
705 if (from == to) 725 if (from == to)
706 *lowlogp = to; 726 *lowlogp = to;
707 continue; 727 continue;
@@ -760,7 +780,7 @@ xfs_dir2_leaf_getdents(
760 int byteoff; /* offset in current block */ 780 int byteoff; /* offset in current block */
761 xfs_dir2_db_t curdb; /* db for current block */ 781 xfs_dir2_db_t curdb; /* db for current block */
762 xfs_dir2_off_t curoff; /* current overall offset */ 782 xfs_dir2_off_t curoff; /* current overall offset */
763 xfs_dir2_data_t *data; /* data block structure */ 783 xfs_dir2_data_hdr_t *hdr; /* data block header */
764 xfs_dir2_data_entry_t *dep; /* data entry */ 784 xfs_dir2_data_entry_t *dep; /* data entry */
765 xfs_dir2_data_unused_t *dup; /* unused entry */ 785 xfs_dir2_data_unused_t *dup; /* unused entry */
766 int error = 0; /* error return value */ 786 int error = 0; /* error return value */
@@ -1018,23 +1038,23 @@ xfs_dir2_leaf_getdents(
1018 else if (curoff > newoff) 1038 else if (curoff > newoff)
1019 ASSERT(xfs_dir2_byte_to_db(mp, curoff) == 1039 ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
1020 curdb); 1040 curdb);
1021 data = bp->data; 1041 hdr = bp->data;
1022 xfs_dir2_data_check(dp, bp); 1042 xfs_dir2_data_check(dp, bp);
1023 /* 1043 /*
1024 * Find our position in the block. 1044 * Find our position in the block.
1025 */ 1045 */
1026 ptr = (char *)&data->u; 1046 ptr = (char *)(hdr + 1);
1027 byteoff = xfs_dir2_byte_to_off(mp, curoff); 1047 byteoff = xfs_dir2_byte_to_off(mp, curoff);
1028 /* 1048 /*
1029 * Skip past the header. 1049 * Skip past the header.
1030 */ 1050 */
1031 if (byteoff == 0) 1051 if (byteoff == 0)
1032 curoff += (uint)sizeof(data->hdr); 1052 curoff += (uint)sizeof(*hdr);
1033 /* 1053 /*
1034 * Skip past entries until we reach our offset. 1054 * Skip past entries until we reach our offset.
1035 */ 1055 */
1036 else { 1056 else {
1037 while ((char *)ptr - (char *)data < byteoff) { 1057 while ((char *)ptr - (char *)hdr < byteoff) {
1038 dup = (xfs_dir2_data_unused_t *)ptr; 1058 dup = (xfs_dir2_data_unused_t *)ptr;
1039 1059
1040 if (be16_to_cpu(dup->freetag) 1060 if (be16_to_cpu(dup->freetag)
@@ -1055,8 +1075,8 @@ xfs_dir2_leaf_getdents(
1055 curoff = 1075 curoff =
1056 xfs_dir2_db_off_to_byte(mp, 1076 xfs_dir2_db_off_to_byte(mp,
1057 xfs_dir2_byte_to_db(mp, curoff), 1077 xfs_dir2_byte_to_db(mp, curoff),
1058 (char *)ptr - (char *)data); 1078 (char *)ptr - (char *)hdr);
1059 if (ptr >= (char *)data + mp->m_dirblksize) { 1079 if (ptr >= (char *)hdr + mp->m_dirblksize) {
1060 continue; 1080 continue;
1061 } 1081 }
1062 } 1082 }
@@ -1179,7 +1199,7 @@ xfs_dir2_leaf_log_bests(
1179 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */ 1199 xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
1180 1200
1181 leaf = bp->data; 1201 leaf = bp->data;
1182 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 1202 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1183 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); 1203 ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
1184 firstb = xfs_dir2_leaf_bests_p(ltp) + first; 1204 firstb = xfs_dir2_leaf_bests_p(ltp) + first;
1185 lastb = xfs_dir2_leaf_bests_p(ltp) + last; 1205 lastb = xfs_dir2_leaf_bests_p(ltp) + last;
@@ -1202,8 +1222,8 @@ xfs_dir2_leaf_log_ents(
1202 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1222 xfs_dir2_leaf_t *leaf; /* leaf structure */
1203 1223
1204 leaf = bp->data; 1224 leaf = bp->data;
1205 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || 1225 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1206 be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1226 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1207 firstlep = &leaf->ents[first]; 1227 firstlep = &leaf->ents[first];
1208 lastlep = &leaf->ents[last]; 1228 lastlep = &leaf->ents[last];
1209 xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), 1229 xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
@@ -1221,8 +1241,8 @@ xfs_dir2_leaf_log_header(
1221 xfs_dir2_leaf_t *leaf; /* leaf structure */ 1241 xfs_dir2_leaf_t *leaf; /* leaf structure */
1222 1242
1223 leaf = bp->data; 1243 leaf = bp->data;
1224 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC || 1244 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
1225 be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1245 leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1226 xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), 1246 xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
1227 (uint)(sizeof(leaf->hdr) - 1)); 1247 (uint)(sizeof(leaf->hdr) - 1));
1228} 1248}
@@ -1241,7 +1261,7 @@ xfs_dir2_leaf_log_tail(
1241 1261
1242 mp = tp->t_mountp; 1262 mp = tp->t_mountp;
1243 leaf = bp->data; 1263 leaf = bp->data;
1244 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC); 1264 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
1245 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1265 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1246 xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), 1266 xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
1247 (uint)(mp->m_dirblksize - 1)); 1267 (uint)(mp->m_dirblksize - 1));
@@ -1437,7 +1457,7 @@ xfs_dir2_leaf_removename(
1437 xfs_da_args_t *args) /* operation arguments */ 1457 xfs_da_args_t *args) /* operation arguments */
1438{ 1458{
1439 __be16 *bestsp; /* leaf block best freespace */ 1459 __be16 *bestsp; /* leaf block best freespace */
1440 xfs_dir2_data_t *data; /* data block structure */ 1460 xfs_dir2_data_hdr_t *hdr; /* data block header */
1441 xfs_dir2_db_t db; /* data block number */ 1461 xfs_dir2_db_t db; /* data block number */
1442 xfs_dabuf_t *dbp; /* data block buffer */ 1462 xfs_dabuf_t *dbp; /* data block buffer */
1443 xfs_dir2_data_entry_t *dep; /* data entry structure */ 1463 xfs_dir2_data_entry_t *dep; /* data entry structure */
@@ -1467,7 +1487,7 @@ xfs_dir2_leaf_removename(
1467 tp = args->trans; 1487 tp = args->trans;
1468 mp = dp->i_mount; 1488 mp = dp->i_mount;
1469 leaf = lbp->data; 1489 leaf = lbp->data;
1470 data = dbp->data; 1490 hdr = dbp->data;
1471 xfs_dir2_data_check(dp, dbp); 1491 xfs_dir2_data_check(dp, dbp);
1472 /* 1492 /*
1473 * Point to the leaf entry, use that to point to the data entry. 1493 * Point to the leaf entry, use that to point to the data entry.
@@ -1475,9 +1495,9 @@ xfs_dir2_leaf_removename(
1475 lep = &leaf->ents[index]; 1495 lep = &leaf->ents[index];
1476 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); 1496 db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
1477 dep = (xfs_dir2_data_entry_t *) 1497 dep = (xfs_dir2_data_entry_t *)
1478 ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); 1498 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
1479 needscan = needlog = 0; 1499 needscan = needlog = 0;
1480 oldbest = be16_to_cpu(data->hdr.bestfree[0].length); 1500 oldbest = be16_to_cpu(hdr->bestfree[0].length);
1481 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1501 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1482 bestsp = xfs_dir2_leaf_bests_p(ltp); 1502 bestsp = xfs_dir2_leaf_bests_p(ltp);
1483 ASSERT(be16_to_cpu(bestsp[db]) == oldbest); 1503 ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
@@ -1485,7 +1505,7 @@ xfs_dir2_leaf_removename(
1485 * Mark the former data entry unused. 1505 * Mark the former data entry unused.
1486 */ 1506 */
1487 xfs_dir2_data_make_free(tp, dbp, 1507 xfs_dir2_data_make_free(tp, dbp,
1488 (xfs_dir2_data_aoff_t)((char *)dep - (char *)data), 1508 (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
1489 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); 1509 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
1490 /* 1510 /*
1491 * We just mark the leaf entry stale by putting a null in it. 1511 * We just mark the leaf entry stale by putting a null in it.
@@ -1499,23 +1519,23 @@ xfs_dir2_leaf_removename(
1499 * log the data block header if necessary. 1519 * log the data block header if necessary.
1500 */ 1520 */
1501 if (needscan) 1521 if (needscan)
1502 xfs_dir2_data_freescan(mp, data, &needlog); 1522 xfs_dir2_data_freescan(mp, hdr, &needlog);
1503 if (needlog) 1523 if (needlog)
1504 xfs_dir2_data_log_header(tp, dbp); 1524 xfs_dir2_data_log_header(tp, dbp);
1505 /* 1525 /*
1506 * If the longest freespace in the data block has changed, 1526 * If the longest freespace in the data block has changed,
1507 * put the new value in the bests table and log that. 1527 * put the new value in the bests table and log that.
1508 */ 1528 */
1509 if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) { 1529 if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) {
1510 bestsp[db] = data->hdr.bestfree[0].length; 1530 bestsp[db] = hdr->bestfree[0].length;
1511 xfs_dir2_leaf_log_bests(tp, lbp, db, db); 1531 xfs_dir2_leaf_log_bests(tp, lbp, db, db);
1512 } 1532 }
1513 xfs_dir2_data_check(dp, dbp); 1533 xfs_dir2_data_check(dp, dbp);
1514 /* 1534 /*
1515 * If the data block is now empty then get rid of the data block. 1535 * If the data block is now empty then get rid of the data block.
1516 */ 1536 */
1517 if (be16_to_cpu(data->hdr.bestfree[0].length) == 1537 if (be16_to_cpu(hdr->bestfree[0].length) ==
1518 mp->m_dirblksize - (uint)sizeof(data->hdr)) { 1538 mp->m_dirblksize - (uint)sizeof(*hdr)) {
1519 ASSERT(db != mp->m_dirdatablk); 1539 ASSERT(db != mp->m_dirdatablk);
1520 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) { 1540 if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
1521 /* 1541 /*
@@ -1542,7 +1562,7 @@ xfs_dir2_leaf_removename(
1542 * Look for the last active entry (i). 1562 * Look for the last active entry (i).
1543 */ 1563 */
1544 for (i = db - 1; i > 0; i--) { 1564 for (i = db - 1; i > 0; i--) {
1545 if (be16_to_cpu(bestsp[i]) != NULLDATAOFF) 1565 if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
1546 break; 1566 break;
1547 } 1567 }
1548 /* 1568 /*
@@ -1686,9 +1706,6 @@ xfs_dir2_leaf_trim_data(
1686 xfs_dir2_db_t db) /* data block number */ 1706 xfs_dir2_db_t db) /* data block number */
1687{ 1707{
1688 __be16 *bestsp; /* leaf bests table */ 1708 __be16 *bestsp; /* leaf bests table */
1689#ifdef DEBUG
1690 xfs_dir2_data_t *data; /* data block structure */
1691#endif
1692 xfs_dabuf_t *dbp; /* data block buffer */ 1709 xfs_dabuf_t *dbp; /* data block buffer */
1693 xfs_inode_t *dp; /* incore directory inode */ 1710 xfs_inode_t *dp; /* incore directory inode */
1694 int error; /* error return value */ 1711 int error; /* error return value */
@@ -1707,20 +1724,21 @@ xfs_dir2_leaf_trim_data(
1707 XFS_DATA_FORK))) { 1724 XFS_DATA_FORK))) {
1708 return error; 1725 return error;
1709 } 1726 }
1710#ifdef DEBUG
1711 data = dbp->data;
1712 ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
1713#endif
1714 /* this seems to be an error
1715 * data is only valid if DEBUG is defined?
1716 * RMC 09/08/1999
1717 */
1718 1727
1719 leaf = lbp->data; 1728 leaf = lbp->data;
1720 ltp = xfs_dir2_leaf_tail_p(mp, leaf); 1729 ltp = xfs_dir2_leaf_tail_p(mp, leaf);
1721 ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) == 1730
1722 mp->m_dirblksize - (uint)sizeof(data->hdr)); 1731#ifdef DEBUG
1732{
1733 struct xfs_dir2_data_hdr *hdr = dbp->data;
1734
1735 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
1736 ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
1737 mp->m_dirblksize - (uint)sizeof(*hdr));
1723 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1); 1738 ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
1739}
1740#endif
1741
1724 /* 1742 /*
1725 * Get rid of the data block. 1743 * Get rid of the data block.
1726 */ 1744 */
@@ -1740,6 +1758,20 @@ xfs_dir2_leaf_trim_data(
1740 return 0; 1758 return 0;
1741} 1759}
1742 1760
1761static inline size_t
1762xfs_dir2_leaf_size(
1763 struct xfs_dir2_leaf_hdr *hdr,
1764 int counts)
1765{
1766 int entries;
1767
1768 entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
1769 return sizeof(xfs_dir2_leaf_hdr_t) +
1770 entries * sizeof(xfs_dir2_leaf_entry_t) +
1771 counts * sizeof(xfs_dir2_data_off_t) +
1772 sizeof(xfs_dir2_leaf_tail_t);
1773}
1774
1743/* 1775/*
1744 * Convert node form directory to leaf form directory. 1776 * Convert node form directory to leaf form directory.
1745 * The root of the node form dir needs to already be a LEAFN block. 1777 * The root of the node form dir needs to already be a LEAFN block.
@@ -1810,7 +1842,7 @@ xfs_dir2_node_to_leaf(
1810 return 0; 1842 return 0;
1811 lbp = state->path.blk[0].bp; 1843 lbp = state->path.blk[0].bp;
1812 leaf = lbp->data; 1844 leaf = lbp->data;
1813 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1845 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1814 /* 1846 /*
1815 * Read the freespace block. 1847 * Read the freespace block.
1816 */ 1848 */
@@ -1819,20 +1851,19 @@ xfs_dir2_node_to_leaf(
1819 return error; 1851 return error;
1820 } 1852 }
1821 free = fbp->data; 1853 free = fbp->data;
1822 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1854 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1823 ASSERT(!free->hdr.firstdb); 1855 ASSERT(!free->hdr.firstdb);
1856
1824 /* 1857 /*
1825 * Now see if the leafn and free data will fit in a leaf1. 1858 * Now see if the leafn and free data will fit in a leaf1.
1826 * If not, release the buffer and give up. 1859 * If not, release the buffer and give up.
1827 */ 1860 */
1828 if ((uint)sizeof(leaf->hdr) + 1861 if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
1829 (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) + 1862 mp->m_dirblksize) {
1830 be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) +
1831 (uint)sizeof(leaf->tail) >
1832 mp->m_dirblksize) {
1833 xfs_da_brelse(tp, fbp); 1863 xfs_da_brelse(tp, fbp);
1834 return 0; 1864 return 0;
1835 } 1865 }
1866
1836 /* 1867 /*
1837 * If the leaf has any stale entries in it, compress them out. 1868 * If the leaf has any stale entries in it, compress them out.
1838 * The compact routine will log the header. 1869 * The compact routine will log the header.
@@ -1851,7 +1882,7 @@ xfs_dir2_node_to_leaf(
1851 * Set up the leaf bests table. 1882 * Set up the leaf bests table.
1852 */ 1883 */
1853 memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests, 1884 memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
1854 be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0])); 1885 be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t));
1855 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); 1886 xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
1856 xfs_dir2_leaf_log_tail(tp, lbp); 1887 xfs_dir2_leaf_log_tail(tp, lbp);
1857 xfs_dir2_leaf_check(dp, lbp); 1888 xfs_dir2_leaf_check(dp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
deleted file mode 100644
index 6c9539f06987..000000000000
--- a/fs/xfs/xfs_dir2_leaf.h
+++ /dev/null
@@ -1,253 +0,0 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_LEAF_H__
19#define __XFS_DIR2_LEAF_H__
20
21struct uio;
22struct xfs_dabuf;
23struct xfs_da_args;
24struct xfs_inode;
25struct xfs_mount;
26struct xfs_trans;
27
28/*
29 * Offset of the leaf/node space. First block in this space
30 * is the btree root.
31 */
32#define XFS_DIR2_LEAF_SPACE 1
33#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
34#define XFS_DIR2_LEAF_FIRSTDB(mp) \
35 xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
36
37/*
38 * Offset in data space of a data entry.
39 */
40typedef __uint32_t xfs_dir2_dataptr_t;
41#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
42#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
43
44/*
45 * Leaf block header.
46 */
47typedef struct xfs_dir2_leaf_hdr {
48 xfs_da_blkinfo_t info; /* header for da routines */
49 __be16 count; /* count of entries */
50 __be16 stale; /* count of stale entries */
51} xfs_dir2_leaf_hdr_t;
52
53/*
54 * Leaf block entry.
55 */
56typedef struct xfs_dir2_leaf_entry {
57 __be32 hashval; /* hash value of name */
58 __be32 address; /* address of data entry */
59} xfs_dir2_leaf_entry_t;
60
61/*
62 * Leaf block tail.
63 */
64typedef struct xfs_dir2_leaf_tail {
65 __be32 bestcount;
66} xfs_dir2_leaf_tail_t;
67
68/*
69 * Leaf block.
70 * bests and tail are at the end of the block for single-leaf only
71 * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC).
72 */
73typedef struct xfs_dir2_leaf {
74 xfs_dir2_leaf_hdr_t hdr; /* leaf header */
75 xfs_dir2_leaf_entry_t ents[1]; /* entries */
76 /* ... */
77 xfs_dir2_data_off_t bests[1]; /* best free counts */
78 xfs_dir2_leaf_tail_t tail; /* leaf tail */
79} xfs_dir2_leaf_t;
80
81/*
82 * DB blocks here are logical directory block numbers, not filesystem blocks.
83 */
84
85static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
86{
87 return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
88 (uint)sizeof(xfs_dir2_leaf_entry_t));
89}
90
91/*
92 * Get address of the bestcount field in the single-leaf block.
93 */
94static inline xfs_dir2_leaf_tail_t *
95xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
96{
97 return (xfs_dir2_leaf_tail_t *)
98 ((char *)(lp) + (mp)->m_dirblksize -
99 (uint)sizeof(xfs_dir2_leaf_tail_t));
100}
101
102/*
103 * Get address of the bests array in the single-leaf block.
104 */
105static inline __be16 *
106xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
107{
108 return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
109}
110
111/*
112 * Convert dataptr to byte in file space
113 */
114static inline xfs_dir2_off_t
115xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
116{
117 return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG;
118}
119
120/*
121 * Convert byte in file space to dataptr. It had better be aligned.
122 */
123static inline xfs_dir2_dataptr_t
124xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
125{
126 return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG);
127}
128
129/*
130 * Convert byte in space to (DB) block
131 */
132static inline xfs_dir2_db_t
133xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
134{
135 return (xfs_dir2_db_t)((by) >> \
136 ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog));
137}
138
139/*
140 * Convert dataptr to a block number
141 */
142static inline xfs_dir2_db_t
143xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
144{
145 return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
146}
147
148/*
149 * Convert byte in space to offset in a block
150 */
151static inline xfs_dir2_data_aoff_t
152xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
153{
154 return (xfs_dir2_data_aoff_t)((by) & \
155 ((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1));
156}
157
158/*
159 * Convert dataptr to a byte offset in a block
160 */
161static inline xfs_dir2_data_aoff_t
162xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
163{
164 return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
165}
166
167/*
168 * Convert block and offset to byte in space
169 */
170static inline xfs_dir2_off_t
171xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
172 xfs_dir2_data_aoff_t o)
173{
174 return ((xfs_dir2_off_t)(db) << \
175 ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o);
176}
177
178/*
179 * Convert block (DB) to block (dablk)
180 */
181static inline xfs_dablk_t
182xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
183{
184 return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog);
185}
186
187/*
188 * Convert byte in space to (DA) block
189 */
190static inline xfs_dablk_t
191xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
192{
193 return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
194}
195
196/*
197 * Convert block and offset to dataptr
198 */
199static inline xfs_dir2_dataptr_t
200xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
201 xfs_dir2_data_aoff_t o)
202{
203 return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
204}
205
206/*
207 * Convert block (dablk) to block (DB)
208 */
209static inline xfs_dir2_db_t
210xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
211{
212 return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog);
213}
214
215/*
216 * Convert block (dablk) to byte offset in space
217 */
218static inline xfs_dir2_off_t
219xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
220{
221 return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
222}
223
224/*
225 * Function declarations.
226 */
227extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
228 struct xfs_dabuf *dbp);
229extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
230extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
231 struct xfs_dabuf *bp);
232extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
233 int *lowstalep, int *highstalep,
234 int *lowlogp, int *highlogp);
235extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
236 size_t bufsize, xfs_off_t *offset,
237 filldir_t filldir);
238extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
239 struct xfs_dabuf **bpp, int magic);
240extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
241 int first, int last);
242extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
243 struct xfs_dabuf *bp);
244extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
245extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
246extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
247extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
248 struct xfs_dabuf *lbp);
249extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
250 struct xfs_dabuf *lbp, xfs_dir2_db_t db);
251extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
252
253#endif /* __XFS_DIR2_LEAF_H__ */
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index a0aab7d3294f..084b3247d636 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -23,18 +23,14 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_dinode.h" 29#include "xfs_dinode.h"
32#include "xfs_inode.h" 30#include "xfs_inode.h"
33#include "xfs_bmap.h" 31#include "xfs_bmap.h"
34#include "xfs_dir2_data.h" 32#include "xfs_dir2_format.h"
35#include "xfs_dir2_leaf.h" 33#include "xfs_dir2_priv.h"
36#include "xfs_dir2_block.h"
37#include "xfs_dir2_node.h"
38#include "xfs_error.h" 34#include "xfs_error.h"
39#include "xfs_trace.h" 35#include "xfs_trace.h"
40 36
@@ -73,7 +69,7 @@ xfs_dir2_free_log_bests(
73 xfs_dir2_free_t *free; /* freespace structure */ 69 xfs_dir2_free_t *free; /* freespace structure */
74 70
75 free = bp->data; 71 free = bp->data;
76 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 72 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
77 xfs_da_log_buf(tp, bp, 73 xfs_da_log_buf(tp, bp,
78 (uint)((char *)&free->bests[first] - (char *)free), 74 (uint)((char *)&free->bests[first] - (char *)free),
79 (uint)((char *)&free->bests[last] - (char *)free + 75 (uint)((char *)&free->bests[last] - (char *)free +
@@ -91,7 +87,7 @@ xfs_dir2_free_log_header(
91 xfs_dir2_free_t *free; /* freespace structure */ 87 xfs_dir2_free_t *free; /* freespace structure */
92 88
93 free = bp->data; 89 free = bp->data;
94 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 90 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
95 xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free), 91 xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
96 (uint)(sizeof(xfs_dir2_free_hdr_t) - 1)); 92 (uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
97} 93}
@@ -244,89 +240,13 @@ xfs_dir2_leafn_add(
244 lfloglow = be16_to_cpu(leaf->hdr.count); 240 lfloglow = be16_to_cpu(leaf->hdr.count);
245 lfloghigh = -1; 241 lfloghigh = -1;
246 } 242 }
247 /* 243
248 * No stale entries, just insert a space for the new entry.
249 */
250 if (!leaf->hdr.stale) {
251 lep = &leaf->ents[index];
252 if (index < be16_to_cpu(leaf->hdr.count))
253 memmove(lep + 1, lep,
254 (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
255 lfloglow = index;
256 lfloghigh = be16_to_cpu(leaf->hdr.count);
257 be16_add_cpu(&leaf->hdr.count, 1);
258 }
259 /*
260 * There are stale entries. We'll use one for the new entry.
261 */
262 else {
263 /*
264 * If we didn't do a compact then we need to figure out
265 * which stale entry will be used.
266 */
267 if (compact == 0) {
268 /*
269 * Find first stale entry before our insertion point.
270 */
271 for (lowstale = index - 1;
272 lowstale >= 0 &&
273 be32_to_cpu(leaf->ents[lowstale].address) !=
274 XFS_DIR2_NULL_DATAPTR;
275 lowstale--)
276 continue;
277 /*
278 * Find next stale entry after insertion point.
279 * Stop looking if the answer would be worse than
280 * lowstale already found.
281 */
282 for (highstale = index;
283 highstale < be16_to_cpu(leaf->hdr.count) &&
284 be32_to_cpu(leaf->ents[highstale].address) !=
285 XFS_DIR2_NULL_DATAPTR &&
286 (lowstale < 0 ||
287 index - lowstale - 1 >= highstale - index);
288 highstale++)
289 continue;
290 }
291 /*
292 * Using the low stale entry.
293 * Shift entries up toward the stale slot.
294 */
295 if (lowstale >= 0 &&
296 (highstale == be16_to_cpu(leaf->hdr.count) ||
297 index - lowstale - 1 < highstale - index)) {
298 ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
299 XFS_DIR2_NULL_DATAPTR);
300 ASSERT(index - lowstale - 1 >= 0);
301 if (index - lowstale - 1 > 0)
302 memmove(&leaf->ents[lowstale],
303 &leaf->ents[lowstale + 1],
304 (index - lowstale - 1) * sizeof(*lep));
305 lep = &leaf->ents[index - 1];
306 lfloglow = MIN(lowstale, lfloglow);
307 lfloghigh = MAX(index - 1, lfloghigh);
308 }
309 /*
310 * Using the high stale entry.
311 * Shift entries down toward the stale slot.
312 */
313 else {
314 ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
315 XFS_DIR2_NULL_DATAPTR);
316 ASSERT(highstale - index >= 0);
317 if (highstale - index > 0)
318 memmove(&leaf->ents[index + 1],
319 &leaf->ents[index],
320 (highstale - index) * sizeof(*lep));
321 lep = &leaf->ents[index];
322 lfloglow = MIN(index, lfloglow);
323 lfloghigh = MAX(highstale, lfloghigh);
324 }
325 be16_add_cpu(&leaf->hdr.stale, -1);
326 }
327 /* 244 /*
328 * Insert the new entry, log everything. 245 * Insert the new entry, log everything.
329 */ 246 */
247 lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
248 highstale, &lfloglow, &lfloghigh);
249
330 lep->hashval = cpu_to_be32(args->hashval); 250 lep->hashval = cpu_to_be32(args->hashval);
331 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, 251 lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
332 args->blkno, args->index)); 252 args->blkno, args->index));
@@ -352,14 +272,14 @@ xfs_dir2_leafn_check(
352 272
353 leaf = bp->data; 273 leaf = bp->data;
354 mp = dp->i_mount; 274 mp = dp->i_mount;
355 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 275 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
356 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp)); 276 ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
357 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) { 277 for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
358 if (i + 1 < be16_to_cpu(leaf->hdr.count)) { 278 if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
359 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <= 279 ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
360 be32_to_cpu(leaf->ents[i + 1].hashval)); 280 be32_to_cpu(leaf->ents[i + 1].hashval));
361 } 281 }
362 if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR) 282 if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
363 stale++; 283 stale++;
364 } 284 }
365 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale); 285 ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -378,7 +298,7 @@ xfs_dir2_leafn_lasthash(
378 xfs_dir2_leaf_t *leaf; /* leaf structure */ 298 xfs_dir2_leaf_t *leaf; /* leaf structure */
379 299
380 leaf = bp->data; 300 leaf = bp->data;
381 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 301 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
382 if (count) 302 if (count)
383 *count = be16_to_cpu(leaf->hdr.count); 303 *count = be16_to_cpu(leaf->hdr.count);
384 if (!leaf->hdr.count) 304 if (!leaf->hdr.count)
@@ -417,7 +337,7 @@ xfs_dir2_leafn_lookup_for_addname(
417 tp = args->trans; 337 tp = args->trans;
418 mp = dp->i_mount; 338 mp = dp->i_mount;
419 leaf = bp->data; 339 leaf = bp->data;
420 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 340 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
421#ifdef __KERNEL__ 341#ifdef __KERNEL__
422 ASSERT(be16_to_cpu(leaf->hdr.count) > 0); 342 ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
423#endif 343#endif
@@ -434,7 +354,7 @@ xfs_dir2_leafn_lookup_for_addname(
434 curbp = state->extrablk.bp; 354 curbp = state->extrablk.bp;
435 curfdb = state->extrablk.blkno; 355 curfdb = state->extrablk.blkno;
436 free = curbp->data; 356 free = curbp->data;
437 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 357 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
438 } 358 }
439 length = xfs_dir2_data_entsize(args->namelen); 359 length = xfs_dir2_data_entsize(args->namelen);
440 /* 360 /*
@@ -488,7 +408,7 @@ xfs_dir2_leafn_lookup_for_addname(
488 ASSERT(be32_to_cpu(free->hdr.magic) == 408 ASSERT(be32_to_cpu(free->hdr.magic) ==
489 XFS_DIR2_FREE_MAGIC); 409 XFS_DIR2_FREE_MAGIC);
490 ASSERT((be32_to_cpu(free->hdr.firstdb) % 410 ASSERT((be32_to_cpu(free->hdr.firstdb) %
491 XFS_DIR2_MAX_FREE_BESTS(mp)) == 0); 411 xfs_dir2_free_max_bests(mp)) == 0);
492 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb); 412 ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
493 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) + 413 ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
494 be32_to_cpu(free->hdr.nvalid)); 414 be32_to_cpu(free->hdr.nvalid));
@@ -500,7 +420,8 @@ xfs_dir2_leafn_lookup_for_addname(
500 /* 420 /*
501 * If it has room, return it. 421 * If it has room, return it.
502 */ 422 */
503 if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { 423 if (unlikely(free->bests[fi] ==
424 cpu_to_be16(NULLDATAOFF))) {
504 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", 425 XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
505 XFS_ERRLEVEL_LOW, mp); 426 XFS_ERRLEVEL_LOW, mp);
506 if (curfdb != newfdb) 427 if (curfdb != newfdb)
@@ -561,7 +482,7 @@ xfs_dir2_leafn_lookup_for_entry(
561 tp = args->trans; 482 tp = args->trans;
562 mp = dp->i_mount; 483 mp = dp->i_mount;
563 leaf = bp->data; 484 leaf = bp->data;
564 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 485 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
565#ifdef __KERNEL__ 486#ifdef __KERNEL__
566 ASSERT(be16_to_cpu(leaf->hdr.count) > 0); 487 ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
567#endif 488#endif
@@ -742,7 +663,8 @@ xfs_dir2_leafn_moveents(
742 int i; /* temp leaf index */ 663 int i; /* temp leaf index */
743 664
744 for (i = start_s, stale = 0; i < start_s + count; i++) { 665 for (i = start_s, stale = 0; i < start_s + count; i++) {
745 if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR) 666 if (leaf_s->ents[i].address ==
667 cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
746 stale++; 668 stale++;
747 } 669 }
748 } else 670 } else
@@ -789,8 +711,8 @@ xfs_dir2_leafn_order(
789 711
790 leaf1 = leaf1_bp->data; 712 leaf1 = leaf1_bp->data;
791 leaf2 = leaf2_bp->data; 713 leaf2 = leaf2_bp->data;
792 ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 714 ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
793 ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 715 ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
794 if (be16_to_cpu(leaf1->hdr.count) > 0 && 716 if (be16_to_cpu(leaf1->hdr.count) > 0 &&
795 be16_to_cpu(leaf2->hdr.count) > 0 && 717 be16_to_cpu(leaf2->hdr.count) > 0 &&
796 (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) || 718 (be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) ||
@@ -918,7 +840,7 @@ xfs_dir2_leafn_remove(
918 xfs_da_state_blk_t *dblk, /* data block */ 840 xfs_da_state_blk_t *dblk, /* data block */
919 int *rval) /* resulting block needs join */ 841 int *rval) /* resulting block needs join */
920{ 842{
921 xfs_dir2_data_t *data; /* data block structure */ 843 xfs_dir2_data_hdr_t *hdr; /* data block header */
922 xfs_dir2_db_t db; /* data block number */ 844 xfs_dir2_db_t db; /* data block number */
923 xfs_dabuf_t *dbp; /* data block buffer */ 845 xfs_dabuf_t *dbp; /* data block buffer */
924 xfs_dir2_data_entry_t *dep; /* data block entry */ 846 xfs_dir2_data_entry_t *dep; /* data block entry */
@@ -938,7 +860,7 @@ xfs_dir2_leafn_remove(
938 tp = args->trans; 860 tp = args->trans;
939 mp = dp->i_mount; 861 mp = dp->i_mount;
940 leaf = bp->data; 862 leaf = bp->data;
941 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 863 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
942 /* 864 /*
943 * Point to the entry we're removing. 865 * Point to the entry we're removing.
944 */ 866 */
@@ -963,9 +885,9 @@ xfs_dir2_leafn_remove(
963 * in the data block in case it changes. 885 * in the data block in case it changes.
964 */ 886 */
965 dbp = dblk->bp; 887 dbp = dblk->bp;
966 data = dbp->data; 888 hdr = dbp->data;
967 dep = (xfs_dir2_data_entry_t *)((char *)data + off); 889 dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
968 longest = be16_to_cpu(data->hdr.bestfree[0].length); 890 longest = be16_to_cpu(hdr->bestfree[0].length);
969 needlog = needscan = 0; 891 needlog = needscan = 0;
970 xfs_dir2_data_make_free(tp, dbp, off, 892 xfs_dir2_data_make_free(tp, dbp, off,
971 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan); 893 xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
@@ -974,7 +896,7 @@ xfs_dir2_leafn_remove(
974 * Log the data block header if needed. 896 * Log the data block header if needed.
975 */ 897 */
976 if (needscan) 898 if (needscan)
977 xfs_dir2_data_freescan(mp, data, &needlog); 899 xfs_dir2_data_freescan(mp, hdr, &needlog);
978 if (needlog) 900 if (needlog)
979 xfs_dir2_data_log_header(tp, dbp); 901 xfs_dir2_data_log_header(tp, dbp);
980 xfs_dir2_data_check(dp, dbp); 902 xfs_dir2_data_check(dp, dbp);
@@ -982,7 +904,7 @@ xfs_dir2_leafn_remove(
982 * If the longest data block freespace changes, need to update 904 * If the longest data block freespace changes, need to update
983 * the corresponding freeblock entry. 905 * the corresponding freeblock entry.
984 */ 906 */
985 if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) { 907 if (longest < be16_to_cpu(hdr->bestfree[0].length)) {
986 int error; /* error return value */ 908 int error; /* error return value */
987 xfs_dabuf_t *fbp; /* freeblock buffer */ 909 xfs_dabuf_t *fbp; /* freeblock buffer */
988 xfs_dir2_db_t fdb; /* freeblock block number */ 910 xfs_dir2_db_t fdb; /* freeblock block number */
@@ -1000,27 +922,27 @@ xfs_dir2_leafn_remove(
1000 return error; 922 return error;
1001 } 923 }
1002 free = fbp->data; 924 free = fbp->data;
1003 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 925 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1004 ASSERT(be32_to_cpu(free->hdr.firstdb) == 926 ASSERT(be32_to_cpu(free->hdr.firstdb) ==
1005 XFS_DIR2_MAX_FREE_BESTS(mp) * 927 xfs_dir2_free_max_bests(mp) *
1006 (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); 928 (fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
1007 /* 929 /*
1008 * Calculate which entry we need to fix. 930 * Calculate which entry we need to fix.
1009 */ 931 */
1010 findex = xfs_dir2_db_to_fdindex(mp, db); 932 findex = xfs_dir2_db_to_fdindex(mp, db);
1011 longest = be16_to_cpu(data->hdr.bestfree[0].length); 933 longest = be16_to_cpu(hdr->bestfree[0].length);
1012 /* 934 /*
1013 * If the data block is now empty we can get rid of it 935 * If the data block is now empty we can get rid of it
1014 * (usually). 936 * (usually).
1015 */ 937 */
1016 if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) { 938 if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) {
1017 /* 939 /*
1018 * Try to punch out the data block. 940 * Try to punch out the data block.
1019 */ 941 */
1020 error = xfs_dir2_shrink_inode(args, db, dbp); 942 error = xfs_dir2_shrink_inode(args, db, dbp);
1021 if (error == 0) { 943 if (error == 0) {
1022 dblk->bp = NULL; 944 dblk->bp = NULL;
1023 data = NULL; 945 hdr = NULL;
1024 } 946 }
1025 /* 947 /*
1026 * We can get ENOSPC if there's no space reservation. 948 * We can get ENOSPC if there's no space reservation.
@@ -1036,7 +958,7 @@ xfs_dir2_leafn_remove(
1036 * If we got rid of the data block, we can eliminate that entry 958 * If we got rid of the data block, we can eliminate that entry
1037 * in the free block. 959 * in the free block.
1038 */ 960 */
1039 if (data == NULL) { 961 if (hdr == NULL) {
1040 /* 962 /*
1041 * One less used entry in the free table. 963 * One less used entry in the free table.
1042 */ 964 */
@@ -1052,7 +974,8 @@ xfs_dir2_leafn_remove(
1052 int i; /* free entry index */ 974 int i; /* free entry index */
1053 975
1054 for (i = findex - 1; 976 for (i = findex - 1;
1055 i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF; 977 i >= 0 &&
978 free->bests[i] == cpu_to_be16(NULLDATAOFF);
1056 i--) 979 i--)
1057 continue; 980 continue;
1058 free->hdr.nvalid = cpu_to_be32(i + 1); 981 free->hdr.nvalid = cpu_to_be32(i + 1);
@@ -1209,7 +1132,7 @@ xfs_dir2_leafn_toosmall(
1209 */ 1132 */
1210 blk = &state->path.blk[state->path.active - 1]; 1133 blk = &state->path.blk[state->path.active - 1];
1211 info = blk->bp->data; 1134 info = blk->bp->data;
1212 ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC); 1135 ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1213 leaf = (xfs_dir2_leaf_t *)info; 1136 leaf = (xfs_dir2_leaf_t *)info;
1214 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1137 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
1215 bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]); 1138 bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
@@ -1268,7 +1191,7 @@ xfs_dir2_leafn_toosmall(
1268 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1191 count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
1269 bytes = state->blocksize - (state->blocksize >> 2); 1192 bytes = state->blocksize - (state->blocksize >> 2);
1270 leaf = bp->data; 1193 leaf = bp->data;
1271 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1194 ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1272 count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale); 1195 count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
1273 bytes -= count * (uint)sizeof(leaf->ents[0]); 1196 bytes -= count * (uint)sizeof(leaf->ents[0]);
1274 /* 1197 /*
@@ -1327,8 +1250,8 @@ xfs_dir2_leafn_unbalance(
1327 ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC); 1250 ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
1328 drop_leaf = drop_blk->bp->data; 1251 drop_leaf = drop_blk->bp->data;
1329 save_leaf = save_blk->bp->data; 1252 save_leaf = save_blk->bp->data;
1330 ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1253 ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1331 ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); 1254 ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
1332 /* 1255 /*
1333 * If there are any stale leaf entries, take this opportunity 1256 * If there are any stale leaf entries, take this opportunity
1334 * to purge them. 1257 * to purge them.
@@ -1432,7 +1355,7 @@ xfs_dir2_node_addname_int(
1432 xfs_da_args_t *args, /* operation arguments */ 1355 xfs_da_args_t *args, /* operation arguments */
1433 xfs_da_state_blk_t *fblk) /* optional freespace block */ 1356 xfs_da_state_blk_t *fblk) /* optional freespace block */
1434{ 1357{
1435 xfs_dir2_data_t *data; /* data block structure */ 1358 xfs_dir2_data_hdr_t *hdr; /* data block header */
1436 xfs_dir2_db_t dbno; /* data block number */ 1359 xfs_dir2_db_t dbno; /* data block number */
1437 xfs_dabuf_t *dbp; /* data block buffer */ 1360 xfs_dabuf_t *dbp; /* data block buffer */
1438 xfs_dir2_data_entry_t *dep; /* data entry pointer */ 1361 xfs_dir2_data_entry_t *dep; /* data entry pointer */
@@ -1469,7 +1392,7 @@ xfs_dir2_node_addname_int(
1469 */ 1392 */
1470 ifbno = fblk->blkno; 1393 ifbno = fblk->blkno;
1471 free = fbp->data; 1394 free = fbp->data;
1472 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1395 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1473 findex = fblk->index; 1396 findex = fblk->index;
1474 /* 1397 /*
1475 * This means the free entry showed that the data block had 1398 * This means the free entry showed that the data block had
@@ -1553,7 +1476,7 @@ xfs_dir2_node_addname_int(
1553 continue; 1476 continue;
1554 } 1477 }
1555 free = fbp->data; 1478 free = fbp->data;
1556 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1479 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1557 findex = 0; 1480 findex = 0;
1558 } 1481 }
1559 /* 1482 /*
@@ -1680,12 +1603,12 @@ xfs_dir2_node_addname_int(
1680 free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC); 1603 free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
1681 free->hdr.firstdb = cpu_to_be32( 1604 free->hdr.firstdb = cpu_to_be32(
1682 (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * 1605 (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
1683 XFS_DIR2_MAX_FREE_BESTS(mp)); 1606 xfs_dir2_free_max_bests(mp));
1684 free->hdr.nvalid = 0; 1607 free->hdr.nvalid = 0;
1685 free->hdr.nused = 0; 1608 free->hdr.nused = 0;
1686 } else { 1609 } else {
1687 free = fbp->data; 1610 free = fbp->data;
1688 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1611 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
1689 } 1612 }
1690 1613
1691 /* 1614 /*
@@ -1697,7 +1620,7 @@ xfs_dir2_node_addname_int(
1697 * freespace block, extend that table. 1620 * freespace block, extend that table.
1698 */ 1621 */
1699 if (findex >= be32_to_cpu(free->hdr.nvalid)) { 1622 if (findex >= be32_to_cpu(free->hdr.nvalid)) {
1700 ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp)); 1623 ASSERT(findex < xfs_dir2_free_max_bests(mp));
1701 free->hdr.nvalid = cpu_to_be32(findex + 1); 1624 free->hdr.nvalid = cpu_to_be32(findex + 1);
1702 /* 1625 /*
1703 * Tag new entry so nused will go up. 1626 * Tag new entry so nused will go up.
@@ -1708,7 +1631,7 @@ xfs_dir2_node_addname_int(
1708 * If this entry was for an empty data block 1631 * If this entry was for an empty data block
1709 * (this should always be true) then update the header. 1632 * (this should always be true) then update the header.
1710 */ 1633 */
1711 if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) { 1634 if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) {
1712 be32_add_cpu(&free->hdr.nused, 1); 1635 be32_add_cpu(&free->hdr.nused, 1);
1713 xfs_dir2_free_log_header(tp, fbp); 1636 xfs_dir2_free_log_header(tp, fbp);
1714 } 1637 }
@@ -1717,8 +1640,8 @@ xfs_dir2_node_addname_int(
1717 * We haven't allocated the data entry yet so this will 1640 * We haven't allocated the data entry yet so this will
1718 * change again. 1641 * change again.
1719 */ 1642 */
1720 data = dbp->data; 1643 hdr = dbp->data;
1721 free->bests[findex] = data->hdr.bestfree[0].length; 1644 free->bests[findex] = hdr->bestfree[0].length;
1722 logfree = 1; 1645 logfree = 1;
1723 } 1646 }
1724 /* 1647 /*
@@ -1743,21 +1666,21 @@ xfs_dir2_node_addname_int(
1743 xfs_da_buf_done(fbp); 1666 xfs_da_buf_done(fbp);
1744 return error; 1667 return error;
1745 } 1668 }
1746 data = dbp->data; 1669 hdr = dbp->data;
1747 logfree = 0; 1670 logfree = 0;
1748 } 1671 }
1749 ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length); 1672 ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length);
1750 /* 1673 /*
1751 * Point to the existing unused space. 1674 * Point to the existing unused space.
1752 */ 1675 */
1753 dup = (xfs_dir2_data_unused_t *) 1676 dup = (xfs_dir2_data_unused_t *)
1754 ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset)); 1677 ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
1755 needscan = needlog = 0; 1678 needscan = needlog = 0;
1756 /* 1679 /*
1757 * Mark the first part of the unused space, inuse for us. 1680 * Mark the first part of the unused space, inuse for us.
1758 */ 1681 */
1759 xfs_dir2_data_use_free(tp, dbp, dup, 1682 xfs_dir2_data_use_free(tp, dbp, dup,
1760 (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length, 1683 (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
1761 &needlog, &needscan); 1684 &needlog, &needscan);
1762 /* 1685 /*
1763 * Fill in the new entry and log it. 1686 * Fill in the new entry and log it.
@@ -1767,13 +1690,13 @@ xfs_dir2_node_addname_int(
1767 dep->namelen = args->namelen; 1690 dep->namelen = args->namelen;
1768 memcpy(dep->name, args->name, dep->namelen); 1691 memcpy(dep->name, args->name, dep->namelen);
1769 tagp = xfs_dir2_data_entry_tag_p(dep); 1692 tagp = xfs_dir2_data_entry_tag_p(dep);
1770 *tagp = cpu_to_be16((char *)dep - (char *)data); 1693 *tagp = cpu_to_be16((char *)dep - (char *)hdr);
1771 xfs_dir2_data_log_entry(tp, dbp, dep); 1694 xfs_dir2_data_log_entry(tp, dbp, dep);
1772 /* 1695 /*
1773 * Rescan the block for bestfree if needed. 1696 * Rescan the block for bestfree if needed.
1774 */ 1697 */
1775 if (needscan) 1698 if (needscan)
1776 xfs_dir2_data_freescan(mp, data, &needlog); 1699 xfs_dir2_data_freescan(mp, hdr, &needlog);
1777 /* 1700 /*
1778 * Log the data block header if needed. 1701 * Log the data block header if needed.
1779 */ 1702 */
@@ -1782,8 +1705,8 @@ xfs_dir2_node_addname_int(
1782 /* 1705 /*
1783 * If the freespace entry is now wrong, update it. 1706 * If the freespace entry is now wrong, update it.
1784 */ 1707 */
1785 if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) { 1708 if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) {
1786 free->bests[findex] = data->hdr.bestfree[0].length; 1709 free->bests[findex] = hdr->bestfree[0].length;
1787 logfree = 1; 1710 logfree = 1;
1788 } 1711 }
1789 /* 1712 /*
@@ -1933,7 +1856,7 @@ xfs_dir2_node_replace(
1933 xfs_da_args_t *args) /* operation arguments */ 1856 xfs_da_args_t *args) /* operation arguments */
1934{ 1857{
1935 xfs_da_state_blk_t *blk; /* leaf block */ 1858 xfs_da_state_blk_t *blk; /* leaf block */
1936 xfs_dir2_data_t *data; /* data block structure */ 1859 xfs_dir2_data_hdr_t *hdr; /* data block header */
1937 xfs_dir2_data_entry_t *dep; /* data entry changed */ 1860 xfs_dir2_data_entry_t *dep; /* data entry changed */
1938 int error; /* error return value */ 1861 int error; /* error return value */
1939 int i; /* btree level */ 1862 int i; /* btree level */
@@ -1977,10 +1900,10 @@ xfs_dir2_node_replace(
1977 /* 1900 /*
1978 * Point to the data entry. 1901 * Point to the data entry.
1979 */ 1902 */
1980 data = state->extrablk.bp->data; 1903 hdr = state->extrablk.bp->data;
1981 ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC); 1904 ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
1982 dep = (xfs_dir2_data_entry_t *) 1905 dep = (xfs_dir2_data_entry_t *)
1983 ((char *)data + 1906 ((char *)hdr +
1984 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); 1907 xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
1985 ASSERT(inum != be64_to_cpu(dep->inumber)); 1908 ASSERT(inum != be64_to_cpu(dep->inumber));
1986 /* 1909 /*
@@ -2044,7 +1967,7 @@ xfs_dir2_node_trim_free(
2044 return 0; 1967 return 0;
2045 } 1968 }
2046 free = bp->data; 1969 free = bp->data;
2047 ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); 1970 ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
2048 /* 1971 /*
2049 * If there are used entries, there's nothing to do. 1972 * If there are used entries, there's nothing to do.
2050 */ 1973 */
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
deleted file mode 100644
index 82dfe7147195..000000000000
--- a/fs/xfs/xfs_dir2_node.h
+++ /dev/null
@@ -1,100 +0,0 @@
1/*
2 * Copyright (c) 2000,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_NODE_H__
19#define __XFS_DIR2_NODE_H__
20
21/*
22 * Directory version 2, btree node format structures
23 */
24
25struct uio;
26struct xfs_dabuf;
27struct xfs_da_args;
28struct xfs_da_state;
29struct xfs_da_state_blk;
30struct xfs_inode;
31struct xfs_trans;
32
33/*
34 * Offset of the freespace index.
35 */
36#define XFS_DIR2_FREE_SPACE 2
37#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
38#define XFS_DIR2_FREE_FIRSTDB(mp) \
39 xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
40
41#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */
42
43typedef struct xfs_dir2_free_hdr {
44 __be32 magic; /* XFS_DIR2_FREE_MAGIC */
45 __be32 firstdb; /* db of first entry */
46 __be32 nvalid; /* count of valid entries */
47 __be32 nused; /* count of used entries */
48} xfs_dir2_free_hdr_t;
49
50typedef struct xfs_dir2_free {
51 xfs_dir2_free_hdr_t hdr; /* block header */
52 __be16 bests[1]; /* best free counts */
53 /* unused entries are -1 */
54} xfs_dir2_free_t;
55
56#define XFS_DIR2_MAX_FREE_BESTS(mp) \
57 (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \
58 (uint)sizeof(xfs_dir2_data_off_t))
59
60/*
61 * Convert data space db to the corresponding free db.
62 */
63static inline xfs_dir2_db_t
64xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
65{
66 return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp));
67}
68
69/*
70 * Convert data space db to the corresponding index in a free db.
71 */
72static inline int
73xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
74{
75 return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
76}
77
78extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
79 struct xfs_dabuf *lbp);
80extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
81extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
82 struct xfs_da_args *args, int *indexp,
83 struct xfs_da_state *state);
84extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
85 struct xfs_dabuf *leaf2_bp);
86extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
87 struct xfs_da_state_blk *oldblk,
88 struct xfs_da_state_blk *newblk);
89extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
90extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
91 struct xfs_da_state_blk *drop_blk,
92 struct xfs_da_state_blk *save_blk);
93extern int xfs_dir2_node_addname(struct xfs_da_args *args);
94extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
95extern int xfs_dir2_node_removename(struct xfs_da_args *args);
96extern int xfs_dir2_node_replace(struct xfs_da_args *args);
97extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
98 int *rvalp);
99
100#endif /* __XFS_DIR2_NODE_H__ */
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
new file mode 100644
index 000000000000..067f403ecf8a
--- /dev/null
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -0,0 +1,135 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_PRIV_H__
19#define __XFS_DIR2_PRIV_H__
20
21/* xfs_dir2.c */
22extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
23extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
24extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
25extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
26 xfs_dir2_db_t *dbp);
27extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
28 struct xfs_dabuf *bp);
29extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
30 const unsigned char *name, int len);
31
32/* xfs_dir2_block.c */
33extern int xfs_dir2_block_addname(struct xfs_da_args *args);
34extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
35 xfs_off_t *offset, filldir_t filldir);
36extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
37extern int xfs_dir2_block_removename(struct xfs_da_args *args);
38extern int xfs_dir2_block_replace(struct xfs_da_args *args);
39extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
40 struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
41
42/* xfs_dir2_data.c */
43#ifdef DEBUG
44extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
45#else
46#define xfs_dir2_data_check(dp,bp)
47#endif
48extern struct xfs_dir2_data_free *
49xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
50 struct xfs_dir2_data_unused *dup, int *loghead);
51extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
52 struct xfs_dir2_data_hdr *hdr, int *loghead);
53extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
54 struct xfs_dabuf **bpp);
55extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
56 struct xfs_dir2_data_entry *dep);
57extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
58 struct xfs_dabuf *bp);
59extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
60 struct xfs_dir2_data_unused *dup);
61extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
62 xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
63 int *needlogp, int *needscanp);
64extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
65 struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
66 xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
67
68/* xfs_dir2_leaf.c */
69extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
70 struct xfs_dabuf *dbp);
71extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
72extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
73 struct xfs_dabuf *bp);
74extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
75 int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
76extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
77 size_t bufsize, xfs_off_t *offset, filldir_t filldir);
78extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
79 struct xfs_dabuf **bpp, int magic);
80extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
81 int first, int last);
82extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
83 struct xfs_dabuf *bp);
84extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
85extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
86extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
87extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
88 struct xfs_dabuf *lbp);
89extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
90 struct xfs_dabuf *lbp, xfs_dir2_db_t db);
91extern struct xfs_dir2_leaf_entry *
92xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact,
93 int lowstale, int highstale,
94 int *lfloglow, int *lfloghigh);
95extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
96
97/* xfs_dir2_node.c */
98extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
99 struct xfs_dabuf *lbp);
100extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
101extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
102 struct xfs_da_args *args, int *indexp,
103 struct xfs_da_state *state);
104extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
105 struct xfs_dabuf *leaf2_bp);
106extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
107 struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
108extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
109extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
110 struct xfs_da_state_blk *drop_blk,
111 struct xfs_da_state_blk *save_blk);
112extern int xfs_dir2_node_addname(struct xfs_da_args *args);
113extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
114extern int xfs_dir2_node_removename(struct xfs_da_args *args);
115extern int xfs_dir2_node_replace(struct xfs_da_args *args);
116extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
117 int *rvalp);
118
119/* xfs_dir2_sf.c */
120extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp);
121extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
122 struct xfs_dir2_sf_entry *sfep);
123extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
124 struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
125extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
126 int size, xfs_dir2_sf_hdr_t *sfhp);
127extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
128extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
129extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
130 xfs_off_t *offset, filldir_t filldir);
131extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
132extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
133extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
134
135#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index b1bae6b1eed9..79d05e84e296 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -23,18 +23,16 @@
23#include "xfs_trans.h" 23#include "xfs_trans.h"
24#include "xfs_sb.h" 24#include "xfs_sb.h"
25#include "xfs_ag.h" 25#include "xfs_ag.h"
26#include "xfs_dir2.h"
27#include "xfs_mount.h" 26#include "xfs_mount.h"
28#include "xfs_da_btree.h" 27#include "xfs_da_btree.h"
29#include "xfs_bmap_btree.h" 28#include "xfs_bmap_btree.h"
30#include "xfs_dir2_sf.h"
31#include "xfs_dinode.h" 29#include "xfs_dinode.h"
32#include "xfs_inode.h" 30#include "xfs_inode.h"
33#include "xfs_inode_item.h" 31#include "xfs_inode_item.h"
34#include "xfs_error.h" 32#include "xfs_error.h"
35#include "xfs_dir2_data.h" 33#include "xfs_dir2.h"
36#include "xfs_dir2_leaf.h" 34#include "xfs_dir2_format.h"
37#include "xfs_dir2_block.h" 35#include "xfs_dir2_priv.h"
38#include "xfs_trace.h" 36#include "xfs_trace.h"
39 37
40/* 38/*
@@ -60,6 +58,82 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
60#endif /* XFS_BIG_INUMS */ 58#endif /* XFS_BIG_INUMS */
61 59
62/* 60/*
61 * Inode numbers in short-form directories can come in two versions,
62 * either 4 bytes or 8 bytes wide. These helpers deal with the
63 * two forms transparently by looking at the headers i8count field.
64 *
65 * For 64-bit inode number the most significant byte must be zero.
66 */
67static xfs_ino_t
68xfs_dir2_sf_get_ino(
69 struct xfs_dir2_sf_hdr *hdr,
70 xfs_dir2_inou_t *from)
71{
72 if (hdr->i8count)
73 return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
74 else
75 return get_unaligned_be32(&from->i4.i);
76}
77
78static void
79xfs_dir2_sf_put_ino(
80 struct xfs_dir2_sf_hdr *hdr,
81 xfs_dir2_inou_t *to,
82 xfs_ino_t ino)
83{
84 ASSERT((ino & 0xff00000000000000ULL) == 0);
85
86 if (hdr->i8count)
87 put_unaligned_be64(ino, &to->i8.i);
88 else
89 put_unaligned_be32(ino, &to->i4.i);
90}
91
92xfs_ino_t
93xfs_dir2_sf_get_parent_ino(
94 struct xfs_dir2_sf_hdr *hdr)
95{
96 return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
97}
98
99static void
100xfs_dir2_sf_put_parent_ino(
101 struct xfs_dir2_sf_hdr *hdr,
102 xfs_ino_t ino)
103{
104 xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
105}
106
107/*
108 * In short-form directory entries the inode numbers are stored at variable
109 * offset behind the entry name. The inode numbers may only be accessed
110 * through the helpers below.
111 */
112static xfs_dir2_inou_t *
113xfs_dir2_sfe_inop(
114 struct xfs_dir2_sf_entry *sfep)
115{
116 return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen];
117}
118
119xfs_ino_t
120xfs_dir2_sfe_get_ino(
121 struct xfs_dir2_sf_hdr *hdr,
122 struct xfs_dir2_sf_entry *sfep)
123{
124 return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep));
125}
126
127static void
128xfs_dir2_sfe_put_ino(
129 struct xfs_dir2_sf_hdr *hdr,
130 struct xfs_dir2_sf_entry *sfep,
131 xfs_ino_t ino)
132{
133 xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino);
134}
135
136/*
63 * Given a block directory (dp/block), calculate its size as a shortform (sf) 137 * Given a block directory (dp/block), calculate its size as a shortform (sf)
64 * directory and a header for the sf directory, if it will fit it the 138 * directory and a header for the sf directory, if it will fit it the
65 * space currently present in the inode. If it won't fit, the output 139 * space currently present in the inode. If it won't fit, the output
@@ -68,7 +142,7 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
68int /* size for sf form */ 142int /* size for sf form */
69xfs_dir2_block_sfsize( 143xfs_dir2_block_sfsize(
70 xfs_inode_t *dp, /* incore inode pointer */ 144 xfs_inode_t *dp, /* incore inode pointer */
71 xfs_dir2_block_t *block, /* block directory data */ 145 xfs_dir2_data_hdr_t *hdr, /* block directory data */
72 xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */ 146 xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */
73{ 147{
74 xfs_dir2_dataptr_t addr; /* data entry address */ 148 xfs_dir2_dataptr_t addr; /* data entry address */
@@ -88,7 +162,7 @@ xfs_dir2_block_sfsize(
88 mp = dp->i_mount; 162 mp = dp->i_mount;
89 163
90 count = i8count = namelen = 0; 164 count = i8count = namelen = 0;
91 btp = xfs_dir2_block_tail_p(mp, block); 165 btp = xfs_dir2_block_tail_p(mp, hdr);
92 blp = xfs_dir2_block_leaf_p(btp); 166 blp = xfs_dir2_block_leaf_p(btp);
93 167
94 /* 168 /*
@@ -101,7 +175,7 @@ xfs_dir2_block_sfsize(
101 * Calculate the pointer to the entry at hand. 175 * Calculate the pointer to the entry at hand.
102 */ 176 */
103 dep = (xfs_dir2_data_entry_t *) 177 dep = (xfs_dir2_data_entry_t *)
104 ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); 178 ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
105 /* 179 /*
106 * Detect . and .., so we can special-case them. 180 * Detect . and .., so we can special-case them.
107 * . is not included in sf directories. 181 * . is not included in sf directories.
@@ -138,7 +212,7 @@ xfs_dir2_block_sfsize(
138 */ 212 */
139 sfhp->count = count; 213 sfhp->count = count;
140 sfhp->i8count = i8count; 214 sfhp->i8count = i8count;
141 xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent); 215 xfs_dir2_sf_put_parent_ino(sfhp, parent);
142 return size; 216 return size;
143} 217}
144 218
@@ -153,7 +227,7 @@ xfs_dir2_block_to_sf(
153 int size, /* shortform directory size */ 227 int size, /* shortform directory size */
154 xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */ 228 xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */
155{ 229{
156 xfs_dir2_block_t *block; /* block structure */ 230 xfs_dir2_data_hdr_t *hdr; /* block header */
157 xfs_dir2_block_tail_t *btp; /* block tail pointer */ 231 xfs_dir2_block_tail_t *btp; /* block tail pointer */
158 xfs_dir2_data_entry_t *dep; /* data entry pointer */ 232 xfs_dir2_data_entry_t *dep; /* data entry pointer */
159 xfs_inode_t *dp; /* incore directory inode */ 233 xfs_inode_t *dp; /* incore directory inode */
@@ -164,8 +238,7 @@ xfs_dir2_block_to_sf(
164 xfs_mount_t *mp; /* filesystem mount point */ 238 xfs_mount_t *mp; /* filesystem mount point */
165 char *ptr; /* current data pointer */ 239 char *ptr; /* current data pointer */
166 xfs_dir2_sf_entry_t *sfep; /* shortform entry */ 240 xfs_dir2_sf_entry_t *sfep; /* shortform entry */
167 xfs_dir2_sf_t *sfp; /* shortform structure */ 241 xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */
168 xfs_ino_t temp;
169 242
170 trace_xfs_dir2_block_to_sf(args); 243 trace_xfs_dir2_block_to_sf(args);
171 244
@@ -176,13 +249,14 @@ xfs_dir2_block_to_sf(
176 * Make a copy of the block data, so we can shrink the inode 249 * Make a copy of the block data, so we can shrink the inode
177 * and add local data. 250 * and add local data.
178 */ 251 */
179 block = kmem_alloc(mp->m_dirblksize, KM_SLEEP); 252 hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
180 memcpy(block, bp->data, mp->m_dirblksize); 253 memcpy(hdr, bp->data, mp->m_dirblksize);
181 logflags = XFS_ILOG_CORE; 254 logflags = XFS_ILOG_CORE;
182 if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { 255 if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
183 ASSERT(error != ENOSPC); 256 ASSERT(error != ENOSPC);
184 goto out; 257 goto out;
185 } 258 }
259
186 /* 260 /*
187 * The buffer is now unconditionally gone, whether 261 * The buffer is now unconditionally gone, whether
188 * xfs_dir2_shrink_inode worked or not. 262 * xfs_dir2_shrink_inode worked or not.
@@ -198,14 +272,14 @@ xfs_dir2_block_to_sf(
198 /* 272 /*
199 * Copy the header into the newly allocate local space. 273 * Copy the header into the newly allocate local space.
200 */ 274 */
201 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 275 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
202 memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); 276 memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
203 dp->i_d.di_size = size; 277 dp->i_d.di_size = size;
204 /* 278 /*
205 * Set up to loop over the block's entries. 279 * Set up to loop over the block's entries.
206 */ 280 */
207 btp = xfs_dir2_block_tail_p(mp, block); 281 btp = xfs_dir2_block_tail_p(mp, hdr);
208 ptr = (char *)block->u; 282 ptr = (char *)(hdr + 1);
209 endptr = (char *)xfs_dir2_block_leaf_p(btp); 283 endptr = (char *)xfs_dir2_block_leaf_p(btp);
210 sfep = xfs_dir2_sf_firstentry(sfp); 284 sfep = xfs_dir2_sf_firstentry(sfp);
211 /* 285 /*
@@ -233,7 +307,7 @@ xfs_dir2_block_to_sf(
233 else if (dep->namelen == 2 && 307 else if (dep->namelen == 2 &&
234 dep->name[0] == '.' && dep->name[1] == '.') 308 dep->name[0] == '.' && dep->name[1] == '.')
235 ASSERT(be64_to_cpu(dep->inumber) == 309 ASSERT(be64_to_cpu(dep->inumber) ==
236 xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent)); 310 xfs_dir2_sf_get_parent_ino(sfp));
237 /* 311 /*
238 * Normal entry, copy it into shortform. 312 * Normal entry, copy it into shortform.
239 */ 313 */
@@ -241,11 +315,11 @@ xfs_dir2_block_to_sf(
241 sfep->namelen = dep->namelen; 315 sfep->namelen = dep->namelen;
242 xfs_dir2_sf_put_offset(sfep, 316 xfs_dir2_sf_put_offset(sfep,
243 (xfs_dir2_data_aoff_t) 317 (xfs_dir2_data_aoff_t)
244 ((char *)dep - (char *)block)); 318 ((char *)dep - (char *)hdr));
245 memcpy(sfep->name, dep->name, dep->namelen); 319 memcpy(sfep->name, dep->name, dep->namelen);
246 temp = be64_to_cpu(dep->inumber); 320 xfs_dir2_sfe_put_ino(sfp, sfep,
247 xfs_dir2_sf_put_inumber(sfp, &temp, 321 be64_to_cpu(dep->inumber));
248 xfs_dir2_sf_inumberp(sfep)); 322
249 sfep = xfs_dir2_sf_nextentry(sfp, sfep); 323 sfep = xfs_dir2_sf_nextentry(sfp, sfep);
250 } 324 }
251 ptr += xfs_dir2_data_entsize(dep->namelen); 325 ptr += xfs_dir2_data_entsize(dep->namelen);
@@ -254,7 +328,7 @@ xfs_dir2_block_to_sf(
254 xfs_dir2_sf_check(args); 328 xfs_dir2_sf_check(args);
255out: 329out:
256 xfs_trans_log_inode(args->trans, dp, logflags); 330 xfs_trans_log_inode(args->trans, dp, logflags);
257 kmem_free(block); 331 kmem_free(hdr);
258 return error; 332 return error;
259} 333}
260 334
@@ -277,7 +351,7 @@ xfs_dir2_sf_addname(
277 xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */ 351 xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */
278 int old_isize; /* di_size before adding name */ 352 int old_isize; /* di_size before adding name */
279 int pick; /* which algorithm to use */ 353 int pick; /* which algorithm to use */
280 xfs_dir2_sf_t *sfp; /* shortform structure */ 354 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
281 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */ 355 xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */
282 356
283 trace_xfs_dir2_sf_addname(args); 357 trace_xfs_dir2_sf_addname(args);
@@ -294,19 +368,19 @@ xfs_dir2_sf_addname(
294 } 368 }
295 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 369 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
296 ASSERT(dp->i_df.if_u1.if_data != NULL); 370 ASSERT(dp->i_df.if_u1.if_data != NULL);
297 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 371 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
298 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 372 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
299 /* 373 /*
300 * Compute entry (and change in) size. 374 * Compute entry (and change in) size.
301 */ 375 */
302 add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); 376 add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
303 incr_isize = add_entsize; 377 incr_isize = add_entsize;
304 objchange = 0; 378 objchange = 0;
305#if XFS_BIG_INUMS 379#if XFS_BIG_INUMS
306 /* 380 /*
307 * Do we have to change to 8 byte inodes? 381 * Do we have to change to 8 byte inodes?
308 */ 382 */
309 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { 383 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
310 /* 384 /*
311 * Yes, adjust the entry size and the total size. 385 * Yes, adjust the entry size and the total size.
312 */ 386 */
@@ -314,7 +388,7 @@ xfs_dir2_sf_addname(
314 (uint)sizeof(xfs_dir2_ino8_t) - 388 (uint)sizeof(xfs_dir2_ino8_t) -
315 (uint)sizeof(xfs_dir2_ino4_t); 389 (uint)sizeof(xfs_dir2_ino4_t);
316 incr_isize += 390 incr_isize +=
317 (sfp->hdr.count + 2) * 391 (sfp->count + 2) *
318 ((uint)sizeof(xfs_dir2_ino8_t) - 392 ((uint)sizeof(xfs_dir2_ino8_t) -
319 (uint)sizeof(xfs_dir2_ino4_t)); 393 (uint)sizeof(xfs_dir2_ino4_t));
320 objchange = 1; 394 objchange = 1;
@@ -384,21 +458,21 @@ xfs_dir2_sf_addname_easy(
384{ 458{
385 int byteoff; /* byte offset in sf dir */ 459 int byteoff; /* byte offset in sf dir */
386 xfs_inode_t *dp; /* incore directory inode */ 460 xfs_inode_t *dp; /* incore directory inode */
387 xfs_dir2_sf_t *sfp; /* shortform structure */ 461 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
388 462
389 dp = args->dp; 463 dp = args->dp;
390 464
391 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 465 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
392 byteoff = (int)((char *)sfep - (char *)sfp); 466 byteoff = (int)((char *)sfep - (char *)sfp);
393 /* 467 /*
394 * Grow the in-inode space. 468 * Grow the in-inode space.
395 */ 469 */
396 xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen), 470 xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen),
397 XFS_DATA_FORK); 471 XFS_DATA_FORK);
398 /* 472 /*
399 * Need to set up again due to realloc of the inode data. 473 * Need to set up again due to realloc of the inode data.
400 */ 474 */
401 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 475 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
402 sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff); 476 sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
403 /* 477 /*
404 * Fill in the new entry. 478 * Fill in the new entry.
@@ -406,15 +480,14 @@ xfs_dir2_sf_addname_easy(
406 sfep->namelen = args->namelen; 480 sfep->namelen = args->namelen;
407 xfs_dir2_sf_put_offset(sfep, offset); 481 xfs_dir2_sf_put_offset(sfep, offset);
408 memcpy(sfep->name, args->name, sfep->namelen); 482 memcpy(sfep->name, args->name, sfep->namelen);
409 xfs_dir2_sf_put_inumber(sfp, &args->inumber, 483 xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
410 xfs_dir2_sf_inumberp(sfep));
411 /* 484 /*
412 * Update the header and inode. 485 * Update the header and inode.
413 */ 486 */
414 sfp->hdr.count++; 487 sfp->count++;
415#if XFS_BIG_INUMS 488#if XFS_BIG_INUMS
416 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) 489 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
417 sfp->hdr.i8count++; 490 sfp->i8count++;
418#endif 491#endif
419 dp->i_d.di_size = new_isize; 492 dp->i_d.di_size = new_isize;
420 xfs_dir2_sf_check(args); 493 xfs_dir2_sf_check(args);
@@ -444,19 +517,19 @@ xfs_dir2_sf_addname_hard(
444 xfs_dir2_data_aoff_t offset; /* current offset value */ 517 xfs_dir2_data_aoff_t offset; /* current offset value */
445 int old_isize; /* previous di_size */ 518 int old_isize; /* previous di_size */
446 xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */ 519 xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */
447 xfs_dir2_sf_t *oldsfp; /* original shortform dir */ 520 xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */
448 xfs_dir2_sf_entry_t *sfep; /* entry in new dir */ 521 xfs_dir2_sf_entry_t *sfep; /* entry in new dir */
449 xfs_dir2_sf_t *sfp; /* new shortform dir */ 522 xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */
450 523
451 /* 524 /*
452 * Copy the old directory to the stack buffer. 525 * Copy the old directory to the stack buffer.
453 */ 526 */
454 dp = args->dp; 527 dp = args->dp;
455 528
456 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 529 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
457 old_isize = (int)dp->i_d.di_size; 530 old_isize = (int)dp->i_d.di_size;
458 buf = kmem_alloc(old_isize, KM_SLEEP); 531 buf = kmem_alloc(old_isize, KM_SLEEP);
459 oldsfp = (xfs_dir2_sf_t *)buf; 532 oldsfp = (xfs_dir2_sf_hdr_t *)buf;
460 memcpy(oldsfp, sfp, old_isize); 533 memcpy(oldsfp, sfp, old_isize);
461 /* 534 /*
462 * Loop over the old directory finding the place we're going 535 * Loop over the old directory finding the place we're going
@@ -485,7 +558,7 @@ xfs_dir2_sf_addname_hard(
485 /* 558 /*
486 * Reset the pointer since the buffer was reallocated. 559 * Reset the pointer since the buffer was reallocated.
487 */ 560 */
488 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 561 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
489 /* 562 /*
490 * Copy the first part of the directory, including the header. 563 * Copy the first part of the directory, including the header.
491 */ 564 */
@@ -498,12 +571,11 @@ xfs_dir2_sf_addname_hard(
498 sfep->namelen = args->namelen; 571 sfep->namelen = args->namelen;
499 xfs_dir2_sf_put_offset(sfep, offset); 572 xfs_dir2_sf_put_offset(sfep, offset);
500 memcpy(sfep->name, args->name, sfep->namelen); 573 memcpy(sfep->name, args->name, sfep->namelen);
501 xfs_dir2_sf_put_inumber(sfp, &args->inumber, 574 xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
502 xfs_dir2_sf_inumberp(sfep)); 575 sfp->count++;
503 sfp->hdr.count++;
504#if XFS_BIG_INUMS 576#if XFS_BIG_INUMS
505 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) 577 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
506 sfp->hdr.i8count++; 578 sfp->i8count++;
507#endif 579#endif
508 /* 580 /*
509 * If there's more left to copy, do that. 581 * If there's more left to copy, do that.
@@ -537,14 +609,14 @@ xfs_dir2_sf_addname_pick(
537 xfs_mount_t *mp; /* filesystem mount point */ 609 xfs_mount_t *mp; /* filesystem mount point */
538 xfs_dir2_data_aoff_t offset; /* data block offset */ 610 xfs_dir2_data_aoff_t offset; /* data block offset */
539 xfs_dir2_sf_entry_t *sfep; /* shortform entry */ 611 xfs_dir2_sf_entry_t *sfep; /* shortform entry */
540 xfs_dir2_sf_t *sfp; /* shortform structure */ 612 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
541 int size; /* entry's data size */ 613 int size; /* entry's data size */
542 int used; /* data bytes used */ 614 int used; /* data bytes used */
543 615
544 dp = args->dp; 616 dp = args->dp;
545 mp = dp->i_mount; 617 mp = dp->i_mount;
546 618
547 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 619 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
548 size = xfs_dir2_data_entsize(args->namelen); 620 size = xfs_dir2_data_entsize(args->namelen);
549 offset = XFS_DIR2_DATA_FIRST_OFFSET; 621 offset = XFS_DIR2_DATA_FIRST_OFFSET;
550 sfep = xfs_dir2_sf_firstentry(sfp); 622 sfep = xfs_dir2_sf_firstentry(sfp);
@@ -554,7 +626,7 @@ xfs_dir2_sf_addname_pick(
554 * Keep track of data offset and whether we've seen a place 626 * Keep track of data offset and whether we've seen a place
555 * to insert the new entry. 627 * to insert the new entry.
556 */ 628 */
557 for (i = 0; i < sfp->hdr.count; i++) { 629 for (i = 0; i < sfp->count; i++) {
558 if (!holefit) 630 if (!holefit)
559 holefit = offset + size <= xfs_dir2_sf_get_offset(sfep); 631 holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
560 offset = xfs_dir2_sf_get_offset(sfep) + 632 offset = xfs_dir2_sf_get_offset(sfep) +
@@ -566,7 +638,7 @@ xfs_dir2_sf_addname_pick(
566 * was a data block (block form directory). 638 * was a data block (block form directory).
567 */ 639 */
568 used = offset + 640 used = offset +
569 (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) + 641 (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
570 (uint)sizeof(xfs_dir2_block_tail_t); 642 (uint)sizeof(xfs_dir2_block_tail_t);
571 /* 643 /*
572 * If it won't fit in a block form then we can't insert it, 644 * If it won't fit in a block form then we can't insert it,
@@ -612,30 +684,30 @@ xfs_dir2_sf_check(
612 xfs_ino_t ino; /* entry inode number */ 684 xfs_ino_t ino; /* entry inode number */
613 int offset; /* data offset */ 685 int offset; /* data offset */
614 xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */ 686 xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */
615 xfs_dir2_sf_t *sfp; /* shortform structure */ 687 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
616 688
617 dp = args->dp; 689 dp = args->dp;
618 690
619 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 691 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
620 offset = XFS_DIR2_DATA_FIRST_OFFSET; 692 offset = XFS_DIR2_DATA_FIRST_OFFSET;
621 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 693 ino = xfs_dir2_sf_get_parent_ino(sfp);
622 i8count = ino > XFS_DIR2_MAX_SHORT_INUM; 694 i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
623 695
624 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 696 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
625 i < sfp->hdr.count; 697 i < sfp->count;
626 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 698 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
627 ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); 699 ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
628 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); 700 ino = xfs_dir2_sfe_get_ino(sfp, sfep);
629 i8count += ino > XFS_DIR2_MAX_SHORT_INUM; 701 i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
630 offset = 702 offset =
631 xfs_dir2_sf_get_offset(sfep) + 703 xfs_dir2_sf_get_offset(sfep) +
632 xfs_dir2_data_entsize(sfep->namelen); 704 xfs_dir2_data_entsize(sfep->namelen);
633 } 705 }
634 ASSERT(i8count == sfp->hdr.i8count); 706 ASSERT(i8count == sfp->i8count);
635 ASSERT(XFS_BIG_INUMS || i8count == 0); 707 ASSERT(XFS_BIG_INUMS || i8count == 0);
636 ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size); 708 ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
637 ASSERT(offset + 709 ASSERT(offset +
638 (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + 710 (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
639 (uint)sizeof(xfs_dir2_block_tail_t) <= 711 (uint)sizeof(xfs_dir2_block_tail_t) <=
640 dp->i_mount->m_dirblksize); 712 dp->i_mount->m_dirblksize);
641} 713}
@@ -651,7 +723,7 @@ xfs_dir2_sf_create(
651{ 723{
652 xfs_inode_t *dp; /* incore directory inode */ 724 xfs_inode_t *dp; /* incore directory inode */
653 int i8count; /* parent inode is an 8-byte number */ 725 int i8count; /* parent inode is an 8-byte number */
654 xfs_dir2_sf_t *sfp; /* shortform structure */ 726 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
655 int size; /* directory size */ 727 int size; /* directory size */
656 728
657 trace_xfs_dir2_sf_create(args); 729 trace_xfs_dir2_sf_create(args);
@@ -681,13 +753,13 @@ xfs_dir2_sf_create(
681 /* 753 /*
682 * Fill in the header, 754 * Fill in the header,
683 */ 755 */
684 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 756 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
685 sfp->hdr.i8count = i8count; 757 sfp->i8count = i8count;
686 /* 758 /*
687 * Now can put in the inode number, since i8count is set. 759 * Now can put in the inode number, since i8count is set.
688 */ 760 */
689 xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent); 761 xfs_dir2_sf_put_parent_ino(sfp, pino);
690 sfp->hdr.count = 0; 762 sfp->count = 0;
691 dp->i_d.di_size = size; 763 dp->i_d.di_size = size;
692 xfs_dir2_sf_check(args); 764 xfs_dir2_sf_check(args);
693 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); 765 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
@@ -705,7 +777,7 @@ xfs_dir2_sf_getdents(
705 xfs_mount_t *mp; /* filesystem mount point */ 777 xfs_mount_t *mp; /* filesystem mount point */
706 xfs_dir2_dataptr_t off; /* current entry's offset */ 778 xfs_dir2_dataptr_t off; /* current entry's offset */
707 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 779 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
708 xfs_dir2_sf_t *sfp; /* shortform structure */ 780 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
709 xfs_dir2_dataptr_t dot_offset; 781 xfs_dir2_dataptr_t dot_offset;
710 xfs_dir2_dataptr_t dotdot_offset; 782 xfs_dir2_dataptr_t dotdot_offset;
711 xfs_ino_t ino; 783 xfs_ino_t ino;
@@ -724,9 +796,9 @@ xfs_dir2_sf_getdents(
724 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 796 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
725 ASSERT(dp->i_df.if_u1.if_data != NULL); 797 ASSERT(dp->i_df.if_u1.if_data != NULL);
726 798
727 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 799 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
728 800
729 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 801 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
730 802
731 /* 803 /*
732 * If the block number in the offset is out of range, we're done. 804 * If the block number in the offset is out of range, we're done.
@@ -759,7 +831,7 @@ xfs_dir2_sf_getdents(
759 * Put .. entry unless we're starting past it. 831 * Put .. entry unless we're starting past it.
760 */ 832 */
761 if (*offset <= dotdot_offset) { 833 if (*offset <= dotdot_offset) {
762 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 834 ino = xfs_dir2_sf_get_parent_ino(sfp);
763 if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { 835 if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) {
764 *offset = dotdot_offset & 0x7fffffff; 836 *offset = dotdot_offset & 0x7fffffff;
765 return 0; 837 return 0;
@@ -770,7 +842,7 @@ xfs_dir2_sf_getdents(
770 * Loop while there are more entries and put'ing works. 842 * Loop while there are more entries and put'ing works.
771 */ 843 */
772 sfep = xfs_dir2_sf_firstentry(sfp); 844 sfep = xfs_dir2_sf_firstentry(sfp);
773 for (i = 0; i < sfp->hdr.count; i++) { 845 for (i = 0; i < sfp->count; i++) {
774 off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, 846 off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
775 xfs_dir2_sf_get_offset(sfep)); 847 xfs_dir2_sf_get_offset(sfep));
776 848
@@ -779,7 +851,7 @@ xfs_dir2_sf_getdents(
779 continue; 851 continue;
780 } 852 }
781 853
782 ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); 854 ino = xfs_dir2_sfe_get_ino(sfp, sfep);
783 if (filldir(dirent, (char *)sfep->name, sfep->namelen, 855 if (filldir(dirent, (char *)sfep->name, sfep->namelen,
784 off & 0x7fffffff, ino, DT_UNKNOWN)) { 856 off & 0x7fffffff, ino, DT_UNKNOWN)) {
785 *offset = off & 0x7fffffff; 857 *offset = off & 0x7fffffff;
@@ -805,7 +877,7 @@ xfs_dir2_sf_lookup(
805 int i; /* entry index */ 877 int i; /* entry index */
806 int error; 878 int error;
807 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 879 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
808 xfs_dir2_sf_t *sfp; /* shortform structure */ 880 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
809 enum xfs_dacmp cmp; /* comparison result */ 881 enum xfs_dacmp cmp; /* comparison result */
810 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ 882 xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
811 883
@@ -824,8 +896,8 @@ xfs_dir2_sf_lookup(
824 } 896 }
825 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 897 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
826 ASSERT(dp->i_df.if_u1.if_data != NULL); 898 ASSERT(dp->i_df.if_u1.if_data != NULL);
827 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 899 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
828 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 900 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
829 /* 901 /*
830 * Special case for . 902 * Special case for .
831 */ 903 */
@@ -839,7 +911,7 @@ xfs_dir2_sf_lookup(
839 */ 911 */
840 if (args->namelen == 2 && 912 if (args->namelen == 2 &&
841 args->name[0] == '.' && args->name[1] == '.') { 913 args->name[0] == '.' && args->name[1] == '.') {
842 args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 914 args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
843 args->cmpresult = XFS_CMP_EXACT; 915 args->cmpresult = XFS_CMP_EXACT;
844 return XFS_ERROR(EEXIST); 916 return XFS_ERROR(EEXIST);
845 } 917 }
@@ -847,7 +919,7 @@ xfs_dir2_sf_lookup(
847 * Loop over all the entries trying to match ours. 919 * Loop over all the entries trying to match ours.
848 */ 920 */
849 ci_sfep = NULL; 921 ci_sfep = NULL;
850 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; 922 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
851 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 923 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
852 /* 924 /*
853 * Compare name and if it's an exact match, return the inode 925 * Compare name and if it's an exact match, return the inode
@@ -858,8 +930,7 @@ xfs_dir2_sf_lookup(
858 sfep->namelen); 930 sfep->namelen);
859 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { 931 if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
860 args->cmpresult = cmp; 932 args->cmpresult = cmp;
861 args->inumber = xfs_dir2_sf_get_inumber(sfp, 933 args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep);
862 xfs_dir2_sf_inumberp(sfep));
863 if (cmp == XFS_CMP_EXACT) 934 if (cmp == XFS_CMP_EXACT)
864 return XFS_ERROR(EEXIST); 935 return XFS_ERROR(EEXIST);
865 ci_sfep = sfep; 936 ci_sfep = sfep;
@@ -891,7 +962,7 @@ xfs_dir2_sf_removename(
891 int newsize; /* new inode size */ 962 int newsize; /* new inode size */
892 int oldsize; /* old inode size */ 963 int oldsize; /* old inode size */
893 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 964 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
894 xfs_dir2_sf_t *sfp; /* shortform structure */ 965 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
895 966
896 trace_xfs_dir2_sf_removename(args); 967 trace_xfs_dir2_sf_removename(args);
897 968
@@ -908,32 +979,31 @@ xfs_dir2_sf_removename(
908 } 979 }
909 ASSERT(dp->i_df.if_bytes == oldsize); 980 ASSERT(dp->i_df.if_bytes == oldsize);
910 ASSERT(dp->i_df.if_u1.if_data != NULL); 981 ASSERT(dp->i_df.if_u1.if_data != NULL);
911 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 982 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
912 ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 983 ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
913 /* 984 /*
914 * Loop over the old directory entries. 985 * Loop over the old directory entries.
915 * Find the one we're deleting. 986 * Find the one we're deleting.
916 */ 987 */
917 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; 988 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
918 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 989 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
919 if (xfs_da_compname(args, sfep->name, sfep->namelen) == 990 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
920 XFS_CMP_EXACT) { 991 XFS_CMP_EXACT) {
921 ASSERT(xfs_dir2_sf_get_inumber(sfp, 992 ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) ==
922 xfs_dir2_sf_inumberp(sfep)) == 993 args->inumber);
923 args->inumber);
924 break; 994 break;
925 } 995 }
926 } 996 }
927 /* 997 /*
928 * Didn't find it. 998 * Didn't find it.
929 */ 999 */
930 if (i == sfp->hdr.count) 1000 if (i == sfp->count)
931 return XFS_ERROR(ENOENT); 1001 return XFS_ERROR(ENOENT);
932 /* 1002 /*
933 * Calculate sizes. 1003 * Calculate sizes.
934 */ 1004 */
935 byteoff = (int)((char *)sfep - (char *)sfp); 1005 byteoff = (int)((char *)sfep - (char *)sfp);
936 entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen); 1006 entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
937 newsize = oldsize - entsize; 1007 newsize = oldsize - entsize;
938 /* 1008 /*
939 * Copy the part if any after the removed entry, sliding it down. 1009 * Copy the part if any after the removed entry, sliding it down.
@@ -944,22 +1014,22 @@ xfs_dir2_sf_removename(
944 /* 1014 /*
945 * Fix up the header and file size. 1015 * Fix up the header and file size.
946 */ 1016 */
947 sfp->hdr.count--; 1017 sfp->count--;
948 dp->i_d.di_size = newsize; 1018 dp->i_d.di_size = newsize;
949 /* 1019 /*
950 * Reallocate, making it smaller. 1020 * Reallocate, making it smaller.
951 */ 1021 */
952 xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK); 1022 xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
953 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1023 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
954#if XFS_BIG_INUMS 1024#if XFS_BIG_INUMS
955 /* 1025 /*
956 * Are we changing inode number size? 1026 * Are we changing inode number size?
957 */ 1027 */
958 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) { 1028 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
959 if (sfp->hdr.i8count == 1) 1029 if (sfp->i8count == 1)
960 xfs_dir2_sf_toino4(args); 1030 xfs_dir2_sf_toino4(args);
961 else 1031 else
962 sfp->hdr.i8count--; 1032 sfp->i8count--;
963 } 1033 }
964#endif 1034#endif
965 xfs_dir2_sf_check(args); 1035 xfs_dir2_sf_check(args);
@@ -983,7 +1053,7 @@ xfs_dir2_sf_replace(
983 int i8elevated; /* sf_toino8 set i8count=1 */ 1053 int i8elevated; /* sf_toino8 set i8count=1 */
984#endif 1054#endif
985 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ 1055 xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
986 xfs_dir2_sf_t *sfp; /* shortform structure */ 1056 xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
987 1057
988 trace_xfs_dir2_sf_replace(args); 1058 trace_xfs_dir2_sf_replace(args);
989 1059
@@ -999,19 +1069,19 @@ xfs_dir2_sf_replace(
999 } 1069 }
1000 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size); 1070 ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
1001 ASSERT(dp->i_df.if_u1.if_data != NULL); 1071 ASSERT(dp->i_df.if_u1.if_data != NULL);
1002 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1072 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1003 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count)); 1073 ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
1004#if XFS_BIG_INUMS 1074#if XFS_BIG_INUMS
1005 /* 1075 /*
1006 * New inode number is large, and need to convert to 8-byte inodes. 1076 * New inode number is large, and need to convert to 8-byte inodes.
1007 */ 1077 */
1008 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) { 1078 if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
1009 int error; /* error return value */ 1079 int error; /* error return value */
1010 int newsize; /* new inode size */ 1080 int newsize; /* new inode size */
1011 1081
1012 newsize = 1082 newsize =
1013 dp->i_df.if_bytes + 1083 dp->i_df.if_bytes +
1014 (sfp->hdr.count + 1) * 1084 (sfp->count + 1) *
1015 ((uint)sizeof(xfs_dir2_ino8_t) - 1085 ((uint)sizeof(xfs_dir2_ino8_t) -
1016 (uint)sizeof(xfs_dir2_ino4_t)); 1086 (uint)sizeof(xfs_dir2_ino4_t));
1017 /* 1087 /*
@@ -1029,7 +1099,7 @@ xfs_dir2_sf_replace(
1029 */ 1099 */
1030 xfs_dir2_sf_toino8(args); 1100 xfs_dir2_sf_toino8(args);
1031 i8elevated = 1; 1101 i8elevated = 1;
1032 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1102 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1033 } else 1103 } else
1034 i8elevated = 0; 1104 i8elevated = 0;
1035#endif 1105#endif
@@ -1040,34 +1110,32 @@ xfs_dir2_sf_replace(
1040 if (args->namelen == 2 && 1110 if (args->namelen == 2 &&
1041 args->name[0] == '.' && args->name[1] == '.') { 1111 args->name[0] == '.' && args->name[1] == '.') {
1042#if XFS_BIG_INUMS || defined(DEBUG) 1112#if XFS_BIG_INUMS || defined(DEBUG)
1043 ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); 1113 ino = xfs_dir2_sf_get_parent_ino(sfp);
1044 ASSERT(args->inumber != ino); 1114 ASSERT(args->inumber != ino);
1045#endif 1115#endif
1046 xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent); 1116 xfs_dir2_sf_put_parent_ino(sfp, args->inumber);
1047 } 1117 }
1048 /* 1118 /*
1049 * Normal entry, look for the name. 1119 * Normal entry, look for the name.
1050 */ 1120 */
1051 else { 1121 else {
1052 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); 1122 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
1053 i < sfp->hdr.count; 1123 i < sfp->count;
1054 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { 1124 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
1055 if (xfs_da_compname(args, sfep->name, sfep->namelen) == 1125 if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
1056 XFS_CMP_EXACT) { 1126 XFS_CMP_EXACT) {
1057#if XFS_BIG_INUMS || defined(DEBUG) 1127#if XFS_BIG_INUMS || defined(DEBUG)
1058 ino = xfs_dir2_sf_get_inumber(sfp, 1128 ino = xfs_dir2_sfe_get_ino(sfp, sfep);
1059 xfs_dir2_sf_inumberp(sfep));
1060 ASSERT(args->inumber != ino); 1129 ASSERT(args->inumber != ino);
1061#endif 1130#endif
1062 xfs_dir2_sf_put_inumber(sfp, &args->inumber, 1131 xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
1063 xfs_dir2_sf_inumberp(sfep));
1064 break; 1132 break;
1065 } 1133 }
1066 } 1134 }
1067 /* 1135 /*
1068 * Didn't find it. 1136 * Didn't find it.
1069 */ 1137 */
1070 if (i == sfp->hdr.count) { 1138 if (i == sfp->count) {
1071 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); 1139 ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
1072#if XFS_BIG_INUMS 1140#if XFS_BIG_INUMS
1073 if (i8elevated) 1141 if (i8elevated)
@@ -1085,10 +1153,10 @@ xfs_dir2_sf_replace(
1085 /* 1153 /*
1086 * And the old count was one, so need to convert to small. 1154 * And the old count was one, so need to convert to small.
1087 */ 1155 */
1088 if (sfp->hdr.i8count == 1) 1156 if (sfp->i8count == 1)
1089 xfs_dir2_sf_toino4(args); 1157 xfs_dir2_sf_toino4(args);
1090 else 1158 else
1091 sfp->hdr.i8count--; 1159 sfp->i8count--;
1092 } 1160 }
1093 /* 1161 /*
1094 * See if the old number was small, the new number is large. 1162 * See if the old number was small, the new number is large.
@@ -1099,9 +1167,9 @@ xfs_dir2_sf_replace(
1099 * add to the i8count unless we just converted to 8-byte 1167 * add to the i8count unless we just converted to 8-byte
1100 * inodes (which does an implied i8count = 1) 1168 * inodes (which does an implied i8count = 1)
1101 */ 1169 */
1102 ASSERT(sfp->hdr.i8count != 0); 1170 ASSERT(sfp->i8count != 0);
1103 if (!i8elevated) 1171 if (!i8elevated)
1104 sfp->hdr.i8count++; 1172 sfp->i8count++;
1105 } 1173 }
1106#endif 1174#endif
1107 xfs_dir2_sf_check(args); 1175 xfs_dir2_sf_check(args);
@@ -1121,13 +1189,12 @@ xfs_dir2_sf_toino4(
1121 char *buf; /* old dir's buffer */ 1189 char *buf; /* old dir's buffer */
1122 xfs_inode_t *dp; /* incore directory inode */ 1190 xfs_inode_t *dp; /* incore directory inode */
1123 int i; /* entry index */ 1191 int i; /* entry index */
1124 xfs_ino_t ino; /* entry inode number */
1125 int newsize; /* new inode size */ 1192 int newsize; /* new inode size */
1126 xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ 1193 xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
1127 xfs_dir2_sf_t *oldsfp; /* old sf directory */ 1194 xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
1128 int oldsize; /* old inode size */ 1195 int oldsize; /* old inode size */
1129 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1196 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1130 xfs_dir2_sf_t *sfp; /* new sf directory */ 1197 xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
1131 1198
1132 trace_xfs_dir2_sf_toino4(args); 1199 trace_xfs_dir2_sf_toino4(args);
1133 1200
@@ -1140,44 +1207,42 @@ xfs_dir2_sf_toino4(
1140 */ 1207 */
1141 oldsize = dp->i_df.if_bytes; 1208 oldsize = dp->i_df.if_bytes;
1142 buf = kmem_alloc(oldsize, KM_SLEEP); 1209 buf = kmem_alloc(oldsize, KM_SLEEP);
1143 oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1210 oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1144 ASSERT(oldsfp->hdr.i8count == 1); 1211 ASSERT(oldsfp->i8count == 1);
1145 memcpy(buf, oldsfp, oldsize); 1212 memcpy(buf, oldsfp, oldsize);
1146 /* 1213 /*
1147 * Compute the new inode size. 1214 * Compute the new inode size.
1148 */ 1215 */
1149 newsize = 1216 newsize =
1150 oldsize - 1217 oldsize -
1151 (oldsfp->hdr.count + 1) * 1218 (oldsfp->count + 1) *
1152 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); 1219 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
1153 xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); 1220 xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
1154 xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); 1221 xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
1155 /* 1222 /*
1156 * Reset our pointers, the data has moved. 1223 * Reset our pointers, the data has moved.
1157 */ 1224 */
1158 oldsfp = (xfs_dir2_sf_t *)buf; 1225 oldsfp = (xfs_dir2_sf_hdr_t *)buf;
1159 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1226 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1160 /* 1227 /*
1161 * Fill in the new header. 1228 * Fill in the new header.
1162 */ 1229 */
1163 sfp->hdr.count = oldsfp->hdr.count; 1230 sfp->count = oldsfp->count;
1164 sfp->hdr.i8count = 0; 1231 sfp->i8count = 0;
1165 ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); 1232 xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
1166 xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
1167 /* 1233 /*
1168 * Copy the entries field by field. 1234 * Copy the entries field by field.
1169 */ 1235 */
1170 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), 1236 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
1171 oldsfep = xfs_dir2_sf_firstentry(oldsfp); 1237 oldsfep = xfs_dir2_sf_firstentry(oldsfp);
1172 i < sfp->hdr.count; 1238 i < sfp->count;
1173 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), 1239 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
1174 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { 1240 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
1175 sfep->namelen = oldsfep->namelen; 1241 sfep->namelen = oldsfep->namelen;
1176 sfep->offset = oldsfep->offset; 1242 sfep->offset = oldsfep->offset;
1177 memcpy(sfep->name, oldsfep->name, sfep->namelen); 1243 memcpy(sfep->name, oldsfep->name, sfep->namelen);
1178 ino = xfs_dir2_sf_get_inumber(oldsfp, 1244 xfs_dir2_sfe_put_ino(sfp, sfep,
1179 xfs_dir2_sf_inumberp(oldsfep)); 1245 xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
1180 xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
1181 } 1246 }
1182 /* 1247 /*
1183 * Clean up the inode. 1248 * Clean up the inode.
@@ -1199,13 +1264,12 @@ xfs_dir2_sf_toino8(
1199 char *buf; /* old dir's buffer */ 1264 char *buf; /* old dir's buffer */
1200 xfs_inode_t *dp; /* incore directory inode */ 1265 xfs_inode_t *dp; /* incore directory inode */
1201 int i; /* entry index */ 1266 int i; /* entry index */
1202 xfs_ino_t ino; /* entry inode number */
1203 int newsize; /* new inode size */ 1267 int newsize; /* new inode size */
1204 xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */ 1268 xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
1205 xfs_dir2_sf_t *oldsfp; /* old sf directory */ 1269 xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
1206 int oldsize; /* old inode size */ 1270 int oldsize; /* old inode size */
1207 xfs_dir2_sf_entry_t *sfep; /* new sf entry */ 1271 xfs_dir2_sf_entry_t *sfep; /* new sf entry */
1208 xfs_dir2_sf_t *sfp; /* new sf directory */ 1272 xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
1209 1273
1210 trace_xfs_dir2_sf_toino8(args); 1274 trace_xfs_dir2_sf_toino8(args);
1211 1275
@@ -1218,44 +1282,42 @@ xfs_dir2_sf_toino8(
1218 */ 1282 */
1219 oldsize = dp->i_df.if_bytes; 1283 oldsize = dp->i_df.if_bytes;
1220 buf = kmem_alloc(oldsize, KM_SLEEP); 1284 buf = kmem_alloc(oldsize, KM_SLEEP);
1221 oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1285 oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1222 ASSERT(oldsfp->hdr.i8count == 0); 1286 ASSERT(oldsfp->i8count == 0);
1223 memcpy(buf, oldsfp, oldsize); 1287 memcpy(buf, oldsfp, oldsize);
1224 /* 1288 /*
1225 * Compute the new inode size. 1289 * Compute the new inode size.
1226 */ 1290 */
1227 newsize = 1291 newsize =
1228 oldsize + 1292 oldsize +
1229 (oldsfp->hdr.count + 1) * 1293 (oldsfp->count + 1) *
1230 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)); 1294 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
1231 xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK); 1295 xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
1232 xfs_idata_realloc(dp, newsize, XFS_DATA_FORK); 1296 xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
1233 /* 1297 /*
1234 * Reset our pointers, the data has moved. 1298 * Reset our pointers, the data has moved.
1235 */ 1299 */
1236 oldsfp = (xfs_dir2_sf_t *)buf; 1300 oldsfp = (xfs_dir2_sf_hdr_t *)buf;
1237 sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data; 1301 sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
1238 /* 1302 /*
1239 * Fill in the new header. 1303 * Fill in the new header.
1240 */ 1304 */
1241 sfp->hdr.count = oldsfp->hdr.count; 1305 sfp->count = oldsfp->count;
1242 sfp->hdr.i8count = 1; 1306 sfp->i8count = 1;
1243 ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent); 1307 xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
1244 xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
1245 /* 1308 /*
1246 * Copy the entries field by field. 1309 * Copy the entries field by field.
1247 */ 1310 */
1248 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp), 1311 for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
1249 oldsfep = xfs_dir2_sf_firstentry(oldsfp); 1312 oldsfep = xfs_dir2_sf_firstentry(oldsfp);
1250 i < sfp->hdr.count; 1313 i < sfp->count;
1251 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep), 1314 i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
1252 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) { 1315 oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
1253 sfep->namelen = oldsfep->namelen; 1316 sfep->namelen = oldsfep->namelen;
1254 sfep->offset = oldsfep->offset; 1317 sfep->offset = oldsfep->offset;
1255 memcpy(sfep->name, oldsfep->name, sfep->namelen); 1318 memcpy(sfep->name, oldsfep->name, sfep->namelen);
1256 ino = xfs_dir2_sf_get_inumber(oldsfp, 1319 xfs_dir2_sfe_put_ino(sfp, sfep,
1257 xfs_dir2_sf_inumberp(oldsfep)); 1320 xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
1258 xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
1259 } 1321 }
1260 /* 1322 /*
1261 * Clean up the inode. 1323 * Clean up the inode.
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
deleted file mode 100644
index 6ac44b550d39..000000000000
--- a/fs/xfs/xfs_dir2_sf.h
+++ /dev/null
@@ -1,171 +0,0 @@
1/*
2 * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18#ifndef __XFS_DIR2_SF_H__
19#define __XFS_DIR2_SF_H__
20
21/*
22 * Directory layout when stored internal to an inode.
23 *
24 * Small directories are packed as tightly as possible so as to
25 * fit into the literal area of the inode.
26 */
27
28struct uio;
29struct xfs_dabuf;
30struct xfs_da_args;
31struct xfs_dir2_block;
32struct xfs_inode;
33struct xfs_mount;
34struct xfs_trans;
35
36/*
37 * Inode number stored as 8 8-bit values.
38 */
39typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
40
41/*
42 * Inode number stored as 4 8-bit values.
43 * Works a lot of the time, when all the inode numbers in a directory
44 * fit in 32 bits.
45 */
46typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
47
48typedef union {
49 xfs_dir2_ino8_t i8;
50 xfs_dir2_ino4_t i4;
51} xfs_dir2_inou_t;
52#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
53
54/*
55 * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
56 * Only need 16 bits, this is the byte offset into the single block form.
57 */
58typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
59
60/*
61 * The parent directory has a dedicated field, and the self-pointer must
62 * be calculated on the fly.
63 *
64 * Entries are packed toward the top as tightly as possible. The header
65 * and the elements must be memcpy'd out into a work area to get correct
66 * alignment for the inode number fields.
67 */
68typedef struct xfs_dir2_sf_hdr {
69 __uint8_t count; /* count of entries */
70 __uint8_t i8count; /* count of 8-byte inode #s */
71 xfs_dir2_inou_t parent; /* parent dir inode number */
72} __arch_pack xfs_dir2_sf_hdr_t;
73
74typedef struct xfs_dir2_sf_entry {
75 __uint8_t namelen; /* actual name length */
76 xfs_dir2_sf_off_t offset; /* saved offset */
77 __uint8_t name[1]; /* name, variable size */
78 xfs_dir2_inou_t inumber; /* inode number, var. offset */
79} __arch_pack xfs_dir2_sf_entry_t;
80
81typedef struct xfs_dir2_sf {
82 xfs_dir2_sf_hdr_t hdr; /* shortform header */
83 xfs_dir2_sf_entry_t list[1]; /* shortform entries */
84} xfs_dir2_sf_t;
85
86static inline int xfs_dir2_sf_hdr_size(int i8count)
87{
88 return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
89 ((i8count) == 0) * \
90 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
91}
92
93static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
94{
95 return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
96}
97
98static inline xfs_intino_t
99xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
100{
101 return ((sfp)->hdr.i8count == 0 ? \
102 (xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \
103 (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
104}
105
106static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
107 xfs_dir2_inou_t *to)
108{
109 if ((sfp)->hdr.i8count == 0)
110 XFS_PUT_DIR_INO4(*(from), (to)->i4);
111 else
112 XFS_PUT_DIR_INO8(*(from), (to)->i8);
113}
114
115static inline xfs_dir2_data_aoff_t
116xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
117{
118 return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
119}
120
121static inline void
122xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
123{
124 INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
125}
126
127static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
128{
129 return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
130 ((sfp)->hdr.i8count == 0) * \
131 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
132}
133
134static inline int
135xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
136{
137 return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \
138 ((sfp)->hdr.i8count == 0) * \
139 ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
140}
141
142static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
143{
144 return ((xfs_dir2_sf_entry_t *) \
145 ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
146}
147
148static inline xfs_dir2_sf_entry_t *
149xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
150{
151 return ((xfs_dir2_sf_entry_t *) \
152 ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
153}
154
155/*
156 * Functions.
157 */
158extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
159 struct xfs_dir2_block *block,
160 xfs_dir2_sf_hdr_t *sfhp);
161extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
162 int size, xfs_dir2_sf_hdr_t *sfhp);
163extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
164extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
165extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
166 xfs_off_t *offset, filldir_t filldir);
167extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
168extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
169extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
170
171#endif /* __XFS_DIR2_SF_H__ */
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 8f6fc1a96386..c13fed8c394a 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -249,6 +249,11 @@ typedef struct xfs_fsop_resblks {
249#define XFS_MAX_LOG_BYTES \ 249#define XFS_MAX_LOG_BYTES \
250 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES) 250 ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
251 251
252/* Used for sanity checks on superblock */
253#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks)
254#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \
255 (s)->sb_agblocks + XFS_MIN_AG_BLOCKS)
256
252/* 257/*
253 * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT 258 * Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
254 */ 259 */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 84ebeec16642..dd5628bd8d0b 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -683,7 +683,7 @@ xfs_dialloc(
683 return 0; 683 return 0;
684 } 684 }
685 agi = XFS_BUF_TO_AGI(agbp); 685 agi = XFS_BUF_TO_AGI(agbp);
686 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 686 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
687 } else { 687 } else {
688 /* 688 /*
689 * Continue where we left off before. In this case, we 689 * Continue where we left off before. In this case, we
@@ -691,7 +691,7 @@ xfs_dialloc(
691 */ 691 */
692 agbp = *IO_agbp; 692 agbp = *IO_agbp;
693 agi = XFS_BUF_TO_AGI(agbp); 693 agi = XFS_BUF_TO_AGI(agbp);
694 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 694 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
695 ASSERT(be32_to_cpu(agi->agi_freecount) > 0); 695 ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
696 } 696 }
697 mp = tp->t_mountp; 697 mp = tp->t_mountp;
@@ -775,7 +775,7 @@ nextag:
775 if (error) 775 if (error)
776 goto nextag; 776 goto nextag;
777 agi = XFS_BUF_TO_AGI(agbp); 777 agi = XFS_BUF_TO_AGI(agbp);
778 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 778 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
779 } 779 }
780 /* 780 /*
781 * Here with an allocation group that has a free inode. 781 * Here with an allocation group that has a free inode.
@@ -944,7 +944,7 @@ nextag:
944 * See if the most recently allocated block has any free. 944 * See if the most recently allocated block has any free.
945 */ 945 */
946newino: 946newino:
947 if (be32_to_cpu(agi->agi_newino) != NULLAGINO) { 947 if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
948 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino), 948 error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
949 XFS_LOOKUP_EQ, &i); 949 XFS_LOOKUP_EQ, &i);
950 if (error) 950 if (error)
@@ -1085,7 +1085,7 @@ xfs_difree(
1085 return error; 1085 return error;
1086 } 1086 }
1087 agi = XFS_BUF_TO_AGI(agbp); 1087 agi = XFS_BUF_TO_AGI(agbp);
1088 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 1088 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1089 ASSERT(agbno < be32_to_cpu(agi->agi_length)); 1089 ASSERT(agbno < be32_to_cpu(agi->agi_length));
1090 /* 1090 /*
1091 * Initialize the cursor. 1091 * Initialize the cursor.
@@ -1438,7 +1438,7 @@ xfs_ialloc_log_agi(
1438 xfs_agi_t *agi; /* allocation group header */ 1438 xfs_agi_t *agi; /* allocation group header */
1439 1439
1440 agi = XFS_BUF_TO_AGI(bp); 1440 agi = XFS_BUF_TO_AGI(bp);
1441 ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC); 1441 ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
1442#endif 1442#endif
1443 /* 1443 /*
1444 * Compute byte offsets for the first and last fields. 1444 * Compute byte offsets for the first and last fields.
@@ -1492,7 +1492,7 @@ xfs_read_agi(
1492 /* 1492 /*
1493 * Validate the magic number of the agi block. 1493 * Validate the magic number of the agi block.
1494 */ 1494 */
1495 agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC && 1495 agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
1496 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) && 1496 XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
1497 be32_to_cpu(agi->agi_seqno) == agno; 1497 be32_to_cpu(agi->agi_seqno) == agno;
1498 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, 1498 if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 16921f55c542..c6a75815aea0 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -31,7 +31,6 @@
31#include "xfs_dinode.h" 31#include "xfs_dinode.h"
32#include "xfs_inode.h" 32#include "xfs_inode.h"
33#include "xfs_btree.h" 33#include "xfs_btree.h"
34#include "xfs_btree_trace.h"
35#include "xfs_ialloc.h" 34#include "xfs_ialloc.h"
36#include "xfs_alloc.h" 35#include "xfs_alloc.h"
37#include "xfs_error.h" 36#include "xfs_error.h"
@@ -205,72 +204,6 @@ xfs_inobt_recs_inorder(
205} 204}
206#endif /* DEBUG */ 205#endif /* DEBUG */
207 206
208#ifdef XFS_BTREE_TRACE
209ktrace_t *xfs_inobt_trace_buf;
210
211STATIC void
212xfs_inobt_trace_enter(
213 struct xfs_btree_cur *cur,
214 const char *func,
215 char *s,
216 int type,
217 int line,
218 __psunsigned_t a0,
219 __psunsigned_t a1,
220 __psunsigned_t a2,
221 __psunsigned_t a3,
222 __psunsigned_t a4,
223 __psunsigned_t a5,
224 __psunsigned_t a6,
225 __psunsigned_t a7,
226 __psunsigned_t a8,
227 __psunsigned_t a9,
228 __psunsigned_t a10)
229{
230 ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
231 (void *)func, (void *)s, NULL, (void *)cur,
232 (void *)a0, (void *)a1, (void *)a2, (void *)a3,
233 (void *)a4, (void *)a5, (void *)a6, (void *)a7,
234 (void *)a8, (void *)a9, (void *)a10);
235}
236
237STATIC void
238xfs_inobt_trace_cursor(
239 struct xfs_btree_cur *cur,
240 __uint32_t *s0,
241 __uint64_t *l0,
242 __uint64_t *l1)
243{
244 *s0 = cur->bc_private.a.agno;
245 *l0 = cur->bc_rec.i.ir_startino;
246 *l1 = cur->bc_rec.i.ir_free;
247}
248
249STATIC void
250xfs_inobt_trace_key(
251 struct xfs_btree_cur *cur,
252 union xfs_btree_key *key,
253 __uint64_t *l0,
254 __uint64_t *l1)
255{
256 *l0 = be32_to_cpu(key->inobt.ir_startino);
257 *l1 = 0;
258}
259
260STATIC void
261xfs_inobt_trace_record(
262 struct xfs_btree_cur *cur,
263 union xfs_btree_rec *rec,
264 __uint64_t *l0,
265 __uint64_t *l1,
266 __uint64_t *l2)
267{
268 *l0 = be32_to_cpu(rec->inobt.ir_startino);
269 *l1 = be32_to_cpu(rec->inobt.ir_freecount);
270 *l2 = be64_to_cpu(rec->inobt.ir_free);
271}
272#endif /* XFS_BTREE_TRACE */
273
274static const struct xfs_btree_ops xfs_inobt_ops = { 207static const struct xfs_btree_ops xfs_inobt_ops = {
275 .rec_len = sizeof(xfs_inobt_rec_t), 208 .rec_len = sizeof(xfs_inobt_rec_t),
276 .key_len = sizeof(xfs_inobt_key_t), 209 .key_len = sizeof(xfs_inobt_key_t),
@@ -286,18 +219,10 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
286 .init_rec_from_cur = xfs_inobt_init_rec_from_cur, 219 .init_rec_from_cur = xfs_inobt_init_rec_from_cur,
287 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur, 220 .init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
288 .key_diff = xfs_inobt_key_diff, 221 .key_diff = xfs_inobt_key_diff,
289
290#ifdef DEBUG 222#ifdef DEBUG
291 .keys_inorder = xfs_inobt_keys_inorder, 223 .keys_inorder = xfs_inobt_keys_inorder,
292 .recs_inorder = xfs_inobt_recs_inorder, 224 .recs_inorder = xfs_inobt_recs_inorder,
293#endif 225#endif
294
295#ifdef XFS_BTREE_TRACE
296 .trace_enter = xfs_inobt_trace_enter,
297 .trace_cursor = xfs_inobt_trace_cursor,
298 .trace_key = xfs_inobt_trace_key,
299 .trace_record = xfs_inobt_trace_record,
300#endif
301}; 226};
302 227
303/* 228/*
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3631783b2b53..7759812c1bbe 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,7 +38,6 @@
38#include "xfs_trans_priv.h" 38#include "xfs_trans_priv.h"
39#include "xfs_inode_item.h" 39#include "xfs_inode_item.h"
40#include "xfs_bmap.h" 40#include "xfs_bmap.h"
41#include "xfs_btree_trace.h"
42#include "xfs_trace.h" 41#include "xfs_trace.h"
43 42
44 43
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a098a20ca63e..3cc21ddf9f7e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -37,7 +37,6 @@
37#include "xfs_buf_item.h" 37#include "xfs_buf_item.h"
38#include "xfs_inode_item.h" 38#include "xfs_inode_item.h"
39#include "xfs_btree.h" 39#include "xfs_btree.h"
40#include "xfs_btree_trace.h"
41#include "xfs_alloc.h" 40#include "xfs_alloc.h"
42#include "xfs_ialloc.h" 41#include "xfs_ialloc.h"
43#include "xfs_bmap.h" 42#include "xfs_bmap.h"
@@ -52,7 +51,7 @@ kmem_zone_t *xfs_ifork_zone;
52kmem_zone_t *xfs_inode_zone; 51kmem_zone_t *xfs_inode_zone;
53 52
54/* 53/*
55 * Used in xfs_itruncate(). This is the maximum number of extents 54 * Used in xfs_itruncate_extents(). This is the maximum number of extents
56 * freed from a file in a single transaction. 55 * freed from a file in a single transaction.
57 */ 56 */
58#define XFS_ITRUNC_MAX_EXTENTS 2 57#define XFS_ITRUNC_MAX_EXTENTS 2
@@ -167,7 +166,7 @@ xfs_imap_to_bp(
167 166
168 dip = (xfs_dinode_t *)xfs_buf_offset(bp, 167 dip = (xfs_dinode_t *)xfs_buf_offset(bp,
169 (i << mp->m_sb.sb_inodelog)); 168 (i << mp->m_sb.sb_inodelog));
170 di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC && 169 di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
171 XFS_DINODE_GOOD_VERSION(dip->di_version); 170 XFS_DINODE_GOOD_VERSION(dip->di_version);
172 if (unlikely(XFS_TEST_ERROR(!di_ok, mp, 171 if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
173 XFS_ERRTAG_ITOBP_INOTOBP, 172 XFS_ERRTAG_ITOBP_INOTOBP,
@@ -802,7 +801,7 @@ xfs_iread(
802 * If we got something that isn't an inode it means someone 801 * If we got something that isn't an inode it means someone
803 * (nfs or dmi) has a stale handle. 802 * (nfs or dmi) has a stale handle.
804 */ 803 */
805 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) { 804 if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
806#ifdef DEBUG 805#ifdef DEBUG
807 xfs_alert(mp, 806 xfs_alert(mp,
808 "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)", 807 "%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
@@ -1179,15 +1178,15 @@ xfs_ialloc(
1179 * at least do it for regular files. 1178 * at least do it for regular files.
1180 */ 1179 */
1181#ifdef DEBUG 1180#ifdef DEBUG
1182void 1181STATIC void
1183xfs_isize_check( 1182xfs_isize_check(
1184 xfs_mount_t *mp, 1183 struct xfs_inode *ip,
1185 xfs_inode_t *ip, 1184 xfs_fsize_t isize)
1186 xfs_fsize_t isize)
1187{ 1185{
1188 xfs_fileoff_t map_first; 1186 struct xfs_mount *mp = ip->i_mount;
1189 int nimaps; 1187 xfs_fileoff_t map_first;
1190 xfs_bmbt_irec_t imaps[2]; 1188 int nimaps;
1189 xfs_bmbt_irec_t imaps[2];
1191 1190
1192 if ((ip->i_d.di_mode & S_IFMT) != S_IFREG) 1191 if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
1193 return; 1192 return;
@@ -1214,168 +1213,14 @@ xfs_isize_check(
1214 ASSERT(nimaps == 1); 1213 ASSERT(nimaps == 1);
1215 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK); 1214 ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
1216} 1215}
1216#else /* DEBUG */
1217#define xfs_isize_check(ip, isize)
1217#endif /* DEBUG */ 1218#endif /* DEBUG */
1218 1219
1219/* 1220/*
1220 * Calculate the last possible buffered byte in a file. This must 1221 * Free up the underlying blocks past new_size. The new size must be smaller
1221 * include data that was buffered beyond the EOF by the write code. 1222 * than the current size. This routine can be used both for the attribute and
1222 * This also needs to deal with overflowing the xfs_fsize_t type 1223 * data fork, and does not modify the inode size, which is left to the caller.
1223 * which can happen for sizes near the limit.
1224 *
1225 * We also need to take into account any blocks beyond the EOF. It
1226 * may be the case that they were buffered by a write which failed.
1227 * In that case the pages will still be in memory, but the inode size
1228 * will never have been updated.
1229 */
1230STATIC xfs_fsize_t
1231xfs_file_last_byte(
1232 xfs_inode_t *ip)
1233{
1234 xfs_mount_t *mp;
1235 xfs_fsize_t last_byte;
1236 xfs_fileoff_t last_block;
1237 xfs_fileoff_t size_last_block;
1238 int error;
1239
1240 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
1241
1242 mp = ip->i_mount;
1243 /*
1244 * Only check for blocks beyond the EOF if the extents have
1245 * been read in. This eliminates the need for the inode lock,
1246 * and it also saves us from looking when it really isn't
1247 * necessary.
1248 */
1249 if (ip->i_df.if_flags & XFS_IFEXTENTS) {
1250 xfs_ilock(ip, XFS_ILOCK_SHARED);
1251 error = xfs_bmap_last_offset(NULL, ip, &last_block,
1252 XFS_DATA_FORK);
1253 xfs_iunlock(ip, XFS_ILOCK_SHARED);
1254 if (error) {
1255 last_block = 0;
1256 }
1257 } else {
1258 last_block = 0;
1259 }
1260 size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
1261 last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
1262
1263 last_byte = XFS_FSB_TO_B(mp, last_block);
1264 if (last_byte < 0) {
1265 return XFS_MAXIOFFSET(mp);
1266 }
1267 last_byte += (1 << mp->m_writeio_log);
1268 if (last_byte < 0) {
1269 return XFS_MAXIOFFSET(mp);
1270 }
1271 return last_byte;
1272}
1273
1274/*
1275 * Start the truncation of the file to new_size. The new size
1276 * must be smaller than the current size. This routine will
1277 * clear the buffer and page caches of file data in the removed
1278 * range, and xfs_itruncate_finish() will remove the underlying
1279 * disk blocks.
1280 *
1281 * The inode must have its I/O lock locked EXCLUSIVELY, and it
1282 * must NOT have the inode lock held at all. This is because we're
1283 * calling into the buffer/page cache code and we can't hold the
1284 * inode lock when we do so.
1285 *
1286 * We need to wait for any direct I/Os in flight to complete before we
1287 * proceed with the truncate. This is needed to prevent the extents
1288 * being read or written by the direct I/Os from being removed while the
1289 * I/O is in flight as there is no other method of synchronising
1290 * direct I/O with the truncate operation. Also, because we hold
1291 * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
1292 * started until the truncate completes and drops the lock. Essentially,
1293 * the xfs_ioend_wait() call forms an I/O barrier that provides strict
1294 * ordering between direct I/Os and the truncate operation.
1295 *
1296 * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
1297 * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used
1298 * in the case that the caller is locking things out of order and
1299 * may not be able to call xfs_itruncate_finish() with the inode lock
1300 * held without dropping the I/O lock. If the caller must drop the
1301 * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
1302 * must be called again with all the same restrictions as the initial
1303 * call.
1304 */
1305int
1306xfs_itruncate_start(
1307 xfs_inode_t *ip,
1308 uint flags,
1309 xfs_fsize_t new_size)
1310{
1311 xfs_fsize_t last_byte;
1312 xfs_off_t toss_start;
1313 xfs_mount_t *mp;
1314 int error = 0;
1315
1316 ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1317 ASSERT((new_size == 0) || (new_size <= ip->i_size));
1318 ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
1319 (flags == XFS_ITRUNC_MAYBE));
1320
1321 mp = ip->i_mount;
1322
1323 /* wait for the completion of any pending DIOs */
1324 if (new_size == 0 || new_size < ip->i_size)
1325 xfs_ioend_wait(ip);
1326
1327 /*
1328 * Call toss_pages or flushinval_pages to get rid of pages
1329 * overlapping the region being removed. We have to use
1330 * the less efficient flushinval_pages in the case that the
1331 * caller may not be able to finish the truncate without
1332 * dropping the inode's I/O lock. Make sure
1333 * to catch any pages brought in by buffers overlapping
1334 * the EOF by searching out beyond the isize by our
1335 * block size. We round new_size up to a block boundary
1336 * so that we don't toss things on the same block as
1337 * new_size but before it.
1338 *
1339 * Before calling toss_page or flushinval_pages, make sure to
1340 * call remapf() over the same region if the file is mapped.
1341 * This frees up mapped file references to the pages in the
1342 * given range and for the flushinval_pages case it ensures
1343 * that we get the latest mapped changes flushed out.
1344 */
1345 toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1346 toss_start = XFS_FSB_TO_B(mp, toss_start);
1347 if (toss_start < 0) {
1348 /*
1349 * The place to start tossing is beyond our maximum
1350 * file size, so there is no way that the data extended
1351 * out there.
1352 */
1353 return 0;
1354 }
1355 last_byte = xfs_file_last_byte(ip);
1356 trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
1357 if (last_byte > toss_start) {
1358 if (flags & XFS_ITRUNC_DEFINITE) {
1359 xfs_tosspages(ip, toss_start,
1360 -1, FI_REMAPF_LOCKED);
1361 } else {
1362 error = xfs_flushinval_pages(ip, toss_start,
1363 -1, FI_REMAPF_LOCKED);
1364 }
1365 }
1366
1367#ifdef DEBUG
1368 if (new_size == 0) {
1369 ASSERT(VN_CACHED(VFS_I(ip)) == 0);
1370 }
1371#endif
1372 return error;
1373}
1374
1375/*
1376 * Shrink the file to the given new_size. The new size must be smaller than
1377 * the current size. This will free up the underlying blocks in the removed
1378 * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
1379 * 1224 *
1380 * The transaction passed to this routine must have made a permanent log 1225 * The transaction passed to this routine must have made a permanent log
1381 * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the 1226 * reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
@@ -1387,31 +1232,6 @@ xfs_itruncate_start(
1387 * will be "held" within the returned transaction. This routine does NOT 1232 * will be "held" within the returned transaction. This routine does NOT
1388 * require any disk space to be reserved for it within the transaction. 1233 * require any disk space to be reserved for it within the transaction.
1389 * 1234 *
1390 * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
1391 * indicates the fork which is to be truncated. For the attribute fork we only
1392 * support truncation to size 0.
1393 *
1394 * We use the sync parameter to indicate whether or not the first transaction
1395 * we perform might have to be synchronous. For the attr fork, it needs to be
1396 * so if the unlink of the inode is not yet known to be permanent in the log.
1397 * This keeps us from freeing and reusing the blocks of the attribute fork
1398 * before the unlink of the inode becomes permanent.
1399 *
1400 * For the data fork, we normally have to run synchronously if we're being
1401 * called out of the inactive path or we're being called out of the create path
1402 * where we're truncating an existing file. Either way, the truncate needs to
1403 * be sync so blocks don't reappear in the file with altered data in case of a
1404 * crash. wsync filesystems can run the first case async because anything that
1405 * shrinks the inode has to run sync so by the time we're called here from
1406 * inactive, the inode size is permanently set to 0.
1407 *
1408 * Calls from the truncate path always need to be sync unless we're in a wsync
1409 * filesystem and the file has already been unlinked.
1410 *
1411 * The caller is responsible for correctly setting the sync parameter. It gets
1412 * too hard for us to guess here which path we're being called out of just
1413 * based on inode state.
1414 *
1415 * If we get an error, we must return with the inode locked and linked into the 1235 * If we get an error, we must return with the inode locked and linked into the
1416 * current transaction. This keeps things simple for the higher level code, 1236 * current transaction. This keeps things simple for the higher level code,
1417 * because it always knows that the inode is locked and held in the transaction 1237 * because it always knows that the inode is locked and held in the transaction
@@ -1419,124 +1239,30 @@ xfs_itruncate_start(
1419 * dirty on error so that transactions can be easily aborted if possible. 1239 * dirty on error so that transactions can be easily aborted if possible.
1420 */ 1240 */
1421int 1241int
1422xfs_itruncate_finish( 1242xfs_itruncate_extents(
1423 xfs_trans_t **tp, 1243 struct xfs_trans **tpp,
1424 xfs_inode_t *ip, 1244 struct xfs_inode *ip,
1425 xfs_fsize_t new_size, 1245 int whichfork,
1426 int fork, 1246 xfs_fsize_t new_size)
1427 int sync)
1428{ 1247{
1429 xfs_fsblock_t first_block; 1248 struct xfs_mount *mp = ip->i_mount;
1430 xfs_fileoff_t first_unmap_block; 1249 struct xfs_trans *tp = *tpp;
1431 xfs_fileoff_t last_block; 1250 struct xfs_trans *ntp;
1432 xfs_filblks_t unmap_len=0; 1251 xfs_bmap_free_t free_list;
1433 xfs_mount_t *mp; 1252 xfs_fsblock_t first_block;
1434 xfs_trans_t *ntp; 1253 xfs_fileoff_t first_unmap_block;
1435 int done; 1254 xfs_fileoff_t last_block;
1436 int committed; 1255 xfs_filblks_t unmap_len;
1437 xfs_bmap_free_t free_list; 1256 int committed;
1438 int error; 1257 int error = 0;
1258 int done = 0;
1439 1259
1440 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); 1260 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
1441 ASSERT((new_size == 0) || (new_size <= ip->i_size)); 1261 ASSERT(new_size <= ip->i_size);
1442 ASSERT(*tp != NULL); 1262 ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
1443 ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
1444 ASSERT(ip->i_transp == *tp);
1445 ASSERT(ip->i_itemp != NULL); 1263 ASSERT(ip->i_itemp != NULL);
1446 ASSERT(ip->i_itemp->ili_lock_flags == 0); 1264 ASSERT(ip->i_itemp->ili_lock_flags == 0);
1447 1265 ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1448
1449 ntp = *tp;
1450 mp = (ntp)->t_mountp;
1451 ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
1452
1453 /*
1454 * We only support truncating the entire attribute fork.
1455 */
1456 if (fork == XFS_ATTR_FORK) {
1457 new_size = 0LL;
1458 }
1459 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1460 trace_xfs_itruncate_finish_start(ip, new_size);
1461
1462 /*
1463 * The first thing we do is set the size to new_size permanently
1464 * on disk. This way we don't have to worry about anyone ever
1465 * being able to look at the data being freed even in the face
1466 * of a crash. What we're getting around here is the case where
1467 * we free a block, it is allocated to another file, it is written
1468 * to, and then we crash. If the new data gets written to the
1469 * file but the log buffers containing the free and reallocation
1470 * don't, then we'd end up with garbage in the blocks being freed.
1471 * As long as we make the new_size permanent before actually
1472 * freeing any blocks it doesn't matter if they get written to.
1473 *
1474 * The callers must signal into us whether or not the size
1475 * setting here must be synchronous. There are a few cases
1476 * where it doesn't have to be synchronous. Those cases
1477 * occur if the file is unlinked and we know the unlink is
1478 * permanent or if the blocks being truncated are guaranteed
1479 * to be beyond the inode eof (regardless of the link count)
1480 * and the eof value is permanent. Both of these cases occur
1481 * only on wsync-mounted filesystems. In those cases, we're
1482 * guaranteed that no user will ever see the data in the blocks
1483 * that are being truncated so the truncate can run async.
1484 * In the free beyond eof case, the file may wind up with
1485 * more blocks allocated to it than it needs if we crash
1486 * and that won't get fixed until the next time the file
1487 * is re-opened and closed but that's ok as that shouldn't
1488 * be too many blocks.
1489 *
1490 * However, we can't just make all wsync xactions run async
1491 * because there's one call out of the create path that needs
1492 * to run sync where it's truncating an existing file to size
1493 * 0 whose size is > 0.
1494 *
1495 * It's probably possible to come up with a test in this
1496 * routine that would correctly distinguish all the above
1497 * cases from the values of the function parameters and the
1498 * inode state but for sanity's sake, I've decided to let the
1499 * layers above just tell us. It's simpler to correctly figure
1500 * out in the layer above exactly under what conditions we
1501 * can run async and I think it's easier for others read and
1502 * follow the logic in case something has to be changed.
1503 * cscope is your friend -- rcc.
1504 *
1505 * The attribute fork is much simpler.
1506 *
1507 * For the attribute fork we allow the caller to tell us whether
1508 * the unlink of the inode that led to this call is yet permanent
1509 * in the on disk log. If it is not and we will be freeing extents
1510 * in this inode then we make the first transaction synchronous
1511 * to make sure that the unlink is permanent by the time we free
1512 * the blocks.
1513 */
1514 if (fork == XFS_DATA_FORK) {
1515 if (ip->i_d.di_nextents > 0) {
1516 /*
1517 * If we are not changing the file size then do
1518 * not update the on-disk file size - we may be
1519 * called from xfs_inactive_free_eofblocks(). If we
1520 * update the on-disk file size and then the system
1521 * crashes before the contents of the file are
1522 * flushed to disk then the files may be full of
1523 * holes (ie NULL files bug).
1524 */
1525 if (ip->i_size != new_size) {
1526 ip->i_d.di_size = new_size;
1527 ip->i_size = new_size;
1528 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
1529 }
1530 }
1531 } else if (sync) {
1532 ASSERT(!(mp->m_flags & XFS_MOUNT_WSYNC));
1533 if (ip->i_d.di_anextents > 0)
1534 xfs_trans_set_sync(ntp);
1535 }
1536 ASSERT(fork == XFS_DATA_FORK ||
1537 (fork == XFS_ATTR_FORK &&
1538 ((sync && !(mp->m_flags & XFS_MOUNT_WSYNC)) ||
1539 (sync == 0 && (mp->m_flags & XFS_MOUNT_WSYNC)))));
1540 1266
1541 /* 1267 /*
1542 * Since it is possible for space to become allocated beyond 1268 * Since it is possible for space to become allocated beyond
@@ -1547,128 +1273,142 @@ xfs_itruncate_finish(
1547 * beyond the maximum file size (ie it is the same as last_block), 1273 * beyond the maximum file size (ie it is the same as last_block),
1548 * then there is nothing to do. 1274 * then there is nothing to do.
1549 */ 1275 */
1276 first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
1550 last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp)); 1277 last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1551 ASSERT(first_unmap_block <= last_block); 1278 if (first_unmap_block == last_block)
1552 done = 0; 1279 return 0;
1553 if (last_block == first_unmap_block) { 1280
1554 done = 1; 1281 ASSERT(first_unmap_block < last_block);
1555 } else { 1282 unmap_len = last_block - first_unmap_block + 1;
1556 unmap_len = last_block - first_unmap_block + 1;
1557 }
1558 while (!done) { 1283 while (!done) {
1559 /*
1560 * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi()
1561 * will tell us whether it freed the entire range or
1562 * not. If this is a synchronous mount (wsync),
1563 * then we can tell bunmapi to keep all the
1564 * transactions asynchronous since the unlink
1565 * transaction that made this inode inactive has
1566 * already hit the disk. There's no danger of
1567 * the freed blocks being reused, there being a
1568 * crash, and the reused blocks suddenly reappearing
1569 * in this file with garbage in them once recovery
1570 * runs.
1571 */
1572 xfs_bmap_init(&free_list, &first_block); 1284 xfs_bmap_init(&free_list, &first_block);
1573 error = xfs_bunmapi(ntp, ip, 1285 error = xfs_bunmapi(tp, ip,
1574 first_unmap_block, unmap_len, 1286 first_unmap_block, unmap_len,
1575 xfs_bmapi_aflag(fork), 1287 xfs_bmapi_aflag(whichfork),
1576 XFS_ITRUNC_MAX_EXTENTS, 1288 XFS_ITRUNC_MAX_EXTENTS,
1577 &first_block, &free_list, 1289 &first_block, &free_list,
1578 &done); 1290 &done);
1579 if (error) { 1291 if (error)
1580 /* 1292 goto out_bmap_cancel;
1581 * If the bunmapi call encounters an error,
1582 * return to the caller where the transaction
1583 * can be properly aborted. We just need to
1584 * make sure we're not holding any resources
1585 * that we were not when we came in.
1586 */
1587 xfs_bmap_cancel(&free_list);
1588 return error;
1589 }
1590 1293
1591 /* 1294 /*
1592 * Duplicate the transaction that has the permanent 1295 * Duplicate the transaction that has the permanent
1593 * reservation and commit the old transaction. 1296 * reservation and commit the old transaction.
1594 */ 1297 */
1595 error = xfs_bmap_finish(tp, &free_list, &committed); 1298 error = xfs_bmap_finish(&tp, &free_list, &committed);
1596 ntp = *tp;
1597 if (committed) 1299 if (committed)
1598 xfs_trans_ijoin(ntp, ip); 1300 xfs_trans_ijoin(tp, ip);
1599 1301 if (error)
1600 if (error) { 1302 goto out_bmap_cancel;
1601 /*
1602 * If the bmap finish call encounters an error, return
1603 * to the caller where the transaction can be properly
1604 * aborted. We just need to make sure we're not
1605 * holding any resources that we were not when we came
1606 * in.
1607 *
1608 * Aborting from this point might lose some blocks in
1609 * the file system, but oh well.
1610 */
1611 xfs_bmap_cancel(&free_list);
1612 return error;
1613 }
1614 1303
1615 if (committed) { 1304 if (committed) {
1616 /* 1305 /*
1617 * Mark the inode dirty so it will be logged and 1306 * Mark the inode dirty so it will be logged and
1618 * moved forward in the log as part of every commit. 1307 * moved forward in the log as part of every commit.
1619 */ 1308 */
1620 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1309 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
1621 } 1310 }
1622 1311
1623 ntp = xfs_trans_dup(ntp); 1312 ntp = xfs_trans_dup(tp);
1624 error = xfs_trans_commit(*tp, 0); 1313 error = xfs_trans_commit(tp, 0);
1625 *tp = ntp; 1314 tp = ntp;
1626 1315
1627 xfs_trans_ijoin(ntp, ip); 1316 xfs_trans_ijoin(tp, ip);
1628 1317
1629 if (error) 1318 if (error)
1630 return error; 1319 goto out;
1320
1631 /* 1321 /*
1632 * transaction commit worked ok so we can drop the extra ticket 1322 * Transaction commit worked ok so we can drop the extra ticket
1633 * reference that we gained in xfs_trans_dup() 1323 * reference that we gained in xfs_trans_dup()
1634 */ 1324 */
1635 xfs_log_ticket_put(ntp->t_ticket); 1325 xfs_log_ticket_put(tp->t_ticket);
1636 error = xfs_trans_reserve(ntp, 0, 1326 error = xfs_trans_reserve(tp, 0,
1637 XFS_ITRUNCATE_LOG_RES(mp), 0, 1327 XFS_ITRUNCATE_LOG_RES(mp), 0,
1638 XFS_TRANS_PERM_LOG_RES, 1328 XFS_TRANS_PERM_LOG_RES,
1639 XFS_ITRUNCATE_LOG_COUNT); 1329 XFS_ITRUNCATE_LOG_COUNT);
1640 if (error) 1330 if (error)
1641 return error; 1331 goto out;
1642 } 1332 }
1333
1334out:
1335 *tpp = tp;
1336 return error;
1337out_bmap_cancel:
1643 /* 1338 /*
1644 * Only update the size in the case of the data fork, but 1339 * If the bunmapi call encounters an error, return to the caller where
1645 * always re-log the inode so that our permanent transaction 1340 * the transaction can be properly aborted. We just need to make sure
1646 * can keep on rolling it forward in the log. 1341 * we're not holding any resources that we were not when we came in.
1647 */ 1342 */
1648 if (fork == XFS_DATA_FORK) { 1343 xfs_bmap_cancel(&free_list);
1649 xfs_isize_check(mp, ip, new_size); 1344 goto out;
1345}
1346
1347int
1348xfs_itruncate_data(
1349 struct xfs_trans **tpp,
1350 struct xfs_inode *ip,
1351 xfs_fsize_t new_size)
1352{
1353 int error;
1354
1355 trace_xfs_itruncate_data_start(ip, new_size);
1356
1357 /*
1358 * The first thing we do is set the size to new_size permanently on
1359 * disk. This way we don't have to worry about anyone ever being able
1360 * to look at the data being freed even in the face of a crash.
1361 * What we're getting around here is the case where we free a block, it
1362 * is allocated to another file, it is written to, and then we crash.
1363 * If the new data gets written to the file but the log buffers
1364 * containing the free and reallocation don't, then we'd end up with
1365 * garbage in the blocks being freed. As long as we make the new_size
1366 * permanent before actually freeing any blocks it doesn't matter if
1367 * they get written to.
1368 */
1369 if (ip->i_d.di_nextents > 0) {
1650 /* 1370 /*
1651 * If we are not changing the file size then do 1371 * If we are not changing the file size then do not update
1652 * not update the on-disk file size - we may be 1372 * the on-disk file size - we may be called from
1653 * called from xfs_inactive_free_eofblocks(). If we 1373 * xfs_inactive_free_eofblocks(). If we update the on-disk
1654 * update the on-disk file size and then the system 1374 * file size and then the system crashes before the contents
1655 * crashes before the contents of the file are 1375 * of the file are flushed to disk then the files may be
1656 * flushed to disk then the files may be full of 1376 * full of holes (ie NULL files bug).
1657 * holes (ie NULL files bug).
1658 */ 1377 */
1659 if (ip->i_size != new_size) { 1378 if (ip->i_size != new_size) {
1660 ip->i_d.di_size = new_size; 1379 ip->i_d.di_size = new_size;
1661 ip->i_size = new_size; 1380 ip->i_size = new_size;
1381 xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
1662 } 1382 }
1663 } 1383 }
1664 xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE); 1384
1665 ASSERT((new_size != 0) || 1385 error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
1666 (fork == XFS_ATTR_FORK) || 1386 if (error)
1667 (ip->i_delayed_blks == 0)); 1387 return error;
1668 ASSERT((new_size != 0) || 1388
1669 (fork == XFS_ATTR_FORK) || 1389 /*
1670 (ip->i_d.di_nextents == 0)); 1390 * If we are not changing the file size then do not update the on-disk
1671 trace_xfs_itruncate_finish_end(ip, new_size); 1391 * file size - we may be called from xfs_inactive_free_eofblocks().
1392 * If we update the on-disk file size and then the system crashes
1393 * before the contents of the file are flushed to disk then the files
1394 * may be full of holes (ie NULL files bug).
1395 */
1396 xfs_isize_check(ip, new_size);
1397 if (ip->i_size != new_size) {
1398 ip->i_d.di_size = new_size;
1399 ip->i_size = new_size;
1400 }
1401
1402 ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
1403 ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
1404
1405 /*
1406 * Always re-log the inode so that our permanent transaction can keep
1407 * on rolling it forward in the log.
1408 */
1409 xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
1410
1411 trace_xfs_itruncate_data_end(ip, new_size);
1672 return 0; 1412 return 0;
1673} 1413}
1674 1414
@@ -1694,7 +1434,6 @@ xfs_iunlink(
1694 1434
1695 ASSERT(ip->i_d.di_nlink == 0); 1435 ASSERT(ip->i_d.di_nlink == 0);
1696 ASSERT(ip->i_d.di_mode != 0); 1436 ASSERT(ip->i_d.di_mode != 0);
1697 ASSERT(ip->i_transp == tp);
1698 1437
1699 mp = tp->t_mountp; 1438 mp = tp->t_mountp;
1700 1439
@@ -1717,7 +1456,7 @@ xfs_iunlink(
1717 ASSERT(agi->agi_unlinked[bucket_index]); 1456 ASSERT(agi->agi_unlinked[bucket_index]);
1718 ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino); 1457 ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
1719 1458
1720 if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) { 1459 if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
1721 /* 1460 /*
1722 * There is already another inode in the bucket we need 1461 * There is already another inode in the bucket we need
1723 * to add ourselves to. Add us at the front of the list. 1462 * to add ourselves to. Add us at the front of the list.
@@ -1728,8 +1467,7 @@ xfs_iunlink(
1728 if (error) 1467 if (error)
1729 return error; 1468 return error;
1730 1469
1731 ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO); 1470 ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
1732 /* both on-disk, don't endian flip twice */
1733 dip->di_next_unlinked = agi->agi_unlinked[bucket_index]; 1471 dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
1734 offset = ip->i_imap.im_boffset + 1472 offset = ip->i_imap.im_boffset +
1735 offsetof(xfs_dinode_t, di_next_unlinked); 1473 offsetof(xfs_dinode_t, di_next_unlinked);
@@ -1794,7 +1532,7 @@ xfs_iunlink_remove(
1794 agino = XFS_INO_TO_AGINO(mp, ip->i_ino); 1532 agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
1795 ASSERT(agino != 0); 1533 ASSERT(agino != 0);
1796 bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; 1534 bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
1797 ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO); 1535 ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
1798 ASSERT(agi->agi_unlinked[bucket_index]); 1536 ASSERT(agi->agi_unlinked[bucket_index]);
1799 1537
1800 if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) { 1538 if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
@@ -1959,7 +1697,7 @@ xfs_ifree_cluster(
1959 * stale first, we will not attempt to lock them in the loop 1697 * stale first, we will not attempt to lock them in the loop
1960 * below as the XFS_ISTALE flag will be set. 1698 * below as the XFS_ISTALE flag will be set.
1961 */ 1699 */
1962 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 1700 lip = bp->b_fspriv;
1963 while (lip) { 1701 while (lip) {
1964 if (lip->li_type == XFS_LI_INODE) { 1702 if (lip->li_type == XFS_LI_INODE) {
1965 iip = (xfs_inode_log_item_t *)lip; 1703 iip = (xfs_inode_log_item_t *)lip;
@@ -2086,7 +1824,6 @@ xfs_ifree(
2086 xfs_buf_t *ibp; 1824 xfs_buf_t *ibp;
2087 1825
2088 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 1826 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2089 ASSERT(ip->i_transp == tp);
2090 ASSERT(ip->i_d.di_nlink == 0); 1827 ASSERT(ip->i_d.di_nlink == 0);
2091 ASSERT(ip->i_d.di_nextents == 0); 1828 ASSERT(ip->i_d.di_nextents == 0);
2092 ASSERT(ip->i_d.di_anextents == 0); 1829 ASSERT(ip->i_d.di_anextents == 0);
@@ -2733,7 +2470,7 @@ cluster_corrupt_out:
2733 * mark the buffer as an error and call them. Otherwise 2470 * mark the buffer as an error and call them. Otherwise
2734 * mark it as stale and brelse. 2471 * mark it as stale and brelse.
2735 */ 2472 */
2736 if (XFS_BUF_IODONE_FUNC(bp)) { 2473 if (bp->b_iodone) {
2737 XFS_BUF_UNDONE(bp); 2474 XFS_BUF_UNDONE(bp);
2738 XFS_BUF_STALE(bp); 2475 XFS_BUF_STALE(bp);
2739 XFS_BUF_ERROR(bp,EIO); 2476 XFS_BUF_ERROR(bp,EIO);
@@ -2920,7 +2657,7 @@ xfs_iflush_int(
2920 */ 2657 */
2921 xfs_synchronize_times(ip); 2658 xfs_synchronize_times(ip);
2922 2659
2923 if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC, 2660 if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
2924 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) { 2661 mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
2925 xfs_alert_tag(mp, XFS_PTAG_IFLUSH, 2662 xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
2926 "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p", 2663 "%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
@@ -3073,8 +2810,8 @@ xfs_iflush_int(
3073 */ 2810 */
3074 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item); 2811 xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
3075 2812
3076 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 2813 ASSERT(bp->b_fspriv != NULL);
3077 ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL); 2814 ASSERT(bp->b_iodone != NULL);
3078 } else { 2815 } else {
3079 /* 2816 /*
3080 * We're flushing an inode which is not in the AIL and has 2817 * We're flushing an inode which is not in the AIL and has
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 964cfea77686..a97644ab945a 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -241,7 +241,6 @@ typedef struct xfs_inode {
241 xfs_ifork_t i_df; /* data fork */ 241 xfs_ifork_t i_df; /* data fork */
242 242
243 /* Transaction and locking information. */ 243 /* Transaction and locking information. */
244 struct xfs_trans *i_transp; /* ptr to owning transaction*/
245 struct xfs_inode_log_item *i_itemp; /* logging information */ 244 struct xfs_inode_log_item *i_itemp; /* logging information */
246 mrlock_t i_lock; /* inode lock */ 245 mrlock_t i_lock; /* inode lock */
247 mrlock_t i_iolock; /* inode IO lock */ 246 mrlock_t i_iolock; /* inode IO lock */
@@ -458,16 +457,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
458extern struct lock_class_key xfs_iolock_reclaimable; 457extern struct lock_class_key xfs_iolock_reclaimable;
459 458
460/* 459/*
461 * Flags for xfs_itruncate_start().
462 */
463#define XFS_ITRUNC_DEFINITE 0x1
464#define XFS_ITRUNC_MAYBE 0x2
465
466#define XFS_ITRUNC_FLAGS \
467 { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \
468 { XFS_ITRUNC_MAYBE, "MAYBE" }
469
470/*
471 * For multiple groups support: if S_ISGID bit is set in the parent 460 * For multiple groups support: if S_ISGID bit is set in the parent
472 * directory, group of new file is set to that of the parent, and 461 * directory, group of new file is set to that of the parent, and
473 * new subdirectory gets S_ISGID bit from parent. 462 * new subdirectory gets S_ISGID bit from parent.
@@ -501,9 +490,10 @@ uint xfs_ip2xflags(struct xfs_inode *);
501uint xfs_dic2xflags(struct xfs_dinode *); 490uint xfs_dic2xflags(struct xfs_dinode *);
502int xfs_ifree(struct xfs_trans *, xfs_inode_t *, 491int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
503 struct xfs_bmap_free *); 492 struct xfs_bmap_free *);
504int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); 493int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
505int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, 494 int, xfs_fsize_t);
506 xfs_fsize_t, int, int); 495int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
496 xfs_fsize_t);
507int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); 497int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
508 498
509void xfs_iext_realloc(xfs_inode_t *, int, int); 499void xfs_iext_realloc(xfs_inode_t *, int, int);
@@ -579,13 +569,6 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
579 569
580#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount)) 570#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
581 571
582#ifdef DEBUG
583void xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
584 xfs_fsize_t);
585#else /* DEBUG */
586#define xfs_isize_check(mp, ip, isize)
587#endif /* DEBUG */
588
589#if defined(DEBUG) 572#if defined(DEBUG)
590void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *); 573void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
591#else 574#else
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index b1e88d56069c..588406dc6a35 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -632,13 +632,8 @@ xfs_inode_item_unlock(
632 struct xfs_inode *ip = iip->ili_inode; 632 struct xfs_inode *ip = iip->ili_inode;
633 unsigned short lock_flags; 633 unsigned short lock_flags;
634 634
635 ASSERT(iip->ili_inode->i_itemp != NULL); 635 ASSERT(ip->i_itemp != NULL);
636 ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL)); 636 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
637
638 /*
639 * Clear the transaction pointer in the inode.
640 */
641 ip->i_transp = NULL;
642 637
643 /* 638 /*
644 * If the inode needed a separate buffer with which to log 639 * If the inode needed a separate buffer with which to log
@@ -664,8 +659,8 @@ xfs_inode_item_unlock(
664 lock_flags = iip->ili_lock_flags; 659 lock_flags = iip->ili_lock_flags;
665 iip->ili_lock_flags = 0; 660 iip->ili_lock_flags = 0;
666 if (lock_flags) { 661 if (lock_flags) {
667 xfs_iunlock(iip->ili_inode, lock_flags); 662 xfs_iunlock(ip, lock_flags);
668 IRELE(iip->ili_inode); 663 IRELE(ip);
669 } 664 }
670} 665}
671 666
@@ -879,7 +874,7 @@ xfs_iflush_done(
879 * Scan the buffer IO completions for other inodes being completed and 874 * Scan the buffer IO completions for other inodes being completed and
880 * attach them to the current inode log item. 875 * attach them to the current inode log item.
881 */ 876 */
882 blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 877 blip = bp->b_fspriv;
883 prev = NULL; 878 prev = NULL;
884 while (blip != NULL) { 879 while (blip != NULL) {
885 if (lip->li_cb != xfs_iflush_done) { 880 if (lip->li_cb != xfs_iflush_done) {
@@ -891,7 +886,7 @@ xfs_iflush_done(
891 /* remove from list */ 886 /* remove from list */
892 next = blip->li_bio_list; 887 next = blip->li_bio_list;
893 if (!prev) { 888 if (!prev) {
894 XFS_BUF_SET_FSPRIVATE(bp, next); 889 bp->b_fspriv = next;
895 } else { 890 } else {
896 prev->li_bio_list = next; 891 prev->li_bio_list = next;
897 } 892 }
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h
index b8e4ee4e89a4..b253c0ea5bec 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/xfs_inum.h
@@ -28,17 +28,6 @@
28 28
29typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */ 29typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */
30 30
31/*
32 * Useful inode bits for this kernel.
33 * Used in some places where having 64-bits in the 32-bit kernels
34 * costs too much.
35 */
36#if XFS_BIG_INUMS
37typedef xfs_ino_t xfs_intino_t;
38#else
39typedef __uint32_t xfs_intino_t;
40#endif
41
42#define NULLFSINO ((xfs_ino_t)-1) 31#define NULLFSINO ((xfs_ino_t)-1)
43#define NULLAGINO ((xfs_agino_t)-1) 32#define NULLAGINO ((xfs_agino_t)-1)
44 33
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 41d5b8f2bf92..06ff8437ed8e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -871,15 +871,9 @@ xlog_space_left(
871void 871void
872xlog_iodone(xfs_buf_t *bp) 872xlog_iodone(xfs_buf_t *bp)
873{ 873{
874 xlog_in_core_t *iclog; 874 xlog_in_core_t *iclog = bp->b_fspriv;
875 xlog_t *l; 875 xlog_t *l = iclog->ic_log;
876 int aborted; 876 int aborted = 0;
877
878 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
879 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
880 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
881 aborted = 0;
882 l = iclog->ic_log;
883 877
884 /* 878 /*
885 * Race to shutdown the filesystem if we see an error. 879 * Race to shutdown the filesystem if we see an error.
@@ -1056,10 +1050,9 @@ xlog_alloc_log(xfs_mount_t *mp,
1056 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp); 1050 bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
1057 if (!bp) 1051 if (!bp)
1058 goto out_free_log; 1052 goto out_free_log;
1059 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone); 1053 bp->b_iodone = xlog_iodone;
1060 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1061 ASSERT(XFS_BUF_ISBUSY(bp)); 1054 ASSERT(XFS_BUF_ISBUSY(bp));
1062 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 1055 ASSERT(xfs_buf_islocked(bp));
1063 log->l_xbuf = bp; 1056 log->l_xbuf = bp;
1064 1057
1065 spin_lock_init(&log->l_icloglock); 1058 spin_lock_init(&log->l_icloglock);
@@ -1090,10 +1083,8 @@ xlog_alloc_log(xfs_mount_t *mp,
1090 log->l_iclog_size, 0); 1083 log->l_iclog_size, 0);
1091 if (!bp) 1084 if (!bp)
1092 goto out_free_iclog; 1085 goto out_free_iclog;
1093 if (!XFS_BUF_CPSEMA(bp)) 1086
1094 ASSERT(0); 1087 bp->b_iodone = xlog_iodone;
1095 XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
1096 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
1097 iclog->ic_bp = bp; 1088 iclog->ic_bp = bp;
1098 iclog->ic_data = bp->b_addr; 1089 iclog->ic_data = bp->b_addr;
1099#ifdef DEBUG 1090#ifdef DEBUG
@@ -1118,7 +1109,7 @@ xlog_alloc_log(xfs_mount_t *mp,
1118 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; 1109 iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
1119 1110
1120 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); 1111 ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
1121 ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); 1112 ASSERT(xfs_buf_islocked(iclog->ic_bp));
1122 init_waitqueue_head(&iclog->ic_force_wait); 1113 init_waitqueue_head(&iclog->ic_force_wait);
1123 init_waitqueue_head(&iclog->ic_write_wait); 1114 init_waitqueue_head(&iclog->ic_write_wait);
1124 1115
@@ -1254,9 +1245,8 @@ STATIC int
1254xlog_bdstrat( 1245xlog_bdstrat(
1255 struct xfs_buf *bp) 1246 struct xfs_buf *bp)
1256{ 1247{
1257 struct xlog_in_core *iclog; 1248 struct xlog_in_core *iclog = bp->b_fspriv;
1258 1249
1259 iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
1260 if (iclog->ic_state & XLOG_STATE_IOERROR) { 1250 if (iclog->ic_state & XLOG_STATE_IOERROR) {
1261 XFS_BUF_ERROR(bp, EIO); 1251 XFS_BUF_ERROR(bp, EIO);
1262 XFS_BUF_STALE(bp); 1252 XFS_BUF_STALE(bp);
@@ -1269,7 +1259,6 @@ xlog_bdstrat(
1269 return 0; 1259 return 0;
1270 } 1260 }
1271 1261
1272 bp->b_flags |= _XBF_RUN_QUEUES;
1273 xfs_buf_iorequest(bp); 1262 xfs_buf_iorequest(bp);
1274 return 0; 1263 return 0;
1275} 1264}
@@ -1351,8 +1340,6 @@ xlog_sync(xlog_t *log,
1351 } 1340 }
1352 1341
1353 bp = iclog->ic_bp; 1342 bp = iclog->ic_bp;
1354 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1);
1355 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
1356 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); 1343 XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
1357 1344
1358 XFS_STATS_ADD(xs_log_blocks, BTOBB(count)); 1345 XFS_STATS_ADD(xs_log_blocks, BTOBB(count));
@@ -1366,22 +1353,28 @@ xlog_sync(xlog_t *log,
1366 iclog->ic_bwritecnt = 1; 1353 iclog->ic_bwritecnt = 1;
1367 } 1354 }
1368 XFS_BUF_SET_COUNT(bp, count); 1355 XFS_BUF_SET_COUNT(bp, count);
1369 XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */ 1356 bp->b_fspriv = iclog;
1370 XFS_BUF_ZEROFLAGS(bp); 1357 XFS_BUF_ZEROFLAGS(bp);
1371 XFS_BUF_BUSY(bp); 1358 XFS_BUF_BUSY(bp);
1372 XFS_BUF_ASYNC(bp); 1359 XFS_BUF_ASYNC(bp);
1373 bp->b_flags |= XBF_LOG_BUFFER; 1360 bp->b_flags |= XBF_SYNCIO;
1374 1361
1375 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) { 1362 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
1363 bp->b_flags |= XBF_FUA;
1364
1376 /* 1365 /*
1377 * If we have an external log device, flush the data device 1366 * Flush the data device before flushing the log to make
1378 * before flushing the log to make sure all meta data 1367 * sure all meta data written back from the AIL actually made
1379 * written back from the AIL actually made it to disk 1368 * it to disk before stamping the new log tail LSN into the
1380 * before writing out the new log tail LSN in the log buffer. 1369 * log buffer. For an external log we need to issue the
1370 * flush explicitly, and unfortunately synchronously here;
1371 * for an internal log we can simply use the block layer
1372 * state machine for preflushes.
1381 */ 1373 */
1382 if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp) 1374 if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
1383 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp); 1375 xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
1384 XFS_BUF_ORDERED(bp); 1376 else
1377 bp->b_flags |= XBF_FLUSH;
1385 } 1378 }
1386 1379
1387 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); 1380 ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
@@ -1404,19 +1397,16 @@ xlog_sync(xlog_t *log,
1404 } 1397 }
1405 if (split) { 1398 if (split) {
1406 bp = iclog->ic_log->l_xbuf; 1399 bp = iclog->ic_log->l_xbuf;
1407 ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) ==
1408 (unsigned long)1);
1409 XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
1410 XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */ 1400 XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
1411 XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+ 1401 XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
1412 (__psint_t)count), split); 1402 (__psint_t)count), split);
1413 XFS_BUF_SET_FSPRIVATE(bp, iclog); 1403 bp->b_fspriv = iclog;
1414 XFS_BUF_ZEROFLAGS(bp); 1404 XFS_BUF_ZEROFLAGS(bp);
1415 XFS_BUF_BUSY(bp); 1405 XFS_BUF_BUSY(bp);
1416 XFS_BUF_ASYNC(bp); 1406 XFS_BUF_ASYNC(bp);
1417 bp->b_flags |= XBF_LOG_BUFFER; 1407 bp->b_flags |= XBF_SYNCIO;
1418 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) 1408 if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
1419 XFS_BUF_ORDERED(bp); 1409 bp->b_flags |= XBF_FUA;
1420 dptr = XFS_BUF_PTR(bp); 1410 dptr = XFS_BUF_PTR(bp);
1421 /* 1411 /*
1422 * Bump the cycle numbers at the start of each block 1412 * Bump the cycle numbers at the start of each block
@@ -3521,13 +3511,13 @@ xlog_verify_iclog(xlog_t *log,
3521 spin_unlock(&log->l_icloglock); 3511 spin_unlock(&log->l_icloglock);
3522 3512
3523 /* check log magic numbers */ 3513 /* check log magic numbers */
3524 if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM) 3514 if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
3525 xfs_emerg(log->l_mp, "%s: invalid magic num", __func__); 3515 xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
3526 3516
3527 ptr = (xfs_caddr_t) &iclog->ic_header; 3517 ptr = (xfs_caddr_t) &iclog->ic_header;
3528 for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count; 3518 for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
3529 ptr += BBSIZE) { 3519 ptr += BBSIZE) {
3530 if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM) 3520 if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
3531 xfs_emerg(log->l_mp, "%s: unexpected magic num", 3521 xfs_emerg(log->l_mp, "%s: unexpected magic num",
3532 __func__); 3522 __func__);
3533 } 3523 }
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 04142caedb2b..8fe4206de057 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -91,6 +91,8 @@ xlog_get_bp(
91 xlog_t *log, 91 xlog_t *log,
92 int nbblks) 92 int nbblks)
93{ 93{
94 struct xfs_buf *bp;
95
94 if (!xlog_buf_bbcount_valid(log, nbblks)) { 96 if (!xlog_buf_bbcount_valid(log, nbblks)) {
95 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", 97 xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
96 nbblks); 98 nbblks);
@@ -118,8 +120,10 @@ xlog_get_bp(
118 nbblks += log->l_sectBBsize; 120 nbblks += log->l_sectBBsize;
119 nbblks = round_up(nbblks, log->l_sectBBsize); 121 nbblks = round_up(nbblks, log->l_sectBBsize);
120 122
121 return xfs_buf_get_uncached(log->l_mp->m_logdev_targp, 123 bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0);
122 BBTOB(nbblks), 0); 124 if (bp)
125 xfs_buf_unlock(bp);
126 return bp;
123} 127}
124 128
125STATIC void 129STATIC void
@@ -264,7 +268,7 @@ xlog_bwrite(
264 XFS_BUF_ZEROFLAGS(bp); 268 XFS_BUF_ZEROFLAGS(bp);
265 XFS_BUF_BUSY(bp); 269 XFS_BUF_BUSY(bp);
266 XFS_BUF_HOLD(bp); 270 XFS_BUF_HOLD(bp);
267 XFS_BUF_PSEMA(bp, PRIBIO); 271 xfs_buf_lock(bp);
268 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks)); 272 XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
269 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); 273 XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
270 274
@@ -300,14 +304,14 @@ xlog_header_check_recover(
300 xfs_mount_t *mp, 304 xfs_mount_t *mp,
301 xlog_rec_header_t *head) 305 xlog_rec_header_t *head)
302{ 306{
303 ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); 307 ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
304 308
305 /* 309 /*
306 * IRIX doesn't write the h_fmt field and leaves it zeroed 310 * IRIX doesn't write the h_fmt field and leaves it zeroed
307 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover 311 * (XLOG_FMT_UNKNOWN). This stops us from trying to recover
308 * a dirty log created in IRIX. 312 * a dirty log created in IRIX.
309 */ 313 */
310 if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) { 314 if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) {
311 xfs_warn(mp, 315 xfs_warn(mp,
312 "dirty log written in incompatible format - can't recover"); 316 "dirty log written in incompatible format - can't recover");
313 xlog_header_check_dump(mp, head); 317 xlog_header_check_dump(mp, head);
@@ -333,7 +337,7 @@ xlog_header_check_mount(
333 xfs_mount_t *mp, 337 xfs_mount_t *mp,
334 xlog_rec_header_t *head) 338 xlog_rec_header_t *head)
335{ 339{
336 ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM); 340 ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
337 341
338 if (uuid_is_nil(&head->h_fs_uuid)) { 342 if (uuid_is_nil(&head->h_fs_uuid)) {
339 /* 343 /*
@@ -367,7 +371,7 @@ xlog_recover_iodone(
367 xfs_force_shutdown(bp->b_target->bt_mount, 371 xfs_force_shutdown(bp->b_target->bt_mount,
368 SHUTDOWN_META_IO_ERROR); 372 SHUTDOWN_META_IO_ERROR);
369 } 373 }
370 XFS_BUF_CLR_IODONE_FUNC(bp); 374 bp->b_iodone = NULL;
371 xfs_buf_ioend(bp, 0); 375 xfs_buf_ioend(bp, 0);
372} 376}
373 377
@@ -534,7 +538,7 @@ xlog_find_verify_log_record(
534 538
535 head = (xlog_rec_header_t *)offset; 539 head = (xlog_rec_header_t *)offset;
536 540
537 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno)) 541 if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
538 break; 542 break;
539 543
540 if (!smallmem) 544 if (!smallmem)
@@ -916,7 +920,7 @@ xlog_find_tail(
916 if (error) 920 if (error)
917 goto done; 921 goto done;
918 922
919 if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) { 923 if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
920 found = 1; 924 found = 1;
921 break; 925 break;
922 } 926 }
@@ -933,8 +937,8 @@ xlog_find_tail(
933 if (error) 937 if (error)
934 goto done; 938 goto done;
935 939
936 if (XLOG_HEADER_MAGIC_NUM == 940 if (*(__be32 *)offset ==
937 be32_to_cpu(*(__be32 *)offset)) { 941 cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
938 found = 2; 942 found = 2;
939 break; 943 break;
940 } 944 }
@@ -1947,7 +1951,7 @@ xfs_qm_dqcheck(
1947 * This is all fine; things are still consistent, and we haven't lost 1951 * This is all fine; things are still consistent, and we haven't lost
1948 * any quota information. Just don't complain about bad dquot blks. 1952 * any quota information. Just don't complain about bad dquot blks.
1949 */ 1953 */
1950 if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) { 1954 if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
1951 if (flags & XFS_QMOPT_DOWARN) 1955 if (flags & XFS_QMOPT_DOWARN)
1952 xfs_alert(mp, 1956 xfs_alert(mp,
1953 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", 1957 "%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
@@ -2174,7 +2178,7 @@ xlog_recover_buffer_pass2(
2174 error = xfs_bwrite(mp, bp); 2178 error = xfs_bwrite(mp, bp);
2175 } else { 2179 } else {
2176 ASSERT(bp->b_target->bt_mount == mp); 2180 ASSERT(bp->b_target->bt_mount == mp);
2177 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2181 bp->b_iodone = xlog_recover_iodone;
2178 xfs_bdwrite(mp, bp); 2182 xfs_bdwrite(mp, bp);
2179 } 2183 }
2180 2184
@@ -2238,7 +2242,7 @@ xlog_recover_inode_pass2(
2238 * Make sure the place we're flushing out to really looks 2242 * Make sure the place we're flushing out to really looks
2239 * like an inode! 2243 * like an inode!
2240 */ 2244 */
2241 if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) { 2245 if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
2242 xfs_buf_relse(bp); 2246 xfs_buf_relse(bp);
2243 xfs_alert(mp, 2247 xfs_alert(mp,
2244 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld", 2248 "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
@@ -2434,7 +2438,7 @@ xlog_recover_inode_pass2(
2434 2438
2435write_inode_buffer: 2439write_inode_buffer:
2436 ASSERT(bp->b_target->bt_mount == mp); 2440 ASSERT(bp->b_target->bt_mount == mp);
2437 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2441 bp->b_iodone = xlog_recover_iodone;
2438 xfs_bdwrite(mp, bp); 2442 xfs_bdwrite(mp, bp);
2439error: 2443error:
2440 if (need_free) 2444 if (need_free)
@@ -2556,7 +2560,7 @@ xlog_recover_dquot_pass2(
2556 2560
2557 ASSERT(dq_f->qlf_size == 2); 2561 ASSERT(dq_f->qlf_size == 2);
2558 ASSERT(bp->b_target->bt_mount == mp); 2562 ASSERT(bp->b_target->bt_mount == mp);
2559 XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); 2563 bp->b_iodone = xlog_recover_iodone;
2560 xfs_bdwrite(mp, bp); 2564 xfs_bdwrite(mp, bp);
2561 2565
2562 return (0); 2566 return (0);
@@ -3295,7 +3299,7 @@ xlog_valid_rec_header(
3295{ 3299{
3296 int hlen; 3300 int hlen;
3297 3301
3298 if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) { 3302 if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
3299 XFS_ERROR_REPORT("xlog_valid_rec_header(1)", 3303 XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
3300 XFS_ERRLEVEL_LOW, log->l_mp); 3304 XFS_ERRLEVEL_LOW, log->l_mp);
3301 return XFS_ERROR(EFSCORRUPTED); 3305 return XFS_ERROR(EFSCORRUPTED);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b49b82363d20..7f25245da289 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -348,7 +348,7 @@ xfs_mount_validate_sb(
348 } 348 }
349 349
350 /* 350 /*
351 * More sanity checking. These were stolen directly from 351 * More sanity checking. Most of these were stolen directly from
352 * xfs_repair. 352 * xfs_repair.
353 */ 353 */
354 if (unlikely( 354 if (unlikely(
@@ -371,23 +371,13 @@ xfs_mount_validate_sb(
371 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || 371 (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
372 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || 372 (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
373 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || 373 (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
374 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) { 374 (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) ||
375 sbp->sb_dblocks == 0 ||
376 sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) ||
377 sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
375 if (loud) 378 if (loud)
376 xfs_warn(mp, "SB sanity check 1 failed"); 379 XFS_CORRUPTION_ERROR("SB sanity check failed",
377 return XFS_ERROR(EFSCORRUPTED); 380 XFS_ERRLEVEL_LOW, mp, sbp);
378 }
379
380 /*
381 * Sanity check AG count, size fields against data size field
382 */
383 if (unlikely(
384 sbp->sb_dblocks == 0 ||
385 sbp->sb_dblocks >
386 (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
387 sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
388 sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
389 if (loud)
390 xfs_warn(mp, "SB sanity check 2 failed");
391 return XFS_ERROR(EFSCORRUPTED); 381 return XFS_ERROR(EFSCORRUPTED);
392 } 382 }
393 383
@@ -864,7 +854,8 @@ xfs_update_alignment(xfs_mount_t *mp)
864 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) || 854 if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
865 (BBTOB(mp->m_swidth) & mp->m_blockmask)) { 855 (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
866 if (mp->m_flags & XFS_MOUNT_RETERR) { 856 if (mp->m_flags & XFS_MOUNT_RETERR) {
867 xfs_warn(mp, "alignment check 1 failed"); 857 xfs_warn(mp, "alignment check failed: "
858 "(sunit/swidth vs. blocksize)");
868 return XFS_ERROR(EINVAL); 859 return XFS_ERROR(EINVAL);
869 } 860 }
870 mp->m_dalign = mp->m_swidth = 0; 861 mp->m_dalign = mp->m_swidth = 0;
@@ -875,6 +866,8 @@ xfs_update_alignment(xfs_mount_t *mp)
875 mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign); 866 mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
876 if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) { 867 if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
877 if (mp->m_flags & XFS_MOUNT_RETERR) { 868 if (mp->m_flags & XFS_MOUNT_RETERR) {
869 xfs_warn(mp, "alignment check failed: "
870 "(sunit/swidth vs. ag size)");
878 return XFS_ERROR(EINVAL); 871 return XFS_ERROR(EINVAL);
879 } 872 }
880 xfs_warn(mp, 873 xfs_warn(mp,
@@ -889,8 +882,8 @@ xfs_update_alignment(xfs_mount_t *mp)
889 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth); 882 mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
890 } else { 883 } else {
891 if (mp->m_flags & XFS_MOUNT_RETERR) { 884 if (mp->m_flags & XFS_MOUNT_RETERR) {
892 xfs_warn(mp, 885 xfs_warn(mp, "alignment check failed: "
893 "stripe alignment turned off: sunit(%d) less than bsize(%d)", 886 "sunit(%d) less than bsize(%d)",
894 mp->m_dalign, 887 mp->m_dalign,
895 mp->m_blockmask +1); 888 mp->m_blockmask +1);
896 return XFS_ERROR(EINVAL); 889 return XFS_ERROR(EINVAL);
@@ -1096,10 +1089,6 @@ xfs_mount_reset_sbqflags(
1096 if (mp->m_flags & XFS_MOUNT_RDONLY) 1089 if (mp->m_flags & XFS_MOUNT_RDONLY)
1097 return 0; 1090 return 0;
1098 1091
1099#ifdef QUOTADEBUG
1100 xfs_notice(mp, "Writing superblock quota changes");
1101#endif
1102
1103 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE); 1092 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1104 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, 1093 error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
1105 XFS_DEFAULT_LOG_COUNT); 1094 XFS_DEFAULT_LOG_COUNT);
@@ -1532,7 +1521,7 @@ xfs_unmountfs(
1532 xfs_warn(mp, "Unable to free reserved block pool. " 1521 xfs_warn(mp, "Unable to free reserved block pool. "
1533 "Freespace may not be correct on next mount."); 1522 "Freespace may not be correct on next mount.");
1534 1523
1535 error = xfs_log_sbcount(mp, 1); 1524 error = xfs_log_sbcount(mp);
1536 if (error) 1525 if (error)
1537 xfs_warn(mp, "Unable to update superblock counters. " 1526 xfs_warn(mp, "Unable to update superblock counters. "
1538 "Freespace may not be correct on next mount."); 1527 "Freespace may not be correct on next mount.");
@@ -1568,18 +1557,14 @@ xfs_fs_writable(xfs_mount_t *mp)
1568/* 1557/*
1569 * xfs_log_sbcount 1558 * xfs_log_sbcount
1570 * 1559 *
1571 * Called either periodically to keep the on disk superblock values 1560 * Sync the superblock counters to disk.
1572 * roughly up to date or from unmount to make sure the values are
1573 * correct on a clean unmount.
1574 * 1561 *
1575 * Note this code can be called during the process of freezing, so 1562 * Note this code can be called during the process of freezing, so
1576 * we may need to use the transaction allocator which does not not 1563 * we may need to use the transaction allocator which does not
1577 * block when the transaction subsystem is in its frozen state. 1564 * block when the transaction subsystem is in its frozen state.
1578 */ 1565 */
1579int 1566int
1580xfs_log_sbcount( 1567xfs_log_sbcount(xfs_mount_t *mp)
1581 xfs_mount_t *mp,
1582 uint sync)
1583{ 1568{
1584 xfs_trans_t *tp; 1569 xfs_trans_t *tp;
1585 int error; 1570 int error;
@@ -1605,8 +1590,7 @@ xfs_log_sbcount(
1605 } 1590 }
1606 1591
1607 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS); 1592 xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
1608 if (sync) 1593 xfs_trans_set_sync(tp);
1609 xfs_trans_set_sync(tp);
1610 error = xfs_trans_commit(tp, 0); 1594 error = xfs_trans_commit(tp, 0);
1611 return error; 1595 return error;
1612} 1596}
@@ -1941,22 +1925,19 @@ unwind:
1941 * the superblock buffer if it can be locked without sleeping. 1925 * the superblock buffer if it can be locked without sleeping.
1942 * If it can't then we'll return NULL. 1926 * If it can't then we'll return NULL.
1943 */ 1927 */
1944xfs_buf_t * 1928struct xfs_buf *
1945xfs_getsb( 1929xfs_getsb(
1946 xfs_mount_t *mp, 1930 struct xfs_mount *mp,
1947 int flags) 1931 int flags)
1948{ 1932{
1949 xfs_buf_t *bp; 1933 struct xfs_buf *bp = mp->m_sb_bp;
1950 1934
1951 ASSERT(mp->m_sb_bp != NULL); 1935 if (!xfs_buf_trylock(bp)) {
1952 bp = mp->m_sb_bp; 1936 if (flags & XBF_TRYLOCK)
1953 if (flags & XBF_TRYLOCK) {
1954 if (!XFS_BUF_CPSEMA(bp)) {
1955 return NULL; 1937 return NULL;
1956 } 1938 xfs_buf_lock(bp);
1957 } else {
1958 XFS_BUF_PSEMA(bp, PRIBIO);
1959 } 1939 }
1940
1960 XFS_BUF_HOLD(bp); 1941 XFS_BUF_HOLD(bp);
1961 ASSERT(XFS_BUF_ISDONE(bp)); 1942 ASSERT(XFS_BUF_ISDONE(bp));
1962 return bp; 1943 return bp;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3d68bb267c5f..bb24dac42a25 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -371,7 +371,7 @@ typedef struct xfs_mod_sb {
371 int64_t msb_delta; /* Change to make to specified field */ 371 int64_t msb_delta; /* Change to make to specified field */
372} xfs_mod_sb_t; 372} xfs_mod_sb_t;
373 373
374extern int xfs_log_sbcount(xfs_mount_t *, uint); 374extern int xfs_log_sbcount(xfs_mount_t *);
375extern __uint64_t xfs_default_resblks(xfs_mount_t *mp); 375extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
376extern int xfs_mountfs(xfs_mount_t *mp); 376extern int xfs_mountfs(xfs_mount_t *mp);
377 377
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c83f63b33aae..efc147f0e9b6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1426,6 +1426,7 @@ xfs_trans_committed(
1426static inline void 1426static inline void
1427xfs_log_item_batch_insert( 1427xfs_log_item_batch_insert(
1428 struct xfs_ail *ailp, 1428 struct xfs_ail *ailp,
1429 struct xfs_ail_cursor *cur,
1429 struct xfs_log_item **log_items, 1430 struct xfs_log_item **log_items,
1430 int nr_items, 1431 int nr_items,
1431 xfs_lsn_t commit_lsn) 1432 xfs_lsn_t commit_lsn)
@@ -1434,7 +1435,7 @@ xfs_log_item_batch_insert(
1434 1435
1435 spin_lock(&ailp->xa_lock); 1436 spin_lock(&ailp->xa_lock);
1436 /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ 1437 /* xfs_trans_ail_update_bulk drops ailp->xa_lock */
1437 xfs_trans_ail_update_bulk(ailp, log_items, nr_items, commit_lsn); 1438 xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
1438 1439
1439 for (i = 0; i < nr_items; i++) 1440 for (i = 0; i < nr_items; i++)
1440 IOP_UNPIN(log_items[i], 0); 1441 IOP_UNPIN(log_items[i], 0);
@@ -1452,6 +1453,13 @@ xfs_log_item_batch_insert(
1452 * as an iclog write error even though we haven't started any IO yet. Hence in 1453 * as an iclog write error even though we haven't started any IO yet. Hence in
1453 * this case all we need to do is IOP_COMMITTED processing, followed by an 1454 * this case all we need to do is IOP_COMMITTED processing, followed by an
1454 * IOP_UNPIN(aborted) call. 1455 * IOP_UNPIN(aborted) call.
1456 *
1457 * The AIL cursor is used to optimise the insert process. If commit_lsn is not
1458 * at the end of the AIL, the insert cursor avoids the need to walk
1459 * the AIL to find the insertion point on every xfs_log_item_batch_insert()
1460 * call. This saves a lot of needless list walking and is a net win, even
1461 * though it slightly increases that amount of AIL lock traffic to set it up
1462 * and tear it down.
1455 */ 1463 */
1456void 1464void
1457xfs_trans_committed_bulk( 1465xfs_trans_committed_bulk(
@@ -1463,8 +1471,13 @@ xfs_trans_committed_bulk(
1463#define LOG_ITEM_BATCH_SIZE 32 1471#define LOG_ITEM_BATCH_SIZE 32
1464 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; 1472 struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
1465 struct xfs_log_vec *lv; 1473 struct xfs_log_vec *lv;
1474 struct xfs_ail_cursor cur;
1466 int i = 0; 1475 int i = 0;
1467 1476
1477 spin_lock(&ailp->xa_lock);
1478 xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
1479 spin_unlock(&ailp->xa_lock);
1480
1468 /* unpin all the log items */ 1481 /* unpin all the log items */
1469 for (lv = log_vector; lv; lv = lv->lv_next ) { 1482 for (lv = log_vector; lv; lv = lv->lv_next ) {
1470 struct xfs_log_item *lip = lv->lv_item; 1483 struct xfs_log_item *lip = lv->lv_item;
@@ -1493,7 +1506,9 @@ xfs_trans_committed_bulk(
1493 /* 1506 /*
1494 * Not a bulk update option due to unusual item_lsn. 1507 * Not a bulk update option due to unusual item_lsn.
1495 * Push into AIL immediately, rechecking the lsn once 1508 * Push into AIL immediately, rechecking the lsn once
1496 * we have the ail lock. Then unpin the item. 1509 * we have the ail lock. Then unpin the item. This does
1510 * not affect the AIL cursor the bulk insert path is
1511 * using.
1497 */ 1512 */
1498 spin_lock(&ailp->xa_lock); 1513 spin_lock(&ailp->xa_lock);
1499 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) 1514 if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
@@ -1507,7 +1522,7 @@ xfs_trans_committed_bulk(
1507 /* Item is a candidate for bulk AIL insert. */ 1522 /* Item is a candidate for bulk AIL insert. */
1508 log_items[i++] = lv->lv_item; 1523 log_items[i++] = lv->lv_item;
1509 if (i >= LOG_ITEM_BATCH_SIZE) { 1524 if (i >= LOG_ITEM_BATCH_SIZE) {
1510 xfs_log_item_batch_insert(ailp, log_items, 1525 xfs_log_item_batch_insert(ailp, &cur, log_items,
1511 LOG_ITEM_BATCH_SIZE, commit_lsn); 1526 LOG_ITEM_BATCH_SIZE, commit_lsn);
1512 i = 0; 1527 i = 0;
1513 } 1528 }
@@ -1515,7 +1530,11 @@ xfs_trans_committed_bulk(
1515 1530
1516 /* make sure we insert the remainder! */ 1531 /* make sure we insert the remainder! */
1517 if (i) 1532 if (i)
1518 xfs_log_item_batch_insert(ailp, log_items, i, commit_lsn); 1533 xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
1534
1535 spin_lock(&ailp->xa_lock);
1536 xfs_trans_ail_cursor_done(ailp, &cur);
1537 spin_unlock(&ailp->xa_lock);
1519} 1538}
1520 1539
1521/* 1540/*
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 5fc2380092c8..43233e92f0f6 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -163,17 +163,11 @@ xfs_ail_max_lsn(
163} 163}
164 164
165/* 165/*
166 * AIL traversal cursor initialisation. 166 * The cursor keeps track of where our current traversal is up to by tracking
167 * 167 * the next item in the list for us. However, for this to be safe, removing an
168 * The cursor keeps track of where our current traversal is up 168 * object from the AIL needs to invalidate any cursor that points to it. hence
169 * to by tracking the next ƣtem in the list for us. However, for 169 * the traversal cursor needs to be linked to the struct xfs_ail so that
170 * this to be safe, removing an object from the AIL needs to invalidate 170 * deletion can search all the active cursors for invalidation.
171 * any cursor that points to it. hence the traversal cursor needs to
172 * be linked to the struct xfs_ail so that deletion can search all the
173 * active cursors for invalidation.
174 *
175 * We don't link the push cursor because it is embedded in the struct
176 * xfs_ail and hence easily findable.
177 */ 171 */
178STATIC void 172STATIC void
179xfs_trans_ail_cursor_init( 173xfs_trans_ail_cursor_init(
@@ -181,31 +175,12 @@ xfs_trans_ail_cursor_init(
181 struct xfs_ail_cursor *cur) 175 struct xfs_ail_cursor *cur)
182{ 176{
183 cur->item = NULL; 177 cur->item = NULL;
184 if (cur == &ailp->xa_cursors) 178 list_add_tail(&cur->list, &ailp->xa_cursors);
185 return;
186
187 cur->next = ailp->xa_cursors.next;
188 ailp->xa_cursors.next = cur;
189}
190
191/*
192 * Set the cursor to the next item, because when we look
193 * up the cursor the current item may have been freed.
194 */
195STATIC void
196xfs_trans_ail_cursor_set(
197 struct xfs_ail *ailp,
198 struct xfs_ail_cursor *cur,
199 struct xfs_log_item *lip)
200{
201 if (lip)
202 cur->item = xfs_ail_next(ailp, lip);
203} 179}
204 180
205/* 181/*
206 * Get the next item in the traversal and advance the cursor. 182 * Get the next item in the traversal and advance the cursor. If the cursor
207 * If the cursor was invalidated (inidicated by a lip of 1), 183 * was invalidated (indicated by a lip of 1), restart the traversal.
208 * restart the traversal.
209 */ 184 */
210struct xfs_log_item * 185struct xfs_log_item *
211xfs_trans_ail_cursor_next( 186xfs_trans_ail_cursor_next(
@@ -216,45 +191,31 @@ xfs_trans_ail_cursor_next(
216 191
217 if ((__psint_t)lip & 1) 192 if ((__psint_t)lip & 1)
218 lip = xfs_ail_min(ailp); 193 lip = xfs_ail_min(ailp);
219 xfs_trans_ail_cursor_set(ailp, cur, lip); 194 if (lip)
195 cur->item = xfs_ail_next(ailp, lip);
220 return lip; 196 return lip;
221} 197}
222 198
223/* 199/*
224 * Now that the traversal is complete, we need to remove the cursor 200 * When the traversal is complete, we need to remove the cursor from the list
225 * from the list of traversing cursors. Avoid removing the embedded 201 * of traversing cursors.
226 * push cursor, but use the fact it is always present to make the
227 * list deletion simple.
228 */ 202 */
229void 203void
230xfs_trans_ail_cursor_done( 204xfs_trans_ail_cursor_done(
231 struct xfs_ail *ailp, 205 struct xfs_ail *ailp,
232 struct xfs_ail_cursor *done) 206 struct xfs_ail_cursor *cur)
233{ 207{
234 struct xfs_ail_cursor *prev = NULL; 208 cur->item = NULL;
235 struct xfs_ail_cursor *cur; 209 list_del_init(&cur->list);
236
237 done->item = NULL;
238 if (done == &ailp->xa_cursors)
239 return;
240 prev = &ailp->xa_cursors;
241 for (cur = prev->next; cur; prev = cur, cur = prev->next) {
242 if (cur == done) {
243 prev->next = cur->next;
244 break;
245 }
246 }
247 ASSERT(cur);
248} 210}
249 211
250/* 212/*
251 * Invalidate any cursor that is pointing to this item. This is 213 * Invalidate any cursor that is pointing to this item. This is called when an
252 * called when an item is removed from the AIL. Any cursor pointing 214 * item is removed from the AIL. Any cursor pointing to this object is now
253 * to this object is now invalid and the traversal needs to be 215 * invalid and the traversal needs to be terminated so it doesn't reference a
254 * terminated so it doesn't reference a freed object. We set the 216 * freed object. We set the low bit of the cursor item pointer so we can
255 * cursor item to a value of 1 so we can distinguish between an 217 * distinguish between an invalidation and the end of the list when getting the
256 * invalidation and the end of the list when getting the next item 218 * next item from the cursor.
257 * from the cursor.
258 */ 219 */
259STATIC void 220STATIC void
260xfs_trans_ail_cursor_clear( 221xfs_trans_ail_cursor_clear(
@@ -263,8 +224,7 @@ xfs_trans_ail_cursor_clear(
263{ 224{
264 struct xfs_ail_cursor *cur; 225 struct xfs_ail_cursor *cur;
265 226
266 /* need to search all cursors */ 227 list_for_each_entry(cur, &ailp->xa_cursors, list) {
267 for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
268 if (cur->item == lip) 228 if (cur->item == lip)
269 cur->item = (struct xfs_log_item *) 229 cur->item = (struct xfs_log_item *)
270 ((__psint_t)cur->item | 1); 230 ((__psint_t)cur->item | 1);
@@ -272,9 +232,10 @@ xfs_trans_ail_cursor_clear(
272} 232}
273 233
274/* 234/*
275 * Return the item in the AIL with the current lsn. 235 * Find the first item in the AIL with the given @lsn by searching in ascending
276 * Return the current tree generation number for use 236 * LSN order and initialise the cursor to point to the next item for a
277 * in calls to xfs_trans_next_ail(). 237 * ascending traversal. Pass a @lsn of zero to initialise the cursor to the
238 * first item in the AIL. Returns NULL if the list is empty.
278 */ 239 */
279xfs_log_item_t * 240xfs_log_item_t *
280xfs_trans_ail_cursor_first( 241xfs_trans_ail_cursor_first(
@@ -285,46 +246,112 @@ xfs_trans_ail_cursor_first(
285 xfs_log_item_t *lip; 246 xfs_log_item_t *lip;
286 247
287 xfs_trans_ail_cursor_init(ailp, cur); 248 xfs_trans_ail_cursor_init(ailp, cur);
288 lip = xfs_ail_min(ailp); 249
289 if (lsn == 0) 250 if (lsn == 0) {
251 lip = xfs_ail_min(ailp);
290 goto out; 252 goto out;
253 }
291 254
292 list_for_each_entry(lip, &ailp->xa_ail, li_ail) { 255 list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
293 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0) 256 if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
294 goto out; 257 goto out;
295 } 258 }
296 lip = NULL; 259 return NULL;
260
297out: 261out:
298 xfs_trans_ail_cursor_set(ailp, cur, lip); 262 if (lip)
263 cur->item = xfs_ail_next(ailp, lip);
299 return lip; 264 return lip;
300} 265}
301 266
267static struct xfs_log_item *
268__xfs_trans_ail_cursor_last(
269 struct xfs_ail *ailp,
270 xfs_lsn_t lsn)
271{
272 xfs_log_item_t *lip;
273
274 list_for_each_entry_reverse(lip, &ailp->xa_ail, li_ail) {
275 if (XFS_LSN_CMP(lip->li_lsn, lsn) <= 0)
276 return lip;
277 }
278 return NULL;
279}
280
281/*
282 * Find the last item in the AIL with the given @lsn by searching in descending
283 * LSN order and initialise the cursor to point to that item. If there is no
284 * item with the value of @lsn, then it sets the cursor to the last item with an
285 * LSN lower than @lsn. Returns NULL if the list is empty.
286 */
287struct xfs_log_item *
288xfs_trans_ail_cursor_last(
289 struct xfs_ail *ailp,
290 struct xfs_ail_cursor *cur,
291 xfs_lsn_t lsn)
292{
293 xfs_trans_ail_cursor_init(ailp, cur);
294 cur->item = __xfs_trans_ail_cursor_last(ailp, lsn);
295 return cur->item;
296}
297
302/* 298/*
303 * splice the log item list into the AIL at the given LSN. 299 * Splice the log item list into the AIL at the given LSN. We splice to the
300 * tail of the given LSN to maintain insert order for push traversals. The
301 * cursor is optional, allowing repeated updates to the same LSN to avoid
302 * repeated traversals.
304 */ 303 */
305static void 304static void
306xfs_ail_splice( 305xfs_ail_splice(
307 struct xfs_ail *ailp, 306 struct xfs_ail *ailp,
308 struct list_head *list, 307 struct xfs_ail_cursor *cur,
309 xfs_lsn_t lsn) 308 struct list_head *list,
309 xfs_lsn_t lsn)
310{ 310{
311 xfs_log_item_t *next_lip; 311 struct xfs_log_item *lip = cur ? cur->item : NULL;
312 struct xfs_log_item *next_lip;
312 313
313 /* If the list is empty, just insert the item. */ 314 /*
314 if (list_empty(&ailp->xa_ail)) { 315 * Get a new cursor if we don't have a placeholder or the existing one
315 list_splice(list, &ailp->xa_ail); 316 * has been invalidated.
316 return; 317 */
318 if (!lip || (__psint_t)lip & 1) {
319 lip = __xfs_trans_ail_cursor_last(ailp, lsn);
320
321 if (!lip) {
322 /* The list is empty, so just splice and return. */
323 if (cur)
324 cur->item = NULL;
325 list_splice(list, &ailp->xa_ail);
326 return;
327 }
317 } 328 }
318 329
319 list_for_each_entry_reverse(next_lip, &ailp->xa_ail, li_ail) { 330 /*
320 if (XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0) 331 * Our cursor points to the item we want to insert _after_, so we have
321 break; 332 * to update the cursor to point to the end of the list we are splicing
333 * in so that it points to the correct location for the next splice.
334 * i.e. before the splice
335 *
336 * lsn -> lsn -> lsn + x -> lsn + x ...
337 * ^
338 * | cursor points here
339 *
340 * After the splice we have:
341 *
342 * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
343 * ^ ^
344 * | cursor points here | needs to move here
345 *
346 * So we set the cursor to the last item in the list to be spliced
347 * before we execute the splice, resulting in the cursor pointing to
348 * the correct item after the splice occurs.
349 */
350 if (cur) {
351 next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
352 cur->item = next_lip;
322 } 353 }
323 354 list_splice(list, &lip->li_ail);
324 ASSERT(&next_lip->li_ail == &ailp->xa_ail ||
325 XFS_LSN_CMP(next_lip->li_lsn, lsn) <= 0);
326
327 list_splice_init(list, &next_lip->li_ail);
328} 355}
329 356
330/* 357/*
@@ -351,7 +378,7 @@ xfs_ail_worker(
351 struct xfs_ail *ailp = container_of(to_delayed_work(work), 378 struct xfs_ail *ailp = container_of(to_delayed_work(work),
352 struct xfs_ail, xa_work); 379 struct xfs_ail, xa_work);
353 xfs_mount_t *mp = ailp->xa_mount; 380 xfs_mount_t *mp = ailp->xa_mount;
354 struct xfs_ail_cursor *cur = &ailp->xa_cursors; 381 struct xfs_ail_cursor cur;
355 xfs_log_item_t *lip; 382 xfs_log_item_t *lip;
356 xfs_lsn_t lsn; 383 xfs_lsn_t lsn;
357 xfs_lsn_t target; 384 xfs_lsn_t target;
@@ -363,13 +390,12 @@ xfs_ail_worker(
363 390
364 spin_lock(&ailp->xa_lock); 391 spin_lock(&ailp->xa_lock);
365 target = ailp->xa_target; 392 target = ailp->xa_target;
366 xfs_trans_ail_cursor_init(ailp, cur); 393 lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
367 lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
368 if (!lip || XFS_FORCED_SHUTDOWN(mp)) { 394 if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
369 /* 395 /*
370 * AIL is empty or our push has reached the end. 396 * AIL is empty or our push has reached the end.
371 */ 397 */
372 xfs_trans_ail_cursor_done(ailp, cur); 398 xfs_trans_ail_cursor_done(ailp, &cur);
373 spin_unlock(&ailp->xa_lock); 399 spin_unlock(&ailp->xa_lock);
374 goto out_done; 400 goto out_done;
375 } 401 }
@@ -457,12 +483,12 @@ xfs_ail_worker(
457 if (stuck > 100) 483 if (stuck > 100)
458 break; 484 break;
459 485
460 lip = xfs_trans_ail_cursor_next(ailp, cur); 486 lip = xfs_trans_ail_cursor_next(ailp, &cur);
461 if (lip == NULL) 487 if (lip == NULL)
462 break; 488 break;
463 lsn = lip->li_lsn; 489 lsn = lip->li_lsn;
464 } 490 }
465 xfs_trans_ail_cursor_done(ailp, cur); 491 xfs_trans_ail_cursor_done(ailp, &cur);
466 spin_unlock(&ailp->xa_lock); 492 spin_unlock(&ailp->xa_lock);
467 493
468 if (flush_log) { 494 if (flush_log) {
@@ -645,6 +671,7 @@ xfs_trans_unlocked_item(
645void 671void
646xfs_trans_ail_update_bulk( 672xfs_trans_ail_update_bulk(
647 struct xfs_ail *ailp, 673 struct xfs_ail *ailp,
674 struct xfs_ail_cursor *cur,
648 struct xfs_log_item **log_items, 675 struct xfs_log_item **log_items,
649 int nr_items, 676 int nr_items,
650 xfs_lsn_t lsn) __releases(ailp->xa_lock) 677 xfs_lsn_t lsn) __releases(ailp->xa_lock)
@@ -674,7 +701,7 @@ xfs_trans_ail_update_bulk(
674 list_add(&lip->li_ail, &tmp); 701 list_add(&lip->li_ail, &tmp);
675 } 702 }
676 703
677 xfs_ail_splice(ailp, &tmp, lsn); 704 xfs_ail_splice(ailp, cur, &tmp, lsn);
678 705
679 if (!mlip_changed) { 706 if (!mlip_changed) {
680 spin_unlock(&ailp->xa_lock); 707 spin_unlock(&ailp->xa_lock);
@@ -793,6 +820,7 @@ xfs_trans_ail_init(
793 820
794 ailp->xa_mount = mp; 821 ailp->xa_mount = mp;
795 INIT_LIST_HEAD(&ailp->xa_ail); 822 INIT_LIST_HEAD(&ailp->xa_ail);
823 INIT_LIST_HEAD(&ailp->xa_cursors);
796 spin_lock_init(&ailp->xa_lock); 824 spin_lock_init(&ailp->xa_lock);
797 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker); 825 INIT_DELAYED_WORK(&ailp->xa_work, xfs_ail_worker);
798 mp->m_ail = ailp; 826 mp->m_ail = ailp;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 03b3b7f85a3b..15584fc3ed7d 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -81,7 +81,7 @@ _xfs_trans_bjoin(
81 struct xfs_buf_log_item *bip; 81 struct xfs_buf_log_item *bip;
82 82
83 ASSERT(XFS_BUF_ISBUSY(bp)); 83 ASSERT(XFS_BUF_ISBUSY(bp));
84 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); 84 ASSERT(bp->b_transp == NULL);
85 85
86 /* 86 /*
87 * The xfs_buf_log_item pointer is stored in b_fsprivate. If 87 * The xfs_buf_log_item pointer is stored in b_fsprivate. If
@@ -89,7 +89,7 @@ _xfs_trans_bjoin(
89 * The checks to see if one is there are in xfs_buf_item_init(). 89 * The checks to see if one is there are in xfs_buf_item_init().
90 */ 90 */
91 xfs_buf_item_init(bp, tp->t_mountp); 91 xfs_buf_item_init(bp, tp->t_mountp);
92 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 92 bip = bp->b_fspriv;
93 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 93 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
94 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 94 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
95 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED)); 95 ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
@@ -110,7 +110,7 @@ _xfs_trans_bjoin(
110 * Initialize b_fsprivate2 so we can find it with incore_match() 110 * Initialize b_fsprivate2 so we can find it with incore_match()
111 * in xfs_trans_get_buf() and friends above. 111 * in xfs_trans_get_buf() and friends above.
112 */ 112 */
113 XFS_BUF_SET_FSPRIVATE2(bp, tp); 113 bp->b_transp = tp;
114 114
115} 115}
116 116
@@ -160,7 +160,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
160 */ 160 */
161 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len); 161 bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
162 if (bp != NULL) { 162 if (bp != NULL) {
163 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 163 ASSERT(xfs_buf_islocked(bp));
164 if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) 164 if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
165 XFS_BUF_SUPER_STALE(bp); 165 XFS_BUF_SUPER_STALE(bp);
166 166
@@ -172,8 +172,8 @@ xfs_trans_get_buf(xfs_trans_t *tp,
172 else if (XFS_BUF_ISSTALE(bp)) 172 else if (XFS_BUF_ISSTALE(bp))
173 ASSERT(!XFS_BUF_ISDELAYWRITE(bp)); 173 ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
174 174
175 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 175 ASSERT(bp->b_transp == tp);
176 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 176 bip = bp->b_fspriv;
177 ASSERT(bip != NULL); 177 ASSERT(bip != NULL);
178 ASSERT(atomic_read(&bip->bli_refcount) > 0); 178 ASSERT(atomic_read(&bip->bli_refcount) > 0);
179 bip->bli_recur++; 179 bip->bli_recur++;
@@ -232,8 +232,8 @@ xfs_trans_getsb(xfs_trans_t *tp,
232 * recursion count and return the buffer to the caller. 232 * recursion count and return the buffer to the caller.
233 */ 233 */
234 bp = mp->m_sb_bp; 234 bp = mp->m_sb_bp;
235 if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) { 235 if (bp->b_transp == tp) {
236 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 236 bip = bp->b_fspriv;
237 ASSERT(bip != NULL); 237 ASSERT(bip != NULL);
238 ASSERT(atomic_read(&bip->bli_refcount) > 0); 238 ASSERT(atomic_read(&bip->bli_refcount) > 0);
239 bip->bli_recur++; 239 bip->bli_recur++;
@@ -327,9 +327,9 @@ xfs_trans_read_buf(
327 */ 327 */
328 bp = xfs_trans_buf_item_match(tp, target, blkno, len); 328 bp = xfs_trans_buf_item_match(tp, target, blkno, len);
329 if (bp != NULL) { 329 if (bp != NULL) {
330 ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); 330 ASSERT(xfs_buf_islocked(bp));
331 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 331 ASSERT(bp->b_transp == tp);
332 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 332 ASSERT(bp->b_fspriv != NULL);
333 ASSERT((XFS_BUF_ISERROR(bp)) == 0); 333 ASSERT((XFS_BUF_ISERROR(bp)) == 0);
334 if (!(XFS_BUF_ISDONE(bp))) { 334 if (!(XFS_BUF_ISDONE(bp))) {
335 trace_xfs_trans_read_buf_io(bp, _RET_IP_); 335 trace_xfs_trans_read_buf_io(bp, _RET_IP_);
@@ -363,7 +363,7 @@ xfs_trans_read_buf(
363 } 363 }
364 364
365 365
366 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*); 366 bip = bp->b_fspriv;
367 bip->bli_recur++; 367 bip->bli_recur++;
368 368
369 ASSERT(atomic_read(&bip->bli_refcount) > 0); 369 ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -460,32 +460,30 @@ xfs_trans_brelse(xfs_trans_t *tp,
460 xfs_buf_t *bp) 460 xfs_buf_t *bp)
461{ 461{
462 xfs_buf_log_item_t *bip; 462 xfs_buf_log_item_t *bip;
463 xfs_log_item_t *lip;
464 463
465 /* 464 /*
466 * Default to a normal brelse() call if the tp is NULL. 465 * Default to a normal brelse() call if the tp is NULL.
467 */ 466 */
468 if (tp == NULL) { 467 if (tp == NULL) {
469 ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL); 468 struct xfs_log_item *lip = bp->b_fspriv;
469
470 ASSERT(bp->b_transp == NULL);
471
470 /* 472 /*
471 * If there's a buf log item attached to the buffer, 473 * If there's a buf log item attached to the buffer,
472 * then let the AIL know that the buffer is being 474 * then let the AIL know that the buffer is being
473 * unlocked. 475 * unlocked.
474 */ 476 */
475 if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { 477 if (lip != NULL && lip->li_type == XFS_LI_BUF) {
476 lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); 478 bip = bp->b_fspriv;
477 if (lip->li_type == XFS_LI_BUF) { 479 xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip);
478 bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
479 xfs_trans_unlocked_item(bip->bli_item.li_ailp,
480 lip);
481 }
482 } 480 }
483 xfs_buf_relse(bp); 481 xfs_buf_relse(bp);
484 return; 482 return;
485 } 483 }
486 484
487 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 485 ASSERT(bp->b_transp == tp);
488 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *); 486 bip = bp->b_fspriv;
489 ASSERT(bip->bli_item.li_type == XFS_LI_BUF); 487 ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
490 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 488 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
491 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 489 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
@@ -556,7 +554,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
556 xfs_buf_item_relse(bp); 554 xfs_buf_item_relse(bp);
557 bip = NULL; 555 bip = NULL;
558 } 556 }
559 XFS_BUF_SET_FSPRIVATE2(bp, NULL); 557 bp->b_transp = NULL;
560 558
561 /* 559 /*
562 * If we've still got a buf log item on the buffer, then 560 * If we've still got a buf log item on the buffer, then
@@ -581,16 +579,15 @@ void
581xfs_trans_bhold(xfs_trans_t *tp, 579xfs_trans_bhold(xfs_trans_t *tp,
582 xfs_buf_t *bp) 580 xfs_buf_t *bp)
583{ 581{
584 xfs_buf_log_item_t *bip; 582 xfs_buf_log_item_t *bip = bp->b_fspriv;
585 583
586 ASSERT(XFS_BUF_ISBUSY(bp)); 584 ASSERT(XFS_BUF_ISBUSY(bp));
587 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 585 ASSERT(bp->b_transp == tp);
588 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 586 ASSERT(bip != NULL);
589
590 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
591 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 587 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
592 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 588 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
593 ASSERT(atomic_read(&bip->bli_refcount) > 0); 589 ASSERT(atomic_read(&bip->bli_refcount) > 0);
590
594 bip->bli_flags |= XFS_BLI_HOLD; 591 bip->bli_flags |= XFS_BLI_HOLD;
595 trace_xfs_trans_bhold(bip); 592 trace_xfs_trans_bhold(bip);
596} 593}
@@ -603,19 +600,17 @@ void
603xfs_trans_bhold_release(xfs_trans_t *tp, 600xfs_trans_bhold_release(xfs_trans_t *tp,
604 xfs_buf_t *bp) 601 xfs_buf_t *bp)
605{ 602{
606 xfs_buf_log_item_t *bip; 603 xfs_buf_log_item_t *bip = bp->b_fspriv;
607 604
608 ASSERT(XFS_BUF_ISBUSY(bp)); 605 ASSERT(XFS_BUF_ISBUSY(bp));
609 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 606 ASSERT(bp->b_transp == tp);
610 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 607 ASSERT(bip != NULL);
611
612 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
613 ASSERT(!(bip->bli_flags & XFS_BLI_STALE)); 608 ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
614 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL)); 609 ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
615 ASSERT(atomic_read(&bip->bli_refcount) > 0); 610 ASSERT(atomic_read(&bip->bli_refcount) > 0);
616 ASSERT(bip->bli_flags & XFS_BLI_HOLD); 611 ASSERT(bip->bli_flags & XFS_BLI_HOLD);
617 bip->bli_flags &= ~XFS_BLI_HOLD;
618 612
613 bip->bli_flags &= ~XFS_BLI_HOLD;
619 trace_xfs_trans_bhold_release(bip); 614 trace_xfs_trans_bhold_release(bip);
620} 615}
621 616
@@ -634,14 +629,14 @@ xfs_trans_log_buf(xfs_trans_t *tp,
634 uint first, 629 uint first,
635 uint last) 630 uint last)
636{ 631{
637 xfs_buf_log_item_t *bip; 632 xfs_buf_log_item_t *bip = bp->b_fspriv;
638 633
639 ASSERT(XFS_BUF_ISBUSY(bp)); 634 ASSERT(XFS_BUF_ISBUSY(bp));
640 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 635 ASSERT(bp->b_transp == tp);
641 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 636 ASSERT(bip != NULL);
642 ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp))); 637 ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
643 ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) || 638 ASSERT(bp->b_iodone == NULL ||
644 (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks)); 639 bp->b_iodone == xfs_buf_iodone_callbacks);
645 640
646 /* 641 /*
647 * Mark the buffer as needing to be written out eventually, 642 * Mark the buffer as needing to be written out eventually,
@@ -656,9 +651,8 @@ xfs_trans_log_buf(xfs_trans_t *tp,
656 XFS_BUF_DELAYWRITE(bp); 651 XFS_BUF_DELAYWRITE(bp);
657 XFS_BUF_DONE(bp); 652 XFS_BUF_DONE(bp);
658 653
659 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
660 ASSERT(atomic_read(&bip->bli_refcount) > 0); 654 ASSERT(atomic_read(&bip->bli_refcount) > 0);
661 XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks); 655 bp->b_iodone = xfs_buf_iodone_callbacks;
662 bip->bli_item.li_cb = xfs_buf_iodone; 656 bip->bli_item.li_cb = xfs_buf_iodone;
663 657
664 trace_xfs_trans_log_buf(bip); 658 trace_xfs_trans_log_buf(bip);
@@ -706,13 +700,11 @@ xfs_trans_binval(
706 xfs_trans_t *tp, 700 xfs_trans_t *tp,
707 xfs_buf_t *bp) 701 xfs_buf_t *bp)
708{ 702{
709 xfs_buf_log_item_t *bip; 703 xfs_buf_log_item_t *bip = bp->b_fspriv;
710 704
711 ASSERT(XFS_BUF_ISBUSY(bp)); 705 ASSERT(XFS_BUF_ISBUSY(bp));
712 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 706 ASSERT(bp->b_transp == tp);
713 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 707 ASSERT(bip != NULL);
714
715 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
716 ASSERT(atomic_read(&bip->bli_refcount) > 0); 708 ASSERT(atomic_read(&bip->bli_refcount) > 0);
717 709
718 trace_xfs_trans_binval(bip); 710 trace_xfs_trans_binval(bip);
@@ -780,13 +772,11 @@ xfs_trans_inode_buf(
780 xfs_trans_t *tp, 772 xfs_trans_t *tp,
781 xfs_buf_t *bp) 773 xfs_buf_t *bp)
782{ 774{
783 xfs_buf_log_item_t *bip; 775 xfs_buf_log_item_t *bip = bp->b_fspriv;
784 776
785 ASSERT(XFS_BUF_ISBUSY(bp)); 777 ASSERT(XFS_BUF_ISBUSY(bp));
786 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 778 ASSERT(bp->b_transp == tp);
787 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 779 ASSERT(bip != NULL);
788
789 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
790 ASSERT(atomic_read(&bip->bli_refcount) > 0); 780 ASSERT(atomic_read(&bip->bli_refcount) > 0);
791 781
792 bip->bli_flags |= XFS_BLI_INODE_BUF; 782 bip->bli_flags |= XFS_BLI_INODE_BUF;
@@ -806,13 +796,11 @@ xfs_trans_stale_inode_buf(
806 xfs_trans_t *tp, 796 xfs_trans_t *tp,
807 xfs_buf_t *bp) 797 xfs_buf_t *bp)
808{ 798{
809 xfs_buf_log_item_t *bip; 799 xfs_buf_log_item_t *bip = bp->b_fspriv;
810 800
811 ASSERT(XFS_BUF_ISBUSY(bp)); 801 ASSERT(XFS_BUF_ISBUSY(bp));
812 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 802 ASSERT(bp->b_transp == tp);
813 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 803 ASSERT(bip != NULL);
814
815 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
816 ASSERT(atomic_read(&bip->bli_refcount) > 0); 804 ASSERT(atomic_read(&bip->bli_refcount) > 0);
817 805
818 bip->bli_flags |= XFS_BLI_STALE_INODE; 806 bip->bli_flags |= XFS_BLI_STALE_INODE;
@@ -833,13 +821,11 @@ xfs_trans_inode_alloc_buf(
833 xfs_trans_t *tp, 821 xfs_trans_t *tp,
834 xfs_buf_t *bp) 822 xfs_buf_t *bp)
835{ 823{
836 xfs_buf_log_item_t *bip; 824 xfs_buf_log_item_t *bip = bp->b_fspriv;
837 825
838 ASSERT(XFS_BUF_ISBUSY(bp)); 826 ASSERT(XFS_BUF_ISBUSY(bp));
839 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 827 ASSERT(bp->b_transp == tp);
840 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 828 ASSERT(bip != NULL);
841
842 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
843 ASSERT(atomic_read(&bip->bli_refcount) > 0); 829 ASSERT(atomic_read(&bip->bli_refcount) > 0);
844 830
845 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF; 831 bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
@@ -863,16 +849,14 @@ xfs_trans_dquot_buf(
863 xfs_buf_t *bp, 849 xfs_buf_t *bp,
864 uint type) 850 uint type)
865{ 851{
866 xfs_buf_log_item_t *bip; 852 xfs_buf_log_item_t *bip = bp->b_fspriv;
867 853
868 ASSERT(XFS_BUF_ISBUSY(bp)); 854 ASSERT(XFS_BUF_ISBUSY(bp));
869 ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp); 855 ASSERT(bp->b_transp == tp);
870 ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL); 856 ASSERT(bip != NULL);
871 ASSERT(type == XFS_BLF_UDQUOT_BUF || 857 ASSERT(type == XFS_BLF_UDQUOT_BUF ||
872 type == XFS_BLF_PDQUOT_BUF || 858 type == XFS_BLF_PDQUOT_BUF ||
873 type == XFS_BLF_GDQUOT_BUF); 859 type == XFS_BLF_GDQUOT_BUF);
874
875 bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
876 ASSERT(atomic_read(&bip->bli_refcount) > 0); 860 ASSERT(atomic_read(&bip->bli_refcount) > 0);
877 861
878 bip->bli_format.blf_flags |= type; 862 bip->bli_format.blf_flags |= type;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 048b0c689d3e..c8dea2fd7e68 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -55,7 +55,6 @@ xfs_trans_ijoin(
55{ 55{
56 xfs_inode_log_item_t *iip; 56 xfs_inode_log_item_t *iip;
57 57
58 ASSERT(ip->i_transp == NULL);
59 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 58 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
60 if (ip->i_itemp == NULL) 59 if (ip->i_itemp == NULL)
61 xfs_inode_item_init(ip, ip->i_mount); 60 xfs_inode_item_init(ip, ip->i_mount);
@@ -68,12 +67,6 @@ xfs_trans_ijoin(
68 xfs_trans_add_item(tp, &iip->ili_item); 67 xfs_trans_add_item(tp, &iip->ili_item);
69 68
70 xfs_trans_inode_broot_debug(ip); 69 xfs_trans_inode_broot_debug(ip);
71
72 /*
73 * Initialize i_transp so we can find it with xfs_inode_incore()
74 * in xfs_trans_iget() above.
75 */
76 ip->i_transp = tp;
77} 70}
78 71
79/* 72/*
@@ -111,7 +104,6 @@ xfs_trans_ichgtime(
111 104
112 ASSERT(tp); 105 ASSERT(tp);
113 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 106 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
114 ASSERT(ip->i_transp == tp);
115 107
116 tv = current_fs_time(inode->i_sb); 108 tv = current_fs_time(inode->i_sb);
117 109
@@ -140,7 +132,6 @@ xfs_trans_log_inode(
140 xfs_inode_t *ip, 132 xfs_inode_t *ip,
141 uint flags) 133 uint flags)
142{ 134{
143 ASSERT(ip->i_transp == tp);
144 ASSERT(ip->i_itemp != NULL); 135 ASSERT(ip->i_itemp != NULL);
145 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 136 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
146 137
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index 6b164e9e9a1f..212946b97239 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -53,7 +53,7 @@ void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
53 * of the list to trigger traversal restarts. 53 * of the list to trigger traversal restarts.
54 */ 54 */
55struct xfs_ail_cursor { 55struct xfs_ail_cursor {
56 struct xfs_ail_cursor *next; 56 struct list_head list;
57 struct xfs_log_item *item; 57 struct xfs_log_item *item;
58}; 58};
59 59
@@ -66,7 +66,7 @@ struct xfs_ail {
66 struct xfs_mount *xa_mount; 66 struct xfs_mount *xa_mount;
67 struct list_head xa_ail; 67 struct list_head xa_ail;
68 xfs_lsn_t xa_target; 68 xfs_lsn_t xa_target;
69 struct xfs_ail_cursor xa_cursors; 69 struct list_head xa_cursors;
70 spinlock_t xa_lock; 70 spinlock_t xa_lock;
71 struct delayed_work xa_work; 71 struct delayed_work xa_work;
72 xfs_lsn_t xa_last_pushed_lsn; 72 xfs_lsn_t xa_last_pushed_lsn;
@@ -82,6 +82,7 @@ struct xfs_ail {
82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */ 82extern struct workqueue_struct *xfs_ail_wq; /* AIL workqueue */
83 83
84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp, 84void xfs_trans_ail_update_bulk(struct xfs_ail *ailp,
85 struct xfs_ail_cursor *cur,
85 struct xfs_log_item **log_items, int nr_items, 86 struct xfs_log_item **log_items, int nr_items,
86 xfs_lsn_t lsn) __releases(ailp->xa_lock); 87 xfs_lsn_t lsn) __releases(ailp->xa_lock);
87static inline void 88static inline void
@@ -90,7 +91,7 @@ xfs_trans_ail_update(
90 struct xfs_log_item *lip, 91 struct xfs_log_item *lip,
91 xfs_lsn_t lsn) __releases(ailp->xa_lock) 92 xfs_lsn_t lsn) __releases(ailp->xa_lock)
92{ 93{
93 xfs_trans_ail_update_bulk(ailp, &lip, 1, lsn); 94 xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
94} 95}
95 96
96void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp, 97void xfs_trans_ail_delete_bulk(struct xfs_ail *ailp,
@@ -111,10 +112,13 @@ xfs_lsn_t xfs_ail_min_lsn(struct xfs_ail *ailp);
111void xfs_trans_unlocked_item(struct xfs_ail *, 112void xfs_trans_unlocked_item(struct xfs_ail *,
112 xfs_log_item_t *); 113 xfs_log_item_t *);
113 114
114struct xfs_log_item *xfs_trans_ail_cursor_first(struct xfs_ail *ailp, 115struct xfs_log_item * xfs_trans_ail_cursor_first(struct xfs_ail *ailp,
115 struct xfs_ail_cursor *cur, 116 struct xfs_ail_cursor *cur,
116 xfs_lsn_t lsn); 117 xfs_lsn_t lsn);
117struct xfs_log_item *xfs_trans_ail_cursor_next(struct xfs_ail *ailp, 118struct xfs_log_item * xfs_trans_ail_cursor_last(struct xfs_ail *ailp,
119 struct xfs_ail_cursor *cur,
120 xfs_lsn_t lsn);
121struct xfs_log_item * xfs_trans_ail_cursor_next(struct xfs_ail *ailp,
118 struct xfs_ail_cursor *cur); 122 struct xfs_ail_cursor *cur);
119void xfs_trans_ail_cursor_done(struct xfs_ail *ailp, 123void xfs_trans_ail_cursor_done(struct xfs_ail *ailp,
120 struct xfs_ail_cursor *cur); 124 struct xfs_ail_cursor *cur);
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 619720705bc6..88d121486c52 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -50,430 +50,6 @@
50#include "xfs_vnodeops.h" 50#include "xfs_vnodeops.h"
51#include "xfs_trace.h" 51#include "xfs_trace.h"
52 52
53int
54xfs_setattr(
55 struct xfs_inode *ip,
56 struct iattr *iattr,
57 int flags)
58{
59 xfs_mount_t *mp = ip->i_mount;
60 struct inode *inode = VFS_I(ip);
61 int mask = iattr->ia_valid;
62 xfs_trans_t *tp;
63 int code;
64 uint lock_flags;
65 uint commit_flags=0;
66 uid_t uid=0, iuid=0;
67 gid_t gid=0, igid=0;
68 struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
69 int need_iolock = 1;
70
71 trace_xfs_setattr(ip);
72
73 if (mp->m_flags & XFS_MOUNT_RDONLY)
74 return XFS_ERROR(EROFS);
75
76 if (XFS_FORCED_SHUTDOWN(mp))
77 return XFS_ERROR(EIO);
78
79 code = -inode_change_ok(inode, iattr);
80 if (code)
81 return code;
82
83 olddquot1 = olddquot2 = NULL;
84 udqp = gdqp = NULL;
85
86 /*
87 * If disk quotas is on, we make sure that the dquots do exist on disk,
88 * before we start any other transactions. Trying to do this later
89 * is messy. We don't care to take a readlock to look at the ids
90 * in inode here, because we can't hold it across the trans_reserve.
91 * If the IDs do change before we take the ilock, we're covered
92 * because the i_*dquot fields will get updated anyway.
93 */
94 if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
95 uint qflags = 0;
96
97 if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
98 uid = iattr->ia_uid;
99 qflags |= XFS_QMOPT_UQUOTA;
100 } else {
101 uid = ip->i_d.di_uid;
102 }
103 if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
104 gid = iattr->ia_gid;
105 qflags |= XFS_QMOPT_GQUOTA;
106 } else {
107 gid = ip->i_d.di_gid;
108 }
109
110 /*
111 * We take a reference when we initialize udqp and gdqp,
112 * so it is important that we never blindly double trip on
113 * the same variable. See xfs_create() for an example.
114 */
115 ASSERT(udqp == NULL);
116 ASSERT(gdqp == NULL);
117 code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
118 qflags, &udqp, &gdqp);
119 if (code)
120 return code;
121 }
122
123 /*
124 * For the other attributes, we acquire the inode lock and
125 * first do an error checking pass.
126 */
127 tp = NULL;
128 lock_flags = XFS_ILOCK_EXCL;
129 if (flags & XFS_ATTR_NOLOCK)
130 need_iolock = 0;
131 if (!(mask & ATTR_SIZE)) {
132 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
133 commit_flags = 0;
134 code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
135 0, 0, 0);
136 if (code) {
137 lock_flags = 0;
138 goto error_return;
139 }
140 } else {
141 if (need_iolock)
142 lock_flags |= XFS_IOLOCK_EXCL;
143 }
144
145 xfs_ilock(ip, lock_flags);
146
147 /*
148 * Change file ownership. Must be the owner or privileged.
149 */
150 if (mask & (ATTR_UID|ATTR_GID)) {
151 /*
152 * These IDs could have changed since we last looked at them.
153 * But, we're assured that if the ownership did change
154 * while we didn't have the inode locked, inode's dquot(s)
155 * would have changed also.
156 */
157 iuid = ip->i_d.di_uid;
158 igid = ip->i_d.di_gid;
159 gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
160 uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
161
162 /*
163 * Do a quota reservation only if uid/gid is actually
164 * going to change.
165 */
166 if (XFS_IS_QUOTA_RUNNING(mp) &&
167 ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
168 (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
169 ASSERT(tp);
170 code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
171 capable(CAP_FOWNER) ?
172 XFS_QMOPT_FORCE_RES : 0);
173 if (code) /* out of quota */
174 goto error_return;
175 }
176 }
177
178 /*
179 * Truncate file. Must have write permission and not be a directory.
180 */
181 if (mask & ATTR_SIZE) {
182 /* Short circuit the truncate case for zero length files */
183 if (iattr->ia_size == 0 &&
184 ip->i_size == 0 && ip->i_d.di_nextents == 0) {
185 xfs_iunlock(ip, XFS_ILOCK_EXCL);
186 lock_flags &= ~XFS_ILOCK_EXCL;
187 if (mask & ATTR_CTIME) {
188 inode->i_mtime = inode->i_ctime =
189 current_fs_time(inode->i_sb);
190 xfs_mark_inode_dirty_sync(ip);
191 }
192 code = 0;
193 goto error_return;
194 }
195
196 if (S_ISDIR(ip->i_d.di_mode)) {
197 code = XFS_ERROR(EISDIR);
198 goto error_return;
199 } else if (!S_ISREG(ip->i_d.di_mode)) {
200 code = XFS_ERROR(EINVAL);
201 goto error_return;
202 }
203
204 /*
205 * Make sure that the dquots are attached to the inode.
206 */
207 code = xfs_qm_dqattach_locked(ip, 0);
208 if (code)
209 goto error_return;
210
211 /*
212 * Now we can make the changes. Before we join the inode
213 * to the transaction, if ATTR_SIZE is set then take care of
214 * the part of the truncation that must be done without the
215 * inode lock. This needs to be done before joining the inode
216 * to the transaction, because the inode cannot be unlocked
217 * once it is a part of the transaction.
218 */
219 if (iattr->ia_size > ip->i_size) {
220 /*
221 * Do the first part of growing a file: zero any data
222 * in the last block that is beyond the old EOF. We
223 * need to do this before the inode is joined to the
224 * transaction to modify the i_size.
225 */
226 code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
227 if (code)
228 goto error_return;
229 }
230 xfs_iunlock(ip, XFS_ILOCK_EXCL);
231 lock_flags &= ~XFS_ILOCK_EXCL;
232
233 /*
234 * We are going to log the inode size change in this
235 * transaction so any previous writes that are beyond the on
236 * disk EOF and the new EOF that have not been written out need
237 * to be written here. If we do not write the data out, we
238 * expose ourselves to the null files problem.
239 *
240 * Only flush from the on disk size to the smaller of the in
241 * memory file size or the new size as that's the range we
242 * really care about here and prevents waiting for other data
243 * not within the range we care about here.
244 */
245 if (ip->i_size != ip->i_d.di_size &&
246 iattr->ia_size > ip->i_d.di_size) {
247 code = xfs_flush_pages(ip,
248 ip->i_d.di_size, iattr->ia_size,
249 XBF_ASYNC, FI_NONE);
250 if (code)
251 goto error_return;
252 }
253
254 /* wait for all I/O to complete */
255 xfs_ioend_wait(ip);
256
257 code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
258 xfs_get_blocks);
259 if (code)
260 goto error_return;
261
262 tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
263 code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
264 XFS_TRANS_PERM_LOG_RES,
265 XFS_ITRUNCATE_LOG_COUNT);
266 if (code)
267 goto error_return;
268
269 truncate_setsize(inode, iattr->ia_size);
270
271 commit_flags = XFS_TRANS_RELEASE_LOG_RES;
272 lock_flags |= XFS_ILOCK_EXCL;
273
274 xfs_ilock(ip, XFS_ILOCK_EXCL);
275
276 xfs_trans_ijoin(tp, ip);
277
278 /*
279 * Only change the c/mtime if we are changing the size
280 * or we are explicitly asked to change it. This handles
281 * the semantic difference between truncate() and ftruncate()
282 * as implemented in the VFS.
283 *
284 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
285 * is a special case where we need to update the times despite
286 * not having these flags set. For all other operations the
287 * VFS set these flags explicitly if it wants a timestamp
288 * update.
289 */
290 if (iattr->ia_size != ip->i_size &&
291 (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
292 iattr->ia_ctime = iattr->ia_mtime =
293 current_fs_time(inode->i_sb);
294 mask |= ATTR_CTIME | ATTR_MTIME;
295 }
296
297 if (iattr->ia_size > ip->i_size) {
298 ip->i_d.di_size = iattr->ia_size;
299 ip->i_size = iattr->ia_size;
300 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
301 } else if (iattr->ia_size <= ip->i_size ||
302 (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
303 /*
304 * signal a sync transaction unless
305 * we're truncating an already unlinked
306 * file on a wsync filesystem
307 */
308 code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
309 XFS_DATA_FORK,
310 ((ip->i_d.di_nlink != 0 ||
311 !(mp->m_flags & XFS_MOUNT_WSYNC))
312 ? 1 : 0));
313 if (code)
314 goto abort_return;
315 /*
316 * Truncated "down", so we're removing references
317 * to old data here - if we now delay flushing for
318 * a long time, we expose ourselves unduly to the
319 * notorious NULL files problem. So, we mark this
320 * vnode and flush it when the file is closed, and
321 * do not wait the usual (long) time for writeout.
322 */
323 xfs_iflags_set(ip, XFS_ITRUNCATED);
324 }
325 } else if (tp) {
326 xfs_trans_ijoin(tp, ip);
327 }
328
329 /*
330 * Change file ownership. Must be the owner or privileged.
331 */
332 if (mask & (ATTR_UID|ATTR_GID)) {
333 /*
334 * CAP_FSETID overrides the following restrictions:
335 *
336 * The set-user-ID and set-group-ID bits of a file will be
337 * cleared upon successful return from chown()
338 */
339 if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
340 !capable(CAP_FSETID)) {
341 ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
342 }
343
344 /*
345 * Change the ownerships and register quota modifications
346 * in the transaction.
347 */
348 if (iuid != uid) {
349 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
350 ASSERT(mask & ATTR_UID);
351 ASSERT(udqp);
352 olddquot1 = xfs_qm_vop_chown(tp, ip,
353 &ip->i_udquot, udqp);
354 }
355 ip->i_d.di_uid = uid;
356 inode->i_uid = uid;
357 }
358 if (igid != gid) {
359 if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
360 ASSERT(!XFS_IS_PQUOTA_ON(mp));
361 ASSERT(mask & ATTR_GID);
362 ASSERT(gdqp);
363 olddquot2 = xfs_qm_vop_chown(tp, ip,
364 &ip->i_gdquot, gdqp);
365 }
366 ip->i_d.di_gid = gid;
367 inode->i_gid = gid;
368 }
369 }
370
371 /*
372 * Change file access modes.
373 */
374 if (mask & ATTR_MODE) {
375 umode_t mode = iattr->ia_mode;
376
377 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
378 mode &= ~S_ISGID;
379
380 ip->i_d.di_mode &= S_IFMT;
381 ip->i_d.di_mode |= mode & ~S_IFMT;
382
383 inode->i_mode &= S_IFMT;
384 inode->i_mode |= mode & ~S_IFMT;
385 }
386
387 /*
388 * Change file access or modified times.
389 */
390 if (mask & ATTR_ATIME) {
391 inode->i_atime = iattr->ia_atime;
392 ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
393 ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
394 ip->i_update_core = 1;
395 }
396 if (mask & ATTR_CTIME) {
397 inode->i_ctime = iattr->ia_ctime;
398 ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
399 ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
400 ip->i_update_core = 1;
401 }
402 if (mask & ATTR_MTIME) {
403 inode->i_mtime = iattr->ia_mtime;
404 ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
405 ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
406 ip->i_update_core = 1;
407 }
408
409 /*
410 * And finally, log the inode core if any attribute in it
411 * has been changed.
412 */
413 if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
414 ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
415 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
416
417 XFS_STATS_INC(xs_ig_attrchg);
418
419 /*
420 * If this is a synchronous mount, make sure that the
421 * transaction goes to disk before returning to the user.
422 * This is slightly sub-optimal in that truncates require
423 * two sync transactions instead of one for wsync filesystems.
424 * One for the truncate and one for the timestamps since we
425 * don't want to change the timestamps unless we're sure the
426 * truncate worked. Truncates are less than 1% of the laddis
427 * mix so this probably isn't worth the trouble to optimize.
428 */
429 code = 0;
430 if (mp->m_flags & XFS_MOUNT_WSYNC)
431 xfs_trans_set_sync(tp);
432
433 code = xfs_trans_commit(tp, commit_flags);
434
435 xfs_iunlock(ip, lock_flags);
436
437 /*
438 * Release any dquot(s) the inode had kept before chown.
439 */
440 xfs_qm_dqrele(olddquot1);
441 xfs_qm_dqrele(olddquot2);
442 xfs_qm_dqrele(udqp);
443 xfs_qm_dqrele(gdqp);
444
445 if (code)
446 return code;
447
448 /*
449 * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
450 * update. We could avoid this with linked transactions
451 * and passing down the transaction pointer all the way
452 * to attr_set. No previous user of the generic
453 * Posix ACL code seems to care about this issue either.
454 */
455 if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
456 code = -xfs_acl_chmod(inode);
457 if (code)
458 return XFS_ERROR(code);
459 }
460
461 return 0;
462
463 abort_return:
464 commit_flags |= XFS_TRANS_ABORT;
465 error_return:
466 xfs_qm_dqrele(udqp);
467 xfs_qm_dqrele(gdqp);
468 if (tp) {
469 xfs_trans_cancel(tp, commit_flags);
470 }
471 if (lock_flags != 0) {
472 xfs_iunlock(ip, lock_flags);
473 }
474 return code;
475}
476
477/* 53/*
478 * The maximum pathlen is 1024 bytes. Since the minimum file system 54 * The maximum pathlen is 1024 bytes. Since the minimum file system
479 * blocksize is 512 bytes, we can get a max of 2 extents back from 55 * blocksize is 512 bytes, we can get a max of 2 extents back from
@@ -621,13 +197,6 @@ xfs_free_eofblocks(
621 */ 197 */
622 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 198 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
623 199
624 /*
625 * Do the xfs_itruncate_start() call before
626 * reserving any log space because
627 * itruncate_start will call into the buffer
628 * cache and we can't
629 * do that within a transaction.
630 */
631 if (flags & XFS_FREE_EOF_TRYLOCK) { 200 if (flags & XFS_FREE_EOF_TRYLOCK) {
632 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) { 201 if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
633 xfs_trans_cancel(tp, 0); 202 xfs_trans_cancel(tp, 0);
@@ -636,13 +205,6 @@ xfs_free_eofblocks(
636 } else { 205 } else {
637 xfs_ilock(ip, XFS_IOLOCK_EXCL); 206 xfs_ilock(ip, XFS_IOLOCK_EXCL);
638 } 207 }
639 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
640 ip->i_size);
641 if (error) {
642 xfs_trans_cancel(tp, 0);
643 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
644 return error;
645 }
646 208
647 error = xfs_trans_reserve(tp, 0, 209 error = xfs_trans_reserve(tp, 0,
648 XFS_ITRUNCATE_LOG_RES(mp), 210 XFS_ITRUNCATE_LOG_RES(mp),
@@ -658,15 +220,12 @@ xfs_free_eofblocks(
658 xfs_ilock(ip, XFS_ILOCK_EXCL); 220 xfs_ilock(ip, XFS_ILOCK_EXCL);
659 xfs_trans_ijoin(tp, ip); 221 xfs_trans_ijoin(tp, ip);
660 222
661 error = xfs_itruncate_finish(&tp, ip, 223 error = xfs_itruncate_data(&tp, ip, ip->i_size);
662 ip->i_size,
663 XFS_DATA_FORK,
664 0);
665 /*
666 * If we get an error at this point we
667 * simply don't bother truncating the file.
668 */
669 if (error) { 224 if (error) {
225 /*
226 * If we get an error at this point we simply don't
227 * bother truncating the file.
228 */
670 xfs_trans_cancel(tp, 229 xfs_trans_cancel(tp,
671 (XFS_TRANS_RELEASE_LOG_RES | 230 (XFS_TRANS_RELEASE_LOG_RES |
672 XFS_TRANS_ABORT)); 231 XFS_TRANS_ABORT));
@@ -1084,20 +643,9 @@ xfs_inactive(
1084 643
1085 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); 644 tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
1086 if (truncate) { 645 if (truncate) {
1087 /*
1088 * Do the xfs_itruncate_start() call before
1089 * reserving any log space because itruncate_start
1090 * will call into the buffer cache and we can't
1091 * do that within a transaction.
1092 */
1093 xfs_ilock(ip, XFS_IOLOCK_EXCL); 646 xfs_ilock(ip, XFS_IOLOCK_EXCL);
1094 647
1095 error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0); 648 xfs_ioend_wait(ip);
1096 if (error) {
1097 xfs_trans_cancel(tp, 0);
1098 xfs_iunlock(ip, XFS_IOLOCK_EXCL);
1099 return VN_INACTIVE_CACHE;
1100 }
1101 649
1102 error = xfs_trans_reserve(tp, 0, 650 error = xfs_trans_reserve(tp, 0,
1103 XFS_ITRUNCATE_LOG_RES(mp), 651 XFS_ITRUNCATE_LOG_RES(mp),
@@ -1114,16 +662,7 @@ xfs_inactive(
1114 xfs_ilock(ip, XFS_ILOCK_EXCL); 662 xfs_ilock(ip, XFS_ILOCK_EXCL);
1115 xfs_trans_ijoin(tp, ip); 663 xfs_trans_ijoin(tp, ip);
1116 664
1117 /* 665 error = xfs_itruncate_data(&tp, ip, 0);
1118 * normally, we have to run xfs_itruncate_finish sync.
1119 * But if filesystem is wsync and we're in the inactive
1120 * path, then we know that nlink == 0, and that the
1121 * xaction that made nlink == 0 is permanently committed
1122 * since xfs_remove runs as a synchronous transaction.
1123 */
1124 error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
1125 (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
1126
1127 if (error) { 666 if (error) {
1128 xfs_trans_cancel(tp, 667 xfs_trans_cancel(tp,
1129 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); 668 XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2430,6 +1969,8 @@ xfs_zero_remaining_bytes(
2430 if (!bp) 1969 if (!bp)
2431 return XFS_ERROR(ENOMEM); 1970 return XFS_ERROR(ENOMEM);
2432 1971
1972 xfs_buf_unlock(bp);
1973
2433 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { 1974 for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
2434 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1975 offset_fsb = XFS_B_TO_FSBT(mp, offset);
2435 nimap = 1; 1976 nimap = 1;
@@ -2784,7 +2325,7 @@ xfs_change_file_space(
2784 iattr.ia_valid = ATTR_SIZE; 2325 iattr.ia_valid = ATTR_SIZE;
2785 iattr.ia_size = startoffset; 2326 iattr.ia_size = startoffset;
2786 2327
2787 error = xfs_setattr(ip, &iattr, attr_flags); 2328 error = xfs_setattr_size(ip, &iattr, attr_flags);
2788 2329
2789 if (error) 2330 if (error)
2790 return error; 2331 return error;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 3bcd23353d6c..35d3d513e1e9 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -13,7 +13,8 @@ struct xfs_inode;
13struct xfs_iomap; 13struct xfs_iomap;
14 14
15 15
16int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags); 16int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
17int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
17#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ 18#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
18#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ 19#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
19#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ 20#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */